mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
35 Commits
copilot/fi
...
v3.8.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0d1d0211f | ||
|
|
f617bec686 | ||
|
|
7a94d237c4 | ||
|
|
304ac94d01 | ||
|
|
f9f9b9d444 | ||
|
|
70d78b9fd4 | ||
|
|
91248da09e | ||
|
|
745c31e013 | ||
|
|
7e01aa8faa | ||
|
|
aceebf81d6 | ||
|
|
71ed03102f | ||
|
|
f6d2a52cd5 | ||
|
|
05a00b2399 | ||
|
|
3a232446e0 | ||
|
|
bdfe8431fa | ||
|
|
55607a5aac | ||
|
|
ec492a4c56 | ||
|
|
2defe98df8 | ||
|
|
b406b088a7 | ||
|
|
6261c87b1b | ||
|
|
fa00aa0085 | ||
|
|
0e53ce60b4 | ||
|
|
8aba078439 | ||
|
|
e88db7d142 | ||
|
|
b7b8a0a748 | ||
|
|
dd2828241c | ||
|
|
b8011f49f2 | ||
|
|
16e5689162 | ||
|
|
2dd42292dc | ||
|
|
53d51671d7 | ||
|
|
daf39e1efd | ||
|
|
382474e4a1 | ||
|
|
5fed9c6596 | ||
|
|
bfa07df7cd | ||
|
|
fbaa21b0e5 |
4
.github/workflows/backend.yml
vendored
4
.github/workflows/backend.yml
vendored
@@ -1090,7 +1090,7 @@ jobs:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
@@ -1176,7 +1176,7 @@ jobs:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
|
||||
2
.github/workflows/backend_build.yml
vendored
2
.github/workflows/backend_build.yml
vendored
@@ -97,7 +97,7 @@ jobs:
|
||||
&& sudo apt-get install -y git
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Release space from worker
|
||||
if: inputs.runs-on == 'ubuntu-latest'
|
||||
|
||||
2
.github/workflows/backend_build_darwin.yml
vendored
2
.github/workflows/backend_build_darwin.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
go-version: ['${{ inputs.go-version }}']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
|
||||
2
.github/workflows/backend_pr.yml
vendored
2
.github/workflows/backend_pr.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
|
||||
6
.github/workflows/build-test.yaml
vendored
6
.github/workflows/build-test.yaml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
|
||||
2
.github/workflows/bump_deps.yaml
vendored
2
.github/workflows/bump_deps.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
file: "backend/go/piper/Makefile"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Bump dependencies 🔧
|
||||
id: bump
|
||||
run: |
|
||||
|
||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
- repository: "mudler/LocalAI"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Bump dependencies 🔧
|
||||
run: |
|
||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||
|
||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y git
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
|
||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
skip-commit-verification: true
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Approve a PR if not already approved
|
||||
run: |
|
||||
|
||||
2
.github/workflows/deploy-explorer.yaml
vendored
2
.github/workflows/deploy-explorer.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
|
||||
2
.github/workflows/gallery-agent.yaml
vendored
2
.github/workflows/gallery-agent.yaml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
2
.github/workflows/generate_grpc_cache.yaml
vendored
2
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -73,7 +73,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Cache GRPC
|
||||
uses: docker/build-push-action@v6
|
||||
|
||||
4
.github/workflows/generate_intel_image.yaml
vendored
4
.github/workflows/generate_intel_image.yaml
vendored
@@ -16,7 +16,7 @@ jobs:
|
||||
matrix:
|
||||
include:
|
||||
- base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
platforms: 'linux/amd64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
steps:
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Cache Intel images
|
||||
uses: docker/build-push-action@v6
|
||||
|
||||
2
.github/workflows/image_build.yml
vendored
2
.github/workflows/image_build.yml
vendored
@@ -94,7 +94,7 @@ jobs:
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y git
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Release space from worker
|
||||
if: inputs.runs-on == 'ubuntu-latest'
|
||||
|
||||
2
.github/workflows/localaibot_automerge.yml
vendored
2
.github/workflows/localaibot_automerge.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
if: ${{ github.actor == 'localai-bot' && !contains(github.event.pull_request.title, 'chore(model gallery):') }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Approve a PR if not already approved
|
||||
run: |
|
||||
|
||||
4
.github/workflows/notify-models.yaml
vendored
4
.github/workflows/notify-models.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
MODEL_NAME: gemma-3-12b-it-qat
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
MODEL_NAME: gemma-3-12b-it-qat
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
|
||||
|
||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
@@ -46,7 +46,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Set up Go
|
||||
|
||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
GO111MODULE: on
|
||||
steps:
|
||||
- name: Checkout Source
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
- name: Run Gosec Security Scanner
|
||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||
|
||||
18
.github/workflows/test-extra.yml
vendored
18
.github/workflows/test-extra.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v5
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v5
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v5
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -186,7 +186,7 @@ jobs:
|
||||
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||
# df -h
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v5
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -211,7 +211,7 @@ jobs:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v5
|
||||
# uses: actions/checkout@v6
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
@@ -232,7 +232,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
|
||||
6
.github/workflows/test.yml
vendored
6
.github/workflows/test.yml
vendored
@@ -70,7 +70,7 @@ jobs:
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
@@ -166,7 +166,7 @@ jobs:
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
@@ -196,7 +196,7 @@ jobs:
|
||||
go-version: ['1.25.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
|
||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -9,7 +9,7 @@ jobs:
|
||||
fail-fast: false
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
|
||||
@@ -332,6 +332,6 @@ RUN mkdir -p /models /backends
|
||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
||||
|
||||
VOLUME /models /backends
|
||||
VOLUME /models /backends /configuration
|
||||
EXPOSE 8080
|
||||
ENTRYPOINT [ "/entrypoint.sh" ]
|
||||
|
||||
@@ -108,7 +108,7 @@ Run the installer script:
|
||||
curl https://localai.io/install.sh | sh
|
||||
```
|
||||
|
||||
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
|
||||
For more installation options, see [Installer Options](https://localai.io/installation/).
|
||||
|
||||
### macOS Download:
|
||||
|
||||
@@ -206,6 +206,7 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
|
||||
|
||||
## 📰 Latest project news
|
||||
|
||||
- November 2025: Major improvements to the UX. Among these: [Import models via URL](https://github.com/mudler/LocalAI/pull/7245) and [Multiple chats and history](https://github.com/mudler/LocalAI/pull/7325)
|
||||
- October 2025: 🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) support added for agentic capabilities with external tools
|
||||
- September 2025: New Launcher application for MacOS and Linux, extended support to many backends for Mac and Nvidia L4T devices. Models: Added MLX-Audio, WAN 2.2. WebUI improvements and Python-based backends now ships portable python environments.
|
||||
- August 2025: MLX, MLX-VLM, Diffusers and llama.cpp are now supported on Mac M1/M2/M3+ chips ( with `development` suffix in the gallery ): https://github.com/mudler/LocalAI/pull/6049 https://github.com/mudler/LocalAI/pull/6119 https://github.com/mudler/LocalAI/pull/6121 https://github.com/mudler/LocalAI/pull/6060
|
||||
|
||||
@@ -57,7 +57,7 @@ add_library(hw_grpc_proto
|
||||
${hw_proto_srcs}
|
||||
${hw_proto_hdrs} )
|
||||
|
||||
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)
|
||||
add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h)
|
||||
|
||||
target_include_directories(${TARGET} PRIVATE ../llava)
|
||||
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=10e9780154365b191fb43ca4830659ef12def80f
|
||||
LLAMA_VERSION?=583cb83416467e8abf9b37349dcf1f6a0083745a
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
//
|
||||
|
||||
#include "server.cpp"
|
||||
#include "server-task.cpp"
|
||||
#include "server-queue.cpp"
|
||||
#include "server-common.cpp"
|
||||
|
||||
// LocalAI
|
||||
|
||||
#include "backend.pb.h"
|
||||
@@ -137,15 +141,43 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
|
||||
}
|
||||
|
||||
// Extract tools and tool_choice from proto and add to data JSON
|
||||
SRV_INF("[TOOLS DEBUG] parse_options: Checking for tools in proto, tools().empty()=%d, tools().size()=%zu\n",
|
||||
predict->tools().empty() ? 1 : 0, predict->tools().size());
|
||||
if (!predict->tools().empty()) {
|
||||
SRV_INF("[TOOLS DEBUG] parse_options: Tools string from proto (first 500 chars): %s\n",
|
||||
predict->tools().substr(0, std::min<size_t>(500, predict->tools().size())).c_str());
|
||||
try {
|
||||
// Parse tools JSON string and add to data
|
||||
json tools_json = json::parse(predict->tools());
|
||||
data["tools"] = tools_json;
|
||||
SRV_INF("Extracted tools from proto: %s\n", predict->tools().c_str());
|
||||
// Debug: Log tools count and names
|
||||
if (tools_json.is_array()) {
|
||||
SRV_INF("[TOOLS DEBUG] parse_options: Successfully parsed %zu tools from Go layer\n", tools_json.size());
|
||||
for (size_t i = 0; i < tools_json.size(); i++) {
|
||||
if (tools_json[i].contains("function") && tools_json[i]["function"].contains("name")) {
|
||||
SRV_INF("[TOOLS DEBUG] parse_options: Tool %zu: %s\n", i, tools_json[i]["function"]["name"].get<std::string>().c_str());
|
||||
} else if (tools_json[i].contains("name")) {
|
||||
SRV_INF("[TOOLS DEBUG] parse_options: Tool %zu: %s\n", i, tools_json[i]["name"].get<std::string>().c_str());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SRV_WRN("[TOOLS DEBUG] parse_options: Parsed tools JSON is not an array: %s\n", tools_json.dump().c_str());
|
||||
}
|
||||
} catch (const json::parse_error& e) {
|
||||
SRV_WRN("Failed to parse tools JSON from proto: %s\n", e.what());
|
||||
SRV_WRN("[TOOLS DEBUG] parse_options: Tools string that failed to parse: %s\n", predict->tools().c_str());
|
||||
}
|
||||
} else {
|
||||
SRV_INF("%s", "[TOOLS DEBUG] parse_options: No tools received from Go layer (predict->tools() is empty)\n");
|
||||
}
|
||||
|
||||
// Debug: Verify tools are in data after extraction
|
||||
if (data.contains("tools")) {
|
||||
SRV_INF("[TOOLS DEBUG] parse_options: Tools successfully added to data, count: %zu\n",
|
||||
data["tools"].is_array() ? data["tools"].size() : 0);
|
||||
} else {
|
||||
SRV_INF("%s", "[TOOLS DEBUG] parse_options: WARNING - Tools NOT in data after extraction!\n");
|
||||
}
|
||||
if (!predict->toolchoice().empty()) {
|
||||
try {
|
||||
@@ -155,9 +187,11 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
|
||||
// Store it as-is (string or object) so we can convert object to "required" later when adding to body_json
|
||||
if (tool_choice_json.is_string()) {
|
||||
data["tool_choice"] = tool_choice_json.get<std::string>();
|
||||
SRV_DBG("[TOOLS DEBUG] Received tool_choice from Go layer: %s\n", tool_choice_json.get<std::string>().c_str());
|
||||
} else {
|
||||
// Store object as-is so we can detect it later and convert to "required"
|
||||
data["tool_choice"] = tool_choice_json;
|
||||
SRV_DBG("[TOOLS DEBUG] Received tool_choice object from Go layer: %s\n", tool_choice_json.dump().c_str());
|
||||
}
|
||||
SRV_INF("Extracted tool_choice from proto: %s\n", predict->toolchoice().c_str());
|
||||
} catch (const json::parse_error& e) {
|
||||
@@ -666,6 +700,10 @@ public:
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
// Handle null values - convert to empty string to avoid template errors
|
||||
if (content_val.is_null()) {
|
||||
content_val = "";
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
content_val = msg.content();
|
||||
@@ -707,7 +745,12 @@ public:
|
||||
msg_json["content"] = content_array;
|
||||
} else {
|
||||
// Use content as-is (already array or not last user message)
|
||||
msg_json["content"] = content_val;
|
||||
// Ensure null values are converted to empty string
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
} else {
|
||||
msg_json["content"] = content_val;
|
||||
}
|
||||
}
|
||||
} else if (is_last_user_msg && has_images_or_audio) {
|
||||
// If no content but this is the last user message with images/audio, create content array
|
||||
@@ -734,6 +777,57 @@ public:
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
} else if (msg.role() == "tool") {
|
||||
// Tool role messages must have content field set, even if empty
|
||||
// Jinja templates expect content to be a string, not null or object
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
||||
if (msg.content().empty()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
// Content exists, parse and ensure it's a string
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
|
||||
i, content_val.is_null() ? "null" :
|
||||
content_val.is_object() ? "object" :
|
||||
content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" : "other");
|
||||
// Handle null values - Jinja templates expect content to be a string, not null
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
|
||||
} else if (content_val.is_object()) {
|
||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
|
||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
||||
} else if (content_val.is_string()) {
|
||||
msg_json["content"] = content_val.get<std::string>();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
|
||||
} else {
|
||||
// For arrays or other types, convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
|
||||
i, content_val.is_array() ? "array" : "other type");
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
msg_json["content"] = msg.content();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
||||
// Jinja templates expect content to be present, default to empty string if not set
|
||||
if (!msg_json.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
|
||||
i, msg.role().c_str());
|
||||
msg_json["content"] = "";
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional fields for OpenAI-compatible message format
|
||||
@@ -751,14 +845,96 @@ public:
|
||||
try {
|
||||
json tool_calls = json::parse(msg.tool_calls());
|
||||
msg_json["tool_calls"] = tool_calls;
|
||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
||||
// set content to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
|
||||
msg_json["content"] = " ";
|
||||
}
|
||||
// Log each tool call with name and arguments
|
||||
if (tool_calls.is_array()) {
|
||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
||||
const auto& tc = tool_calls[tc_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_args = "{}";
|
||||
if (tc.contains("function")) {
|
||||
const auto& func = tc["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("arguments")) {
|
||||
tool_args = func["arguments"].is_string() ?
|
||||
func["arguments"].get<std::string>() :
|
||||
func["arguments"].dump();
|
||||
}
|
||||
} else if (tc.contains("name")) {
|
||||
tool_name = tc["name"].get<std::string>();
|
||||
if (tc.contains("arguments")) {
|
||||
tool_args = tc["arguments"].is_string() ?
|
||||
tc["arguments"].get<std::string>() :
|
||||
tc["arguments"].dump();
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
||||
}
|
||||
}
|
||||
} catch (const json::parse_error& e) {
|
||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log final content state before adding to array
|
||||
if (msg_json.contains("content")) {
|
||||
if (msg_json["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
||||
i, msg_json["content"].is_string() ? "string" :
|
||||
msg_json["content"].is_array() ? "array" :
|
||||
msg_json["content"].is_object() ? "object" : "other",
|
||||
msg_json["content"].is_null() ? 0 : 1);
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
||||
}
|
||||
|
||||
messages_json.push_back(msg_json);
|
||||
}
|
||||
|
||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Running final safety check on %zu messages\n", messages_json.size());
|
||||
for (size_t idx = 0; idx < messages_json.size(); idx++) {
|
||||
auto& msg = messages_json[idx];
|
||||
if (msg.contains("content") && msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check found message %zu with NULL content, converting to empty string\n", idx);
|
||||
msg["content"] = "";
|
||||
} else if (!msg.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check found message %zu without content field, adding empty string\n", idx);
|
||||
msg["content"] = "";
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check message %zu: content OK, type=%s\n",
|
||||
idx, msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Count tool messages
|
||||
int tool_msg_count = 0;
|
||||
for (const auto& msg : messages_json) {
|
||||
if (msg.contains("role") && msg["role"] == "tool") {
|
||||
tool_msg_count++;
|
||||
}
|
||||
}
|
||||
SRV_DBG("[TOOLS DEBUG] PredictStream: Built %d tool messages out of %zu total messages\n", tool_msg_count, messages_json.size());
|
||||
|
||||
// Debug: Print full conversation (messages)
|
||||
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full messages array:\n%s\n", messages_json.dump(2).c_str());
|
||||
|
||||
body_json["messages"] = messages_json;
|
||||
body_json["stream"] = true; // PredictStream is always streaming
|
||||
|
||||
@@ -769,6 +945,16 @@ public:
|
||||
data["grammar"].is_string() &&
|
||||
!data["grammar"].get<std::string>().empty();
|
||||
|
||||
SRV_INF("[TOOLS DEBUG] PredictStream: has_grammar_from_go=%d, data.contains(\"tools\")=%d, data.contains(\"grammar\")=%d\n",
|
||||
has_grammar_from_go ? 1 : 0,
|
||||
data.contains("tools") ? 1 : 0,
|
||||
data.contains("grammar") ? 1 : 0);
|
||||
if (data.contains("grammar")) {
|
||||
SRV_INF("[TOOLS DEBUG] PredictStream: grammar type=%s, empty=%d\n",
|
||||
data["grammar"].is_string() ? "string" : "other",
|
||||
data["grammar"].is_string() && data["grammar"].get<std::string>().empty() ? 1 : 0);
|
||||
}
|
||||
|
||||
// Copy other relevant fields from data that oaicompat_chat_params_parse expects
|
||||
// Tools and tool_choice are only passed when NoGrammar is true (grammar not provided)
|
||||
// When grammar is provided from Go layer, we use it instead of template-generated grammar
|
||||
@@ -778,8 +964,36 @@ public:
|
||||
body_json["tools"] = data["tools"];
|
||||
std::string tools_str = data["tools"].dump();
|
||||
SRV_INF("Using tools from data (NoGrammar=true): %s\n", tools_str.c_str());
|
||||
// Debug: Log tools count and details before template processing
|
||||
if (data["tools"].is_array()) {
|
||||
SRV_INF("[TOOLS DEBUG] PredictStream: Passing %zu tools to oaicompat_chat_params_parse\n", data["tools"].size());
|
||||
for (size_t t_idx = 0; t_idx < data["tools"].size(); t_idx++) {
|
||||
const auto& tool = data["tools"][t_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_desc = "";
|
||||
if (tool.contains("function")) {
|
||||
const auto& func = tool["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("description")) {
|
||||
tool_desc = func["description"].is_string() ?
|
||||
func["description"].get<std::string>() : "";
|
||||
}
|
||||
} else if (tool.contains("name")) {
|
||||
tool_name = tool["name"].get<std::string>();
|
||||
if (tool.contains("description")) {
|
||||
tool_desc = tool["description"].is_string() ?
|
||||
tool["description"].get<std::string>() : "";
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOLS DEBUG] PredictStream: Tool %zu: name=%s, description=%s\n",
|
||||
t_idx, tool_name.c_str(), tool_desc.substr(0, 100).c_str());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SRV_WRN("%s", "No tools found in data - tool calls will not work without tools field\n");
|
||||
SRV_DBG("[TOOLS DEBUG] PredictStream: No tools in data, tool_choice=%s\n", data.contains("tool_choice") ? data["tool_choice"].dump().c_str() : "not set");
|
||||
}
|
||||
if (data.contains("tool_choice")) {
|
||||
// tool_choice can be a string or object, but oaicompat_chat_params_parse expects a string
|
||||
@@ -821,6 +1035,17 @@ public:
|
||||
if (data.contains("chat_template_kwargs")) {
|
||||
body_json["chat_template_kwargs"] = data["chat_template_kwargs"];
|
||||
}
|
||||
// Pass parallel_tool_calls if present (used by oaicompat_chat_params_parse)
|
||||
if (data.contains("parallel_tool_calls")) {
|
||||
body_json["parallel_tool_calls"] = data["parallel_tool_calls"];
|
||||
}
|
||||
// Pass add_generation_prompt if present (used by oaicompat_chat_params_parse)
|
||||
if (data.contains("add_generation_prompt")) {
|
||||
body_json["add_generation_prompt"] = data["add_generation_prompt"];
|
||||
}
|
||||
|
||||
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
||||
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
||||
|
||||
// Use the same approach as server.cpp: call oaicompat_chat_params_parse
|
||||
// This handles all template application, grammar merging, etc. automatically
|
||||
@@ -831,8 +1056,56 @@ public:
|
||||
// Update allow_image and allow_audio based on current mctx state
|
||||
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
|
||||
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
|
||||
|
||||
// Debug: Log tools before template processing
|
||||
if (body_json.contains("tools")) {
|
||||
SRV_DBG("[TOOLS DEBUG] PredictStream: Before oaicompat_chat_params_parse - tools count: %zu\n",
|
||||
body_json["tools"].is_array() ? body_json["tools"].size() : 0);
|
||||
}
|
||||
|
||||
// Debug: Verify messages content before template processing
|
||||
// Also ensure ALL messages have content set to string (not null) - templates expect strings
|
||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||
auto& msg = body_json["messages"][idx];
|
||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
||||
if (msg.contains("content")) {
|
||||
if (msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Fix null content
|
||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
||||
// If content is object or other non-string type, convert to string for templates
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
||||
if (msg["content"].is_object()) {
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else {
|
||||
msg["content"] = "";
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
||||
idx, role_str.c_str(),
|
||||
msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Add missing content
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
|
||||
|
||||
// Debug: Log tools after template processing
|
||||
if (parsed_data.contains("tools")) {
|
||||
SRV_DBG("[TOOLS DEBUG] PredictStream: After oaicompat_chat_params_parse - tools count: %zu\n",
|
||||
parsed_data["tools"].is_array() ? parsed_data["tools"].size() : 0);
|
||||
} else {
|
||||
SRV_DBG("%s", "[TOOLS DEBUG] PredictStream: After oaicompat_chat_params_parse - no tools in parsed_data\n");
|
||||
}
|
||||
|
||||
// Extract the prompt from parsed data
|
||||
prompt_str = parsed_data.at("prompt").get<std::string>();
|
||||
|
||||
@@ -843,8 +1116,9 @@ public:
|
||||
preserved_grammar = data["grammar"];
|
||||
}
|
||||
|
||||
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, etc.)
|
||||
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, parse_tool_calls, etc.)
|
||||
// This ensures all template-generated fields are included
|
||||
// parse_tool_calls is set by oaicompat_chat_params_parse when tools are present
|
||||
for (const auto& item : parsed_data.items()) {
|
||||
if (item.key() != "prompt") { // Don't overwrite prompt_str, we already extracted it
|
||||
// If grammar was provided from Go layer, preserve it instead of template-generated grammar
|
||||
@@ -855,6 +1129,11 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log parse_tool_calls if present (set by oaicompat_chat_params_parse when tools are present)
|
||||
if (data.contains("parse_tool_calls")) {
|
||||
SRV_DBG("[TOOLS DEBUG] PredictStream: parse_tool_calls=%s\n", data["parse_tool_calls"].get<bool>() ? "true" : "false");
|
||||
}
|
||||
} else {
|
||||
// Use prompt directly from data
|
||||
if (data.contains("prompt") && data["prompt"].is_string()) {
|
||||
@@ -1109,11 +1388,19 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
|
||||
for (int i = 0; i < request->messages_size(); i++) {
|
||||
const auto& msg = request->messages(i);
|
||||
json msg_json;
|
||||
msg_json["role"] = msg.role();
|
||||
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
|
||||
i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
|
||||
if (!msg.content().empty()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
}
|
||||
|
||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
|
||||
|
||||
@@ -1124,6 +1411,11 @@ public:
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
// Handle null values - convert to empty string to avoid template errors
|
||||
if (content_val.is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
|
||||
content_val = "";
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
content_val = msg.content();
|
||||
@@ -1131,6 +1423,7 @@ public:
|
||||
|
||||
// If content is an object (e.g., from tool call failures), convert to string
|
||||
if (content_val.is_object()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
|
||||
content_val = content_val.dump();
|
||||
}
|
||||
|
||||
@@ -1165,7 +1458,17 @@ public:
|
||||
msg_json["content"] = content_array;
|
||||
} else {
|
||||
// Use content as-is (already array or not last user message)
|
||||
msg_json["content"] = content_val;
|
||||
// Ensure null values are converted to empty string
|
||||
if (content_val.is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
|
||||
msg_json["content"] = "";
|
||||
} else {
|
||||
msg_json["content"] = content_val;
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
|
||||
i, content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" :
|
||||
content_val.is_object() ? "object" : "other");
|
||||
}
|
||||
}
|
||||
} else if (is_last_user_msg && has_images_or_audio) {
|
||||
// If no content but this is the last user message with images/audio, create content array
|
||||
@@ -1192,9 +1495,65 @@ public:
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
|
||||
} else if (!msg.tool_calls().empty()) {
|
||||
// Tool call messages may have null content
|
||||
msg_json["content"] = json();
|
||||
// Tool call messages may have null content, but templates expect string
|
||||
// IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
|
||||
msg_json["content"] = " ";
|
||||
} else if (msg.role() == "tool") {
|
||||
// Tool role messages must have content field set, even if empty
|
||||
// Jinja templates expect content to be a string, not null or object
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
||||
if (msg.content().empty()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
// Content exists, parse and ensure it's a string
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
|
||||
i, content_val.is_null() ? "null" :
|
||||
content_val.is_object() ? "object" :
|
||||
content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" : "other");
|
||||
// Handle null values - Jinja templates expect content to be a string, not null
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
|
||||
} else if (content_val.is_object()) {
|
||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
|
||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
||||
} else if (content_val.is_string()) {
|
||||
msg_json["content"] = content_val.get<std::string>();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
|
||||
} else {
|
||||
// For arrays or other types, convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
|
||||
i, content_val.is_array() ? "array" : "other type");
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
msg_json["content"] = msg.content();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
||||
// Jinja templates expect content to be present, default to empty string if not set
|
||||
if (!msg_json.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
|
||||
i, msg.role().c_str());
|
||||
msg_json["content"] = "";
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional fields for OpenAI-compatible message format
|
||||
@@ -1212,14 +1571,98 @@ public:
|
||||
try {
|
||||
json tool_calls = json::parse(msg.tool_calls());
|
||||
msg_json["tool_calls"] = tool_calls;
|
||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
||||
// set content to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
|
||||
msg_json["content"] = " ";
|
||||
}
|
||||
// Log each tool call with name and arguments
|
||||
if (tool_calls.is_array()) {
|
||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
||||
const auto& tc = tool_calls[tc_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_args = "{}";
|
||||
if (tc.contains("function")) {
|
||||
const auto& func = tc["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("arguments")) {
|
||||
tool_args = func["arguments"].is_string() ?
|
||||
func["arguments"].get<std::string>() :
|
||||
func["arguments"].dump();
|
||||
}
|
||||
} else if (tc.contains("name")) {
|
||||
tool_name = tc["name"].get<std::string>();
|
||||
if (tc.contains("arguments")) {
|
||||
tool_args = tc["arguments"].is_string() ?
|
||||
tc["arguments"].get<std::string>() :
|
||||
tc["arguments"].dump();
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
||||
}
|
||||
}
|
||||
} catch (const json::parse_error& e) {
|
||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log final content state before adding to array
|
||||
if (msg_json.contains("content")) {
|
||||
if (msg_json["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
||||
i, msg_json["content"].is_string() ? "string" :
|
||||
msg_json["content"].is_array() ? "array" :
|
||||
msg_json["content"].is_object() ? "object" : "other",
|
||||
msg_json["content"].is_null() ? 0 : 1);
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
||||
}
|
||||
|
||||
messages_json.push_back(msg_json);
|
||||
}
|
||||
|
||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Running final safety check on %zu messages\n", messages_json.size());
|
||||
for (size_t idx = 0; idx < messages_json.size(); idx++) {
|
||||
auto& msg = messages_json[idx];
|
||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
||||
if (msg.contains("content") && msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Safety check found message %zu (role=%s) with NULL content, converting to empty string\n", idx, role_str.c_str());
|
||||
msg["content"] = "";
|
||||
} else if (!msg.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Safety check found message %zu (role=%s) without content field, adding empty string\n", idx, role_str.c_str());
|
||||
msg["content"] = "";
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Safety check message %zu (role=%s): content OK, type=%s\n",
|
||||
idx, role_str.c_str(),
|
||||
msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Count tool messages
|
||||
int tool_msg_count = 0;
|
||||
for (const auto& msg : messages_json) {
|
||||
if (msg.contains("role") && msg["role"] == "tool") {
|
||||
tool_msg_count++;
|
||||
}
|
||||
}
|
||||
SRV_DBG("[TOOLS DEBUG] Predict: Built %d tool messages out of %zu total messages\n", tool_msg_count, messages_json.size());
|
||||
|
||||
// Debug: Print full conversation (messages)
|
||||
SRV_DBG("[CONVERSATION DEBUG] Predict: Full messages array:\n%s\n", messages_json.dump(2).c_str());
|
||||
|
||||
body_json["messages"] = messages_json;
|
||||
body_json["stream"] = false;
|
||||
|
||||
@@ -1230,6 +1673,16 @@ public:
|
||||
data["grammar"].is_string() &&
|
||||
!data["grammar"].get<std::string>().empty();
|
||||
|
||||
SRV_INF("[TOOLS DEBUG] Predict: has_grammar_from_go=%d, data.contains(\"tools\")=%d, data.contains(\"grammar\")=%d\n",
|
||||
has_grammar_from_go ? 1 : 0,
|
||||
data.contains("tools") ? 1 : 0,
|
||||
data.contains("grammar") ? 1 : 0);
|
||||
if (data.contains("grammar")) {
|
||||
SRV_INF("[TOOLS DEBUG] Predict: grammar type=%s, empty=%d\n",
|
||||
data["grammar"].is_string() ? "string" : "other",
|
||||
data["grammar"].is_string() && data["grammar"].get<std::string>().empty() ? 1 : 0);
|
||||
}
|
||||
|
||||
// Copy other relevant fields from data that oaicompat_chat_params_parse expects
|
||||
// Tools and tool_choice are only passed when NoGrammar is true (grammar not provided)
|
||||
// When grammar is provided from Go layer, we use it instead of template-generated grammar
|
||||
@@ -1239,8 +1692,36 @@ public:
|
||||
body_json["tools"] = data["tools"];
|
||||
std::string tools_str = data["tools"].dump();
|
||||
SRV_INF("Using tools from data (NoGrammar=true): %s\n", tools_str.c_str());
|
||||
// Debug: Log tools count and details before template processing
|
||||
if (data["tools"].is_array()) {
|
||||
SRV_INF("[TOOLS DEBUG] Predict: Passing %zu tools to oaicompat_chat_params_parse\n", data["tools"].size());
|
||||
for (size_t t_idx = 0; t_idx < data["tools"].size(); t_idx++) {
|
||||
const auto& tool = data["tools"][t_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_desc = "";
|
||||
if (tool.contains("function")) {
|
||||
const auto& func = tool["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("description")) {
|
||||
tool_desc = func["description"].is_string() ?
|
||||
func["description"].get<std::string>() : "";
|
||||
}
|
||||
} else if (tool.contains("name")) {
|
||||
tool_name = tool["name"].get<std::string>();
|
||||
if (tool.contains("description")) {
|
||||
tool_desc = tool["description"].is_string() ?
|
||||
tool["description"].get<std::string>() : "";
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOLS DEBUG] Predict: Tool %zu: name=%s, description=%s\n",
|
||||
t_idx, tool_name.c_str(), tool_desc.substr(0, 100).c_str());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SRV_WRN("%s", "No tools found in data - tool calls will not work without tools field\n");
|
||||
SRV_DBG("[TOOLS DEBUG] Predict: No tools in data, tool_choice=%s\n", data.contains("tool_choice") ? data["tool_choice"].dump().c_str() : "not set");
|
||||
}
|
||||
if (data.contains("tool_choice")) {
|
||||
// tool_choice can be a string or object, but oaicompat_chat_params_parse expects a string
|
||||
@@ -1282,6 +1763,17 @@ public:
|
||||
if (data.contains("chat_template_kwargs")) {
|
||||
body_json["chat_template_kwargs"] = data["chat_template_kwargs"];
|
||||
}
|
||||
// Pass parallel_tool_calls if present (used by oaicompat_chat_params_parse)
|
||||
if (data.contains("parallel_tool_calls")) {
|
||||
body_json["parallel_tool_calls"] = data["parallel_tool_calls"];
|
||||
}
|
||||
// Pass add_generation_prompt if present (used by oaicompat_chat_params_parse)
|
||||
if (data.contains("add_generation_prompt")) {
|
||||
body_json["add_generation_prompt"] = data["add_generation_prompt"];
|
||||
}
|
||||
|
||||
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
||||
SRV_DBG("[CONVERSATION DEBUG] Predict: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
||||
|
||||
// Use the same approach as server.cpp: call oaicompat_chat_params_parse
|
||||
// This handles all template application, grammar merging, etc. automatically
|
||||
@@ -1292,8 +1784,56 @@ public:
|
||||
// Update allow_image and allow_audio based on current mctx state
|
||||
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
|
||||
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
|
||||
|
||||
// Debug: Log tools before template processing
|
||||
if (body_json.contains("tools")) {
|
||||
SRV_DBG("[TOOLS DEBUG] Predict: Before oaicompat_chat_params_parse - tools count: %zu\n",
|
||||
body_json["tools"].is_array() ? body_json["tools"].size() : 0);
|
||||
}
|
||||
|
||||
// Debug: Verify messages content before template processing
|
||||
// Also ensure ALL messages have content set to string (not null) - templates expect strings
|
||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||
auto& msg = body_json["messages"][idx];
|
||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
||||
if (msg.contains("content")) {
|
||||
if (msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Fix null content
|
||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
||||
// If content is object or other non-string type, convert to string for templates
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
||||
if (msg["content"].is_object()) {
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else {
|
||||
msg["content"] = "";
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
||||
idx, role_str.c_str(),
|
||||
msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Add missing content
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
|
||||
|
||||
// Debug: Log tools after template processing
|
||||
if (parsed_data.contains("tools")) {
|
||||
SRV_DBG("[TOOLS DEBUG] Predict: After oaicompat_chat_params_parse - tools count: %zu\n",
|
||||
parsed_data["tools"].is_array() ? parsed_data["tools"].size() : 0);
|
||||
} else {
|
||||
SRV_DBG("%s", "[TOOLS DEBUG] Predict: After oaicompat_chat_params_parse - no tools in parsed_data\n");
|
||||
}
|
||||
|
||||
// Extract the prompt from parsed data
|
||||
prompt_str = parsed_data.at("prompt").get<std::string>();
|
||||
|
||||
@@ -1304,8 +1844,9 @@ public:
|
||||
preserved_grammar = data["grammar"];
|
||||
}
|
||||
|
||||
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, etc.)
|
||||
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, parse_tool_calls, etc.)
|
||||
// This ensures all template-generated fields are included
|
||||
// parse_tool_calls is set by oaicompat_chat_params_parse when tools are present
|
||||
for (const auto& item : parsed_data.items()) {
|
||||
if (item.key() != "prompt") { // Don't overwrite prompt_str, we already extracted it
|
||||
// If grammar was provided from Go layer, preserve it instead of template-generated grammar
|
||||
@@ -1316,6 +1857,11 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log parse_tool_calls if present (set by oaicompat_chat_params_parse when tools are present)
|
||||
if (data.contains("parse_tool_calls")) {
|
||||
SRV_DBG("[TOOLS DEBUG] Predict: parse_tool_calls=%s\n", data["parse_tool_calls"].get<bool>() ? "true" : "false");
|
||||
}
|
||||
} else {
|
||||
// Use prompt directly from data
|
||||
if (data.contains("prompt") && data["prompt"].is_string()) {
|
||||
@@ -1592,7 +2138,7 @@ public:
|
||||
|
||||
tasks.reserve(documents.size());
|
||||
for (size_t i = 0; i < documents.size(); i++) {
|
||||
auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
|
||||
auto tmp = format_prompt_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
|
||||
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
|
||||
task.id = ctx_server.queue_tasks.get_new_id();
|
||||
task.index = i;
|
||||
|
||||
@@ -9,13 +9,14 @@ done
|
||||
|
||||
set -e
|
||||
|
||||
for file in $(ls llama.cpp/tools/server/); do
|
||||
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
|
||||
done
|
||||
|
||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/tools/server/server-http.cpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/tools/server/server-http.h llama.cpp/tools/grpc-server/
|
||||
|
||||
set +e
|
||||
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=b12abefa9be2abae39a73fa903322af135024a36
|
||||
WHISPER_CPP_VERSION?=19ceec8eac980403b714d603e5ca31653cd42a3f
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -6,4 +6,4 @@ transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==5.1.0
|
||||
protobuf==6.32.0
|
||||
protobuf==6.33.1
|
||||
@@ -7,4 +7,4 @@ transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==5.1.0
|
||||
protobuf==6.32.0
|
||||
protobuf==6.33.1
|
||||
@@ -6,4 +6,4 @@ transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==5.1.0
|
||||
protobuf==6.32.0
|
||||
protobuf==6.33.1
|
||||
@@ -8,4 +8,4 @@ bitsandbytes
|
||||
outetts
|
||||
bitsandbytes
|
||||
sentence-transformers==5.1.0
|
||||
protobuf==6.32.0
|
||||
protobuf==6.33.1
|
||||
@@ -10,4 +10,4 @@ intel-extension-for-transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==5.1.0
|
||||
protobuf==6.32.0
|
||||
protobuf==6.33.1
|
||||
@@ -1,5 +1,5 @@
|
||||
grpcio==1.76.0
|
||||
protobuf==6.32.0
|
||||
protobuf==6.33.1
|
||||
certifi
|
||||
setuptools
|
||||
scipy==1.15.1
|
||||
|
||||
@@ -3,6 +3,13 @@ set -e
|
||||
|
||||
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
||||
|
||||
# Avoid to overcommit the CPU during build
|
||||
# https://github.com/vllm-project/vllm/issues/20079
|
||||
# https://docs.vllm.ai/en/v0.8.3/serving/env_vars.html
|
||||
# https://docs.redhat.com/it/documentation/red_hat_ai_inference_server/3.0/html/vllm_server_arguments/environment_variables-server-arguments
|
||||
export NVCC_THREADS=2
|
||||
export MAX_JOBS=1
|
||||
|
||||
backend_dir=$(dirname $0)
|
||||
|
||||
if [ -d $backend_dir/common ]; then
|
||||
|
||||
@@ -1 +1 @@
|
||||
flash-attn
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package application
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
@@ -11,8 +14,14 @@ type Application struct {
|
||||
backendLoader *config.ModelConfigLoader
|
||||
modelLoader *model.ModelLoader
|
||||
applicationConfig *config.ApplicationConfig
|
||||
startupConfig *config.ApplicationConfig // Stores original config from env vars (before file loading)
|
||||
templatesEvaluator *templates.Evaluator
|
||||
galleryService *services.GalleryService
|
||||
watchdogMutex sync.Mutex
|
||||
watchdogStop chan bool
|
||||
p2pMutex sync.Mutex
|
||||
p2pCtx context.Context
|
||||
p2pCancel context.CancelFunc
|
||||
}
|
||||
|
||||
func newApplication(appConfig *config.ApplicationConfig) *Application {
|
||||
@@ -44,6 +53,11 @@ func (a *Application) GalleryService() *services.GalleryService {
|
||||
return a.galleryService
|
||||
}
|
||||
|
||||
// StartupConfig returns the original startup configuration (from env vars, before file loading)
|
||||
func (a *Application) StartupConfig() *config.ApplicationConfig {
|
||||
return a.startupConfig
|
||||
}
|
||||
|
||||
func (a *Application) start() error {
|
||||
galleryService := services.NewGalleryService(a.ApplicationConfig(), a.ModelLoader())
|
||||
err := galleryService.Start(a.ApplicationConfig().Context, a.ModelConfigLoader(), a.ApplicationConfig().SystemState)
|
||||
|
||||
@@ -1,180 +1,343 @@
|
||||
package application
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"dario.cat/mergo"
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
|
||||
|
||||
type configFileHandler struct {
|
||||
handlers map[string]fileHandler
|
||||
|
||||
watcher *fsnotify.Watcher
|
||||
|
||||
appConfig *config.ApplicationConfig
|
||||
}
|
||||
|
||||
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
|
||||
// then we can export it to other packages
|
||||
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
|
||||
c := configFileHandler{
|
||||
handlers: make(map[string]fileHandler),
|
||||
appConfig: appConfig,
|
||||
}
|
||||
err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
|
||||
}
|
||||
err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
|
||||
_, ok := c.handlers[filename]
|
||||
if ok {
|
||||
return fmt.Errorf("handler already registered for file %s", filename)
|
||||
}
|
||||
c.handlers[filename] = handler
|
||||
if runNow {
|
||||
c.callHandler(filename, handler)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
|
||||
rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
|
||||
log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
|
||||
fileContent, err := os.ReadFile(rootedFilePath)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
|
||||
}
|
||||
|
||||
if err = handler(fileContent, c.appConfig); err != nil {
|
||||
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
|
||||
}
|
||||
}
|
||||
|
||||
func (c *configFileHandler) Watch() error {
|
||||
configWatcher, err := fsnotify.NewWatcher()
|
||||
c.watcher = configWatcher
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
|
||||
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
|
||||
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
|
||||
go func() {
|
||||
for {
|
||||
<-ticker.C
|
||||
for file, handler := range c.handlers {
|
||||
log.Debug().Str("file", file).Msg("polling config file")
|
||||
c.callHandler(file, handler)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Start listening for events.
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case event, ok := <-c.watcher.Events:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
|
||||
handler, ok := c.handlers[path.Base(event.Name)]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
c.callHandler(filepath.Base(event.Name), handler)
|
||||
}
|
||||
case err, ok := <-c.watcher.Errors:
|
||||
log.Error().Err(err).Msg("config watcher error received")
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Add a path.
|
||||
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: When we institute graceful shutdown, this should be called
|
||||
func (c *configFileHandler) Stop() error {
|
||||
return c.watcher.Close()
|
||||
}
|
||||
|
||||
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing api keys runtime update")
|
||||
log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile")
|
||||
|
||||
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
|
||||
} else {
|
||||
log.Trace().Msg("no API keys discovered from dynamic config file")
|
||||
appConfig.ApiKeys = startupAppConfig.ApiKeys
|
||||
}
|
||||
log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
|
||||
return nil
|
||||
}
|
||||
|
||||
return handler
|
||||
}
|
||||
|
||||
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing external_backends.json")
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
// Parse JSON content from the file
|
||||
var fileBackends map[string]string
|
||||
err := json.Unmarshal(fileContent, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
|
||||
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
|
||||
}
|
||||
log.Debug().Msg("external backends loaded from external_backends.json")
|
||||
return nil
|
||||
}
|
||||
return handler
|
||||
}
|
||||
package application
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"dario.cat/mergo"
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
|
||||
|
||||
type configFileHandler struct {
|
||||
handlers map[string]fileHandler
|
||||
|
||||
watcher *fsnotify.Watcher
|
||||
|
||||
appConfig *config.ApplicationConfig
|
||||
}
|
||||
|
||||
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
|
||||
// then we can export it to other packages
|
||||
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
|
||||
c := configFileHandler{
|
||||
handlers: make(map[string]fileHandler),
|
||||
appConfig: appConfig,
|
||||
}
|
||||
err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
|
||||
}
|
||||
err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
|
||||
}
|
||||
err = c.Register("runtime_settings.json", readRuntimeSettingsJson(*appConfig), true)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("file", "runtime_settings.json").Msg("unable to register config file handler")
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
|
||||
_, ok := c.handlers[filename]
|
||||
if ok {
|
||||
return fmt.Errorf("handler already registered for file %s", filename)
|
||||
}
|
||||
c.handlers[filename] = handler
|
||||
if runNow {
|
||||
c.callHandler(filename, handler)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
|
||||
rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
|
||||
log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
|
||||
fileContent, err := os.ReadFile(rootedFilePath)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
|
||||
}
|
||||
|
||||
if err = handler(fileContent, c.appConfig); err != nil {
|
||||
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
|
||||
}
|
||||
}
|
||||
|
||||
func (c *configFileHandler) Watch() error {
|
||||
configWatcher, err := fsnotify.NewWatcher()
|
||||
c.watcher = configWatcher
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
|
||||
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
|
||||
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
|
||||
go func() {
|
||||
for {
|
||||
<-ticker.C
|
||||
for file, handler := range c.handlers {
|
||||
log.Debug().Str("file", file).Msg("polling config file")
|
||||
c.callHandler(file, handler)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Start listening for events.
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case event, ok := <-c.watcher.Events:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
|
||||
handler, ok := c.handlers[path.Base(event.Name)]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
c.callHandler(filepath.Base(event.Name), handler)
|
||||
}
|
||||
case err, ok := <-c.watcher.Errors:
|
||||
log.Error().Err(err).Msg("config watcher error received")
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Add a path.
|
||||
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: When we institute graceful shutdown, this should be called
|
||||
func (c *configFileHandler) Stop() error {
|
||||
return c.watcher.Close()
|
||||
}
|
||||
|
||||
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing api keys runtime update")
|
||||
log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile")
|
||||
|
||||
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
|
||||
} else {
|
||||
log.Trace().Msg("no API keys discovered from dynamic config file")
|
||||
appConfig.ApiKeys = startupAppConfig.ApiKeys
|
||||
}
|
||||
log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
|
||||
return nil
|
||||
}
|
||||
|
||||
return handler
|
||||
}
|
||||
|
||||
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing external_backends.json")
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
// Parse JSON content from the file
|
||||
var fileBackends map[string]string
|
||||
err := json.Unmarshal(fileContent, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
|
||||
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
|
||||
}
|
||||
log.Debug().Msg("external backends loaded from external_backends.json")
|
||||
return nil
|
||||
}
|
||||
return handler
|
||||
}
|
||||
|
||||
type runtimeSettings struct {
|
||||
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
|
||||
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"`
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
Threads *int `json:"threads,omitempty"`
|
||||
ContextSize *int `json:"context_size,omitempty"`
|
||||
F16 *bool `json:"f16,omitempty"`
|
||||
Debug *bool `json:"debug,omitempty"`
|
||||
CORS *bool `json:"cors,omitempty"`
|
||||
CSRF *bool `json:"csrf,omitempty"`
|
||||
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
|
||||
P2PToken *string `json:"p2p_token,omitempty"`
|
||||
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
|
||||
Federated *bool `json:"federated,omitempty"`
|
||||
Galleries *[]config.Gallery `json:"galleries,omitempty"`
|
||||
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
|
||||
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
|
||||
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
|
||||
ApiKeys *[]string `json:"api_keys,omitempty"`
|
||||
}
|
||||
|
||||
func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing runtime_settings.json")
|
||||
|
||||
// Determine if settings came from env vars by comparing with startup config
|
||||
// startupAppConfig contains the original values set from env vars at startup.
|
||||
// If current values match startup values, they came from env vars (or defaults).
|
||||
// We apply file settings only if current values match startup values (meaning not from env vars).
|
||||
envWatchdogIdle := appConfig.WatchDogIdle == startupAppConfig.WatchDogIdle
|
||||
envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy
|
||||
envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout
|
||||
envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout
|
||||
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
|
||||
envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
|
||||
envThreads := appConfig.Threads == startupAppConfig.Threads
|
||||
envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
|
||||
envF16 := appConfig.F16 == startupAppConfig.F16
|
||||
envDebug := appConfig.Debug == startupAppConfig.Debug
|
||||
envCORS := appConfig.CORS == startupAppConfig.CORS
|
||||
envCSRF := appConfig.CSRF == startupAppConfig.CSRF
|
||||
envCORSAllowOrigins := appConfig.CORSAllowOrigins == startupAppConfig.CORSAllowOrigins
|
||||
envP2PToken := appConfig.P2PToken == startupAppConfig.P2PToken
|
||||
envP2PNetworkID := appConfig.P2PNetworkID == startupAppConfig.P2PNetworkID
|
||||
envFederated := appConfig.Federated == startupAppConfig.Federated
|
||||
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
|
||||
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
var settings runtimeSettings
|
||||
err := json.Unmarshal(fileContent, &settings)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Apply file settings only if they don't match startup values (i.e., not from env vars)
|
||||
if settings.WatchdogIdleEnabled != nil && !envWatchdogIdle {
|
||||
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
|
||||
if appConfig.WatchDogIdle {
|
||||
appConfig.WatchDog = true
|
||||
}
|
||||
}
|
||||
if settings.WatchdogBusyEnabled != nil && !envWatchdogBusy {
|
||||
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
|
||||
if appConfig.WatchDogBusy {
|
||||
appConfig.WatchDog = true
|
||||
}
|
||||
}
|
||||
if settings.WatchdogIdleTimeout != nil && !envWatchdogIdleTimeout {
|
||||
dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
|
||||
if err == nil {
|
||||
appConfig.WatchDogIdleTimeout = dur
|
||||
} else {
|
||||
log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
|
||||
}
|
||||
}
|
||||
if settings.WatchdogBusyTimeout != nil && !envWatchdogBusyTimeout {
|
||||
dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
|
||||
if err == nil {
|
||||
appConfig.WatchDogBusyTimeout = dur
|
||||
} else {
|
||||
log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
|
||||
}
|
||||
}
|
||||
if settings.SingleBackend != nil && !envSingleBackend {
|
||||
appConfig.SingleBackend = *settings.SingleBackend
|
||||
}
|
||||
if settings.ParallelBackendRequests != nil && !envParallelRequests {
|
||||
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
}
|
||||
if settings.Threads != nil && !envThreads {
|
||||
appConfig.Threads = *settings.Threads
|
||||
}
|
||||
if settings.ContextSize != nil && !envContextSize {
|
||||
appConfig.ContextSize = *settings.ContextSize
|
||||
}
|
||||
if settings.F16 != nil && !envF16 {
|
||||
appConfig.F16 = *settings.F16
|
||||
}
|
||||
if settings.Debug != nil && !envDebug {
|
||||
appConfig.Debug = *settings.Debug
|
||||
}
|
||||
if settings.CORS != nil && !envCORS {
|
||||
appConfig.CORS = *settings.CORS
|
||||
}
|
||||
if settings.CSRF != nil && !envCSRF {
|
||||
appConfig.CSRF = *settings.CSRF
|
||||
}
|
||||
if settings.CORSAllowOrigins != nil && !envCORSAllowOrigins {
|
||||
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
|
||||
}
|
||||
if settings.P2PToken != nil && !envP2PToken {
|
||||
appConfig.P2PToken = *settings.P2PToken
|
||||
}
|
||||
if settings.P2PNetworkID != nil && !envP2PNetworkID {
|
||||
appConfig.P2PNetworkID = *settings.P2PNetworkID
|
||||
}
|
||||
if settings.Federated != nil && !envFederated {
|
||||
appConfig.Federated = *settings.Federated
|
||||
}
|
||||
if settings.Galleries != nil {
|
||||
appConfig.Galleries = *settings.Galleries
|
||||
}
|
||||
if settings.BackendGalleries != nil {
|
||||
appConfig.BackendGalleries = *settings.BackendGalleries
|
||||
}
|
||||
if settings.AutoloadGalleries != nil && !envAutoloadGalleries {
|
||||
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
|
||||
}
|
||||
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
|
||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
if settings.ApiKeys != nil {
|
||||
// API keys from env vars (startup) should be kept, runtime settings keys replace all runtime keys
|
||||
// If runtime_settings.json specifies ApiKeys (even if empty), it replaces all runtime keys
|
||||
// Start with env keys, then add runtime_settings.json keys (which may be empty to clear them)
|
||||
envKeys := startupAppConfig.ApiKeys
|
||||
runtimeKeys := *settings.ApiKeys
|
||||
// Replace all runtime keys with what's in runtime_settings.json
|
||||
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
|
||||
}
|
||||
|
||||
// If watchdog is enabled via file but not via env, ensure WatchDog flag is set
|
||||
if !envWatchdogIdle && !envWatchdogBusy {
|
||||
if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
|
||||
appConfig.WatchDog = true
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Debug().Msg("runtime settings loaded from runtime_settings.json")
|
||||
return nil
|
||||
}
|
||||
return handler
|
||||
}
|
||||
|
||||
240
core/application/p2p.go
Normal file
240
core/application/p2p.go
Normal file
@@ -0,0 +1,240 @@
|
||||
package application
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
|
||||
"github.com/mudler/edgevpn/pkg/node"
|
||||
"github.com/rs/zerolog/log"
|
||||
zlog "github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func (a *Application) StopP2P() error {
|
||||
if a.p2pCancel != nil {
|
||||
a.p2pCancel()
|
||||
a.p2pCancel = nil
|
||||
a.p2pCtx = nil
|
||||
// Wait a bit for shutdown to complete
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Application) StartP2P() error {
|
||||
// we need a p2p token
|
||||
if a.applicationConfig.P2PToken == "" {
|
||||
return fmt.Errorf("P2P token is not set")
|
||||
}
|
||||
|
||||
networkID := a.applicationConfig.P2PNetworkID
|
||||
|
||||
ctx, cancel := context.WithCancel(a.ApplicationConfig().Context)
|
||||
a.p2pCtx = ctx
|
||||
a.p2pCancel = cancel
|
||||
|
||||
var n *node.Node
|
||||
// Here we are avoiding creating multiple nodes:
|
||||
// - if the federated mode is enabled, we create a federated node and expose a service
|
||||
// - exposing a service creates a node with specific options, and we don't want to create another node
|
||||
|
||||
// If the federated mode is enabled, we expose a service to the local instance running
|
||||
// at r.Address
|
||||
if a.applicationConfig.Federated {
|
||||
_, port, err := net.SplitHostPort(a.applicationConfig.APIAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Here a new node is created and started
|
||||
// and a service is exposed by the node
|
||||
node, err := p2p.ExposeService(ctx, "localhost", port, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := p2p.ServiceDiscoverer(ctx, node, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n = node
|
||||
// start node sync in the background
|
||||
if err := a.p2pSync(ctx, node); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// If a node wasn't created previously, create it
|
||||
if n == nil {
|
||||
node, err := p2p.NewNode(a.applicationConfig.P2PToken)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = node.Start(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("starting new node: %w", err)
|
||||
}
|
||||
n = node
|
||||
}
|
||||
|
||||
// Attach a ServiceDiscoverer to the p2p node
|
||||
log.Info().Msg("Starting P2P server discovery...")
|
||||
if err := p2p.ServiceDiscoverer(ctx, n, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
|
||||
var tunnelAddresses []string
|
||||
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
|
||||
if v.IsOnline() {
|
||||
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
||||
} else {
|
||||
log.Info().Msgf("Node %s is offline", v.ID)
|
||||
}
|
||||
}
|
||||
if a.applicationConfig.TunnelCallback != nil {
|
||||
a.applicationConfig.TunnelCallback(tunnelAddresses)
|
||||
}
|
||||
}, true); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartP2P restarts the P2P stack with current ApplicationConfig settings
|
||||
// Note: This method signals that P2P should be restarted, but the actual restart
|
||||
// is handled by the caller to avoid import cycles
|
||||
func (a *Application) RestartP2P() error {
|
||||
a.p2pMutex.Lock()
|
||||
defer a.p2pMutex.Unlock()
|
||||
|
||||
// Stop existing P2P if running
|
||||
if a.p2pCancel != nil {
|
||||
a.p2pCancel()
|
||||
a.p2pCancel = nil
|
||||
a.p2pCtx = nil
|
||||
// Wait a bit for shutdown to complete
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
|
||||
appConfig := a.ApplicationConfig()
|
||||
|
||||
// Start P2P if token is set
|
||||
if appConfig.P2PToken == "" {
|
||||
return fmt.Errorf("P2P token is not set")
|
||||
}
|
||||
|
||||
// Create new context for P2P
|
||||
ctx, cancel := context.WithCancel(appConfig.Context)
|
||||
a.p2pCtx = ctx
|
||||
a.p2pCancel = cancel
|
||||
|
||||
// Get API address from config
|
||||
address := appConfig.APIAddress
|
||||
if address == "" {
|
||||
address = "127.0.0.1:8080" // default
|
||||
}
|
||||
|
||||
// Start P2P stack in a goroutine
|
||||
go func() {
|
||||
if err := a.StartP2P(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to start P2P stack")
|
||||
cancel() // Cancel context on error
|
||||
}
|
||||
}()
|
||||
log.Info().Msg("P2P stack restarted with new settings")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func syncState(ctx context.Context, n *node.Node, app *Application) error {
|
||||
zlog.Debug().Msg("[p2p-sync] Syncing state")
|
||||
|
||||
whatWeHave := []string{}
|
||||
for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() {
|
||||
whatWeHave = append(whatWeHave, model.Name)
|
||||
}
|
||||
|
||||
ledger, _ := n.Ledger()
|
||||
currentData := ledger.CurrentData()
|
||||
zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData)
|
||||
data, exists := ledger.GetKey("shared_state", "models")
|
||||
if !exists {
|
||||
ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave)
|
||||
zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave)
|
||||
}
|
||||
|
||||
models := []string{}
|
||||
if err := data.Unmarshal(&models); err != nil {
|
||||
zlog.Warn().Err(err).Msg("error unmarshalling models")
|
||||
return nil
|
||||
}
|
||||
|
||||
zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models)
|
||||
|
||||
// Sync with our state
|
||||
whatIsNotThere := []string{}
|
||||
for _, model := range whatWeHave {
|
||||
if !slices.Contains(models, model) {
|
||||
whatIsNotThere = append(whatIsNotThere, model)
|
||||
}
|
||||
}
|
||||
if len(whatIsNotThere) > 0 {
|
||||
zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...))
|
||||
ledger.AnnounceUpdate(
|
||||
ctx,
|
||||
1*time.Minute,
|
||||
"shared_state",
|
||||
"models",
|
||||
append(models, whatIsNotThere...),
|
||||
)
|
||||
}
|
||||
|
||||
// Check if we have a model that is not in our state, otherwise install it
|
||||
for _, model := range models {
|
||||
if slices.Contains(whatWeHave, model) {
|
||||
zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model)
|
||||
continue
|
||||
}
|
||||
|
||||
// we install model
|
||||
zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model)
|
||||
|
||||
uuid, err := uuid.NewUUID()
|
||||
if err != nil {
|
||||
zlog.Error().Err(err).Msg("error generating UUID")
|
||||
continue
|
||||
}
|
||||
|
||||
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
|
||||
ID: uuid.String(),
|
||||
GalleryElementName: model,
|
||||
Galleries: app.ApplicationConfig().Galleries,
|
||||
BackendGalleries: app.ApplicationConfig().BackendGalleries,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Application) p2pSync(ctx context.Context, n *node.Node) error {
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(1 * time.Minute):
|
||||
if err := syncState(ctx, n, a); err != nil {
|
||||
zlog.Error().Err(err).Msg("error syncing state")
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
@@ -1,8 +1,11 @@
|
||||
package application
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
@@ -18,7 +21,12 @@ import (
|
||||
|
||||
func New(opts ...config.AppOption) (*Application, error) {
|
||||
options := config.NewApplicationConfig(opts...)
|
||||
|
||||
// Store a copy of the startup config (from env vars, before file loading)
|
||||
// This is used to determine if settings came from env vars vs file
|
||||
startupConfigCopy := *options
|
||||
application := newApplication(options)
|
||||
application.startupConfig = &startupConfigCopy
|
||||
|
||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
|
||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||
@@ -110,6 +118,13 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// Load runtime settings from file if DynamicConfigsDir is set
|
||||
// This applies file settings with env var precedence (env vars take priority)
|
||||
// Note: startupConfigCopy was already created above, so it has the original env var values
|
||||
if options.DynamicConfigsDir != "" {
|
||||
loadRuntimeSettingsFromFile(options)
|
||||
}
|
||||
|
||||
// turn off any process that was started by GRPC if the context is canceled
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
@@ -120,21 +135,8 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
}
|
||||
}()
|
||||
|
||||
if options.WatchDog {
|
||||
wd := model.NewWatchDog(
|
||||
application.ModelLoader(),
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle)
|
||||
application.ModelLoader().SetWatchDog(wd)
|
||||
go wd.Run()
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
wd.Shutdown()
|
||||
}()
|
||||
}
|
||||
// Initialize watchdog with current settings (after loading from file)
|
||||
initializeWatchdog(application, options)
|
||||
|
||||
if options.LoadToMemory != nil && !options.SingleBackend {
|
||||
for _, m := range options.LoadToMemory {
|
||||
@@ -186,3 +188,131 @@ func startWatcher(options *config.ApplicationConfig) {
|
||||
log.Error().Err(err).Msg("failed creating watcher")
|
||||
}
|
||||
}
|
||||
|
||||
// loadRuntimeSettingsFromFile loads settings from runtime_settings.json with env var precedence
|
||||
// This function is called at startup, before env vars are applied via AppOptions.
|
||||
// Since env vars are applied via AppOptions in run.go, we need to check if they're set.
|
||||
// We do this by checking if the current options values differ from defaults, which would
|
||||
// indicate they were set from env vars. However, a simpler approach is to just apply
|
||||
// file settings here, and let the AppOptions (which are applied after this) override them.
|
||||
// But actually, this is called AFTER AppOptions are applied in New(), so we need to check env vars.
|
||||
// The cleanest solution: Store original values before applying file, or check if values match
|
||||
// what would be set from env vars. For now, we'll apply file settings and they'll be
|
||||
// overridden by AppOptions if env vars were set (but AppOptions are already applied).
|
||||
// Actually, this function is called in New() before AppOptions are fully processed for watchdog.
|
||||
// Let's check the call order: New() -> loadRuntimeSettingsFromFile() -> initializeWatchdog()
|
||||
// But AppOptions are applied in NewApplicationConfig() which is called first.
|
||||
// So at this point, options already has values from env vars. We should compare against
|
||||
// defaults to see if env vars were set. But we don't have defaults stored.
|
||||
// Simplest: Just apply file settings. If env vars were set, they're already in options.
|
||||
// The file watcher handler will handle runtime changes properly by comparing with startupAppConfig.
|
||||
func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
settingsFile := filepath.Join(options.DynamicConfigsDir, "runtime_settings.json")
|
||||
fileContent, err := os.ReadFile(settingsFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
log.Debug().Msg("runtime_settings.json not found, using defaults")
|
||||
return
|
||||
}
|
||||
log.Warn().Err(err).Msg("failed to read runtime_settings.json")
|
||||
return
|
||||
}
|
||||
|
||||
var settings struct {
|
||||
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
|
||||
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"`
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(fileContent, &settings); err != nil {
|
||||
log.Warn().Err(err).Msg("failed to parse runtime_settings.json")
|
||||
return
|
||||
}
|
||||
|
||||
// At this point, options already has values from env vars (via AppOptions in run.go).
|
||||
// To avoid env var duplication, we determine if env vars were set by checking if
|
||||
// current values differ from defaults. Defaults are: false for bools, 0 for durations.
|
||||
// If current value is at default, it likely wasn't set from env var, so we can apply file.
|
||||
// If current value is non-default, it was likely set from env var, so we preserve it.
|
||||
// Note: This means env vars explicitly setting to false/0 won't be distinguishable from defaults,
|
||||
// but that's an acceptable limitation to avoid env var duplication.
|
||||
|
||||
if settings.WatchdogIdleEnabled != nil {
|
||||
// Only apply if current value is default (false), suggesting it wasn't set from env var
|
||||
if !options.WatchDogIdle {
|
||||
options.WatchDogIdle = *settings.WatchdogIdleEnabled
|
||||
if options.WatchDogIdle {
|
||||
options.WatchDog = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.WatchdogBusyEnabled != nil {
|
||||
if !options.WatchDogBusy {
|
||||
options.WatchDogBusy = *settings.WatchdogBusyEnabled
|
||||
if options.WatchDogBusy {
|
||||
options.WatchDog = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.WatchdogIdleTimeout != nil {
|
||||
// Only apply if current value is default (0), suggesting it wasn't set from env var
|
||||
if options.WatchDogIdleTimeout == 0 {
|
||||
dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
|
||||
if err == nil {
|
||||
options.WatchDogIdleTimeout = dur
|
||||
} else {
|
||||
log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.WatchdogBusyTimeout != nil {
|
||||
if options.WatchDogBusyTimeout == 0 {
|
||||
dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
|
||||
if err == nil {
|
||||
options.WatchDogBusyTimeout = dur
|
||||
} else {
|
||||
log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.SingleBackend != nil {
|
||||
if !options.SingleBackend {
|
||||
options.SingleBackend = *settings.SingleBackend
|
||||
}
|
||||
}
|
||||
if settings.ParallelBackendRequests != nil {
|
||||
if !options.ParallelBackendRequests {
|
||||
options.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
}
|
||||
}
|
||||
if !options.WatchDogIdle && !options.WatchDogBusy {
|
||||
if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
|
||||
options.WatchDog = true
|
||||
}
|
||||
}
|
||||
|
||||
log.Debug().Msg("Runtime settings loaded from runtime_settings.json")
|
||||
}
|
||||
|
||||
// initializeWatchdog initializes the watchdog with current ApplicationConfig settings
|
||||
func initializeWatchdog(application *Application, options *config.ApplicationConfig) {
|
||||
if options.WatchDog {
|
||||
wd := model.NewWatchDog(
|
||||
application.ModelLoader(),
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle)
|
||||
application.ModelLoader().SetWatchDog(wd)
|
||||
go wd.Run()
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
wd.Shutdown()
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
88
core/application/watchdog.go
Normal file
88
core/application/watchdog.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package application
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func (a *Application) StopWatchdog() error {
|
||||
if a.watchdogStop != nil {
|
||||
close(a.watchdogStop)
|
||||
a.watchdogStop = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// startWatchdog starts the watchdog with current ApplicationConfig settings
|
||||
// This is an internal method that assumes the caller holds the watchdogMutex
|
||||
func (a *Application) startWatchdog() error {
|
||||
appConfig := a.ApplicationConfig()
|
||||
|
||||
// Create new watchdog if enabled
|
||||
if appConfig.WatchDog {
|
||||
wd := model.NewWatchDog(
|
||||
a.modelLoader,
|
||||
appConfig.WatchDogBusyTimeout,
|
||||
appConfig.WatchDogIdleTimeout,
|
||||
appConfig.WatchDogBusy,
|
||||
appConfig.WatchDogIdle)
|
||||
a.modelLoader.SetWatchDog(wd)
|
||||
|
||||
// Create new stop channel
|
||||
a.watchdogStop = make(chan bool, 1)
|
||||
|
||||
// Start watchdog goroutine
|
||||
go wd.Run()
|
||||
|
||||
// Setup shutdown handler
|
||||
go func() {
|
||||
select {
|
||||
case <-a.watchdogStop:
|
||||
log.Debug().Msg("Watchdog stop signal received")
|
||||
wd.Shutdown()
|
||||
case <-appConfig.Context.Done():
|
||||
log.Debug().Msg("Context canceled, shutting down watchdog")
|
||||
wd.Shutdown()
|
||||
}
|
||||
}()
|
||||
|
||||
log.Info().Msg("Watchdog started with new settings")
|
||||
} else {
|
||||
log.Info().Msg("Watchdog disabled")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartWatchdog starts the watchdog with current ApplicationConfig settings
|
||||
func (a *Application) StartWatchdog() error {
|
||||
a.watchdogMutex.Lock()
|
||||
defer a.watchdogMutex.Unlock()
|
||||
|
||||
return a.startWatchdog()
|
||||
}
|
||||
|
||||
// RestartWatchdog restarts the watchdog with current ApplicationConfig settings
|
||||
func (a *Application) RestartWatchdog() error {
|
||||
a.watchdogMutex.Lock()
|
||||
defer a.watchdogMutex.Unlock()
|
||||
|
||||
// Shutdown existing watchdog if running
|
||||
if a.watchdogStop != nil {
|
||||
close(a.watchdogStop)
|
||||
a.watchdogStop = nil
|
||||
}
|
||||
|
||||
// Shutdown existing watchdog if running
|
||||
currentWD := a.modelLoader.GetWatchDog()
|
||||
if currentWD != nil {
|
||||
currentWD.Shutdown()
|
||||
// Wait a bit for shutdown to complete
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Start watchdog with new settings
|
||||
return a.startWatchdog()
|
||||
}
|
||||
@@ -40,3 +40,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
||||
|
||||
return fn, nil
|
||||
}
|
||||
|
||||
// ImageGenerationFunc is a test-friendly indirection to call image generation logic.
|
||||
// Tests can override this variable to provide a stub implementation.
|
||||
var ImageGenerationFunc = ImageGeneration
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
package cli_api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/edgevpn/pkg/node"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool, app *application.Application) error {
|
||||
var n *node.Node
|
||||
// Here we are avoiding creating multiple nodes:
|
||||
// - if the federated mode is enabled, we create a federated node and expose a service
|
||||
// - exposing a service creates a node with specific options, and we don't want to create another node
|
||||
|
||||
// If the federated mode is enabled, we expose a service to the local instance running
|
||||
// at r.Address
|
||||
if federated {
|
||||
_, port, err := net.SplitHostPort(address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Here a new node is created and started
|
||||
// and a service is exposed by the node
|
||||
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n = node
|
||||
|
||||
// start node sync in the background
|
||||
if err := p2p.Sync(ctx, node, app); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// If the p2p mode is enabled, we start the service discovery
|
||||
if token != "" {
|
||||
// If a node wasn't created previously, create it
|
||||
if n == nil {
|
||||
node, err := p2p.NewNode(token)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = node.Start(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("starting new node: %w", err)
|
||||
}
|
||||
n = node
|
||||
}
|
||||
|
||||
// Attach a ServiceDiscoverer to the p2p node
|
||||
log.Info().Msg("Starting P2P server discovery...")
|
||||
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
|
||||
var tunnelAddresses []string
|
||||
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
|
||||
if v.IsOnline() {
|
||||
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
||||
} else {
|
||||
log.Info().Msgf("Node %s is offline", v.ID)
|
||||
}
|
||||
}
|
||||
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
|
||||
|
||||
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
|
||||
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
|
||||
}, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
cli_api "github.com/mudler/LocalAI/core/cli/api"
|
||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http"
|
||||
@@ -52,6 +51,7 @@ type RunCMD struct {
|
||||
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
|
||||
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
|
||||
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"`
|
||||
DisableRuntimeSettings bool `env:"LOCALAI_DISABLE_RUNTIME_SETTINGS,DISABLE_RUNTIME_SETTINGS" default:"false" help:"Disables the runtime settings. When set to true, the server will not load the runtime settings from the runtime_settings.json file" group:"api"`
|
||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
||||
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
|
||||
@@ -98,6 +98,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
}
|
||||
|
||||
opts := []config.AppOption{
|
||||
config.WithContext(context.Background()),
|
||||
config.WithConfigFile(r.ModelsConfigFile),
|
||||
config.WithJSONStringPreload(r.PreloadModels),
|
||||
config.WithYAMLConfigPreload(r.PreloadModelsConfig),
|
||||
@@ -128,12 +129,22 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithLoadToMemory(r.LoadToMemory),
|
||||
config.WithMachineTag(r.MachineTag),
|
||||
config.WithAPIAddress(r.Address),
|
||||
config.WithTunnelCallback(func(tunnels []string) {
|
||||
tunnelEnvVar := strings.Join(tunnels, ",")
|
||||
// TODO: this is very specific to llama.cpp, we should have a more generic way to set the environment variable
|
||||
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
|
||||
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
|
||||
}),
|
||||
}
|
||||
|
||||
if r.DisableMetricsEndpoint {
|
||||
opts = append(opts, config.DisableMetricsEndpoint)
|
||||
}
|
||||
|
||||
if r.DisableRuntimeSettings {
|
||||
opts = append(opts, config.DisableRuntimeSettings)
|
||||
}
|
||||
|
||||
token := ""
|
||||
if r.Peer2Peer || r.Peer2PeerToken != "" {
|
||||
log.Info().Msg("P2P mode enabled")
|
||||
@@ -152,7 +163,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
opts = append(opts, config.WithP2PToken(token))
|
||||
}
|
||||
|
||||
backgroundCtx := context.Background()
|
||||
if r.Federated {
|
||||
opts = append(opts, config.EnableFederated)
|
||||
}
|
||||
|
||||
idleWatchDog := r.EnableWatchdogIdle
|
||||
busyWatchDog := r.EnableWatchdogBusy
|
||||
@@ -222,8 +235,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated, app); err != nil {
|
||||
return err
|
||||
if token != "" {
|
||||
if err := app.StartP2P(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
signals.RegisterGracefulTerminationHandler(func() {
|
||||
|
||||
@@ -33,6 +33,7 @@ type ApplicationConfig struct {
|
||||
ApiKeys []string
|
||||
P2PToken string
|
||||
P2PNetworkID string
|
||||
Federated bool
|
||||
|
||||
DisableWebUI bool
|
||||
EnforcePredownloadScans bool
|
||||
@@ -65,6 +66,10 @@ type ApplicationConfig struct {
|
||||
MachineTag string
|
||||
|
||||
APIAddress string
|
||||
|
||||
TunnelCallback func(tunnels []string)
|
||||
|
||||
DisableRuntimeSettings bool
|
||||
}
|
||||
|
||||
type AppOption func(*ApplicationConfig)
|
||||
@@ -73,7 +78,6 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
|
||||
opt := &ApplicationConfig{
|
||||
Context: context.Background(),
|
||||
UploadLimitMB: 15,
|
||||
ContextSize: 512,
|
||||
Debug: true,
|
||||
}
|
||||
for _, oo := range o {
|
||||
@@ -152,6 +156,10 @@ var DisableWebUI = func(o *ApplicationConfig) {
|
||||
o.DisableWebUI = true
|
||||
}
|
||||
|
||||
var DisableRuntimeSettings = func(o *ApplicationConfig) {
|
||||
o.DisableRuntimeSettings = true
|
||||
}
|
||||
|
||||
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.WatchDogBusyTimeout = t
|
||||
@@ -180,6 +188,10 @@ var EnableBackendGalleriesAutoload = func(o *ApplicationConfig) {
|
||||
o.AutoloadBackendGalleries = true
|
||||
}
|
||||
|
||||
var EnableFederated = func(o *ApplicationConfig) {
|
||||
o.Federated = true
|
||||
}
|
||||
|
||||
func WithExternalBackend(name string, uri string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
if o.ExternalGRPCBackends == nil {
|
||||
@@ -273,6 +285,12 @@ func WithContextSize(ctxSize int) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithTunnelCallback(callback func(tunnels []string)) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.TunnelCallback = callback
|
||||
}
|
||||
}
|
||||
|
||||
func WithF16(f16 bool) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.F16 = f16
|
||||
|
||||
@@ -166,76 +166,4 @@ parameters:
|
||||
Expect(i.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
|
||||
Expect(i.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("Handles multiple configs with same model file but different names", func() {
|
||||
// Create a temporary directory for test configs
|
||||
tmpDir, err := os.MkdirTemp("", "config_test_*")
|
||||
Expect(err).To(BeNil())
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
// Write first config without MCP
|
||||
config1Path := tmpDir + "/model-without-mcp.yaml"
|
||||
err = os.WriteFile(config1Path, []byte(`name: model-without-mcp
|
||||
backend: llama-cpp
|
||||
parameters:
|
||||
model: shared-model.gguf
|
||||
`), 0644)
|
||||
Expect(err).To(BeNil())
|
||||
|
||||
// Write second config with MCP
|
||||
config2Path := tmpDir + "/model-with-mcp.yaml"
|
||||
err = os.WriteFile(config2Path, []byte(`name: model-with-mcp
|
||||
backend: llama-cpp
|
||||
parameters:
|
||||
model: shared-model.gguf
|
||||
mcp:
|
||||
stdio: |
|
||||
mcpServers:
|
||||
test:
|
||||
command: echo
|
||||
args: ["hello"]
|
||||
`), 0644)
|
||||
Expect(err).To(BeNil())
|
||||
|
||||
// Load all configs
|
||||
loader := NewModelConfigLoader(tmpDir)
|
||||
err = loader.LoadModelConfigsFromPath(tmpDir)
|
||||
Expect(err).To(BeNil())
|
||||
|
||||
// Verify both configs are loaded
|
||||
cfg1, exists1 := loader.GetModelConfig("model-without-mcp")
|
||||
Expect(exists1).To(BeTrue())
|
||||
Expect(cfg1.Name).To(Equal("model-without-mcp"))
|
||||
Expect(cfg1.Model).To(Equal("shared-model.gguf"))
|
||||
Expect(cfg1.MCP.Stdio).To(Equal(""))
|
||||
Expect(cfg1.MCP.Servers).To(Equal(""))
|
||||
|
||||
cfg2, exists2 := loader.GetModelConfig("model-with-mcp")
|
||||
Expect(exists2).To(BeTrue())
|
||||
Expect(cfg2.Name).To(Equal("model-with-mcp"))
|
||||
Expect(cfg2.Model).To(Equal("shared-model.gguf"))
|
||||
Expect(cfg2.MCP.Stdio).ToNot(Equal(""))
|
||||
|
||||
// Verify both configs are in the list
|
||||
allConfigs := loader.GetAllModelsConfigs()
|
||||
Expect(len(allConfigs)).To(Equal(2))
|
||||
|
||||
// Find each config in the list
|
||||
foundWithoutMCP := false
|
||||
foundWithMCP := false
|
||||
for _, cfg := range allConfigs {
|
||||
if cfg.Name == "model-without-mcp" {
|
||||
foundWithoutMCP = true
|
||||
Expect(cfg.Model).To(Equal("shared-model.gguf"))
|
||||
Expect(cfg.MCP.Stdio).To(Equal(""))
|
||||
}
|
||||
if cfg.Name == "model-with-mcp" {
|
||||
foundWithMCP = true
|
||||
Expect(cfg.Model).To(Equal("shared-model.gguf"))
|
||||
Expect(cfg.MCP.Stdio).ToNot(Equal(""))
|
||||
}
|
||||
}
|
||||
Expect(foundWithoutMCP).To(BeTrue())
|
||||
Expect(foundWithMCP).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
121
core/gallery/importers/diffuser.go
Normal file
121
core/gallery/importers/diffuser.go
Normal file
@@ -0,0 +1,121 @@
|
||||
package importers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var _ Importer = &DiffuserImporter{}
|
||||
|
||||
type DiffuserImporter struct{}
|
||||
|
||||
func (i *DiffuserImporter) Match(details Details) bool {
|
||||
preferences, err := details.Preferences.MarshalJSON()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
preferencesMap := make(map[string]any)
|
||||
err = json.Unmarshal(preferences, &preferencesMap)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
b, ok := preferencesMap["backend"].(string)
|
||||
if ok && b == "diffusers" {
|
||||
return true
|
||||
}
|
||||
|
||||
if details.HuggingFace != nil {
|
||||
for _, file := range details.HuggingFace.Files {
|
||||
if strings.Contains(file.Path, "model_index.json") ||
|
||||
strings.Contains(file.Path, "scheduler/scheduler_config.json") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (i *DiffuserImporter) Import(details Details) (gallery.ModelConfig, error) {
|
||||
preferences, err := details.Preferences.MarshalJSON()
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
preferencesMap := make(map[string]any)
|
||||
err = json.Unmarshal(preferences, &preferencesMap)
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
|
||||
name, ok := preferencesMap["name"].(string)
|
||||
if !ok {
|
||||
name = filepath.Base(details.URI)
|
||||
}
|
||||
|
||||
description, ok := preferencesMap["description"].(string)
|
||||
if !ok {
|
||||
description = "Imported from " + details.URI
|
||||
}
|
||||
|
||||
backend := "diffusers"
|
||||
b, ok := preferencesMap["backend"].(string)
|
||||
if ok {
|
||||
backend = b
|
||||
}
|
||||
|
||||
pipelineType, ok := preferencesMap["pipeline_type"].(string)
|
||||
if !ok {
|
||||
pipelineType = "StableDiffusionPipeline"
|
||||
}
|
||||
|
||||
schedulerType, ok := preferencesMap["scheduler_type"].(string)
|
||||
if !ok {
|
||||
schedulerType = ""
|
||||
}
|
||||
|
||||
enableParameters, ok := preferencesMap["enable_parameters"].(string)
|
||||
if !ok {
|
||||
enableParameters = "negative_prompt,num_inference_steps"
|
||||
}
|
||||
|
||||
cuda := false
|
||||
if cudaVal, ok := preferencesMap["cuda"].(bool); ok {
|
||||
cuda = cudaVal
|
||||
}
|
||||
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
KnownUsecaseStrings: []string{"image"},
|
||||
Backend: backend,
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
Model: details.URI,
|
||||
},
|
||||
},
|
||||
Diffusers: config.Diffusers{
|
||||
PipelineType: pipelineType,
|
||||
SchedulerType: schedulerType,
|
||||
EnableParameters: enableParameters,
|
||||
CUDA: cuda,
|
||||
},
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(modelConfig)
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
|
||||
return gallery.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
ConfigFile: string(data),
|
||||
}, nil
|
||||
}
|
||||
246
core/gallery/importers/diffuser_test.go
Normal file
246
core/gallery/importers/diffuser_test.go
Normal file
@@ -0,0 +1,246 @@
|
||||
package importers_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/mudler/LocalAI/core/gallery/importers"
|
||||
. "github.com/mudler/LocalAI/core/gallery/importers"
|
||||
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("DiffuserImporter", func() {
|
||||
var importer *DiffuserImporter
|
||||
|
||||
BeforeEach(func() {
|
||||
importer = &DiffuserImporter{}
|
||||
})
|
||||
|
||||
Context("Match", func() {
|
||||
It("should match when backend preference is diffusers", func() {
|
||||
preferences := json.RawMessage(`{"backend": "diffusers"}`)
|
||||
details := Details{
|
||||
URI: "https://example.com/model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
result := importer.Match(details)
|
||||
Expect(result).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should match when HuggingFace details contain model_index.json", func() {
|
||||
hfDetails := &hfapi.ModelDetails{
|
||||
Files: []hfapi.ModelFile{
|
||||
{Path: "model_index.json"},
|
||||
},
|
||||
}
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/model",
|
||||
HuggingFace: hfDetails,
|
||||
}
|
||||
|
||||
result := importer.Match(details)
|
||||
Expect(result).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should match when HuggingFace details contain scheduler config", func() {
|
||||
hfDetails := &hfapi.ModelDetails{
|
||||
Files: []hfapi.ModelFile{
|
||||
{Path: "scheduler/scheduler_config.json"},
|
||||
},
|
||||
}
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/model",
|
||||
HuggingFace: hfDetails,
|
||||
}
|
||||
|
||||
result := importer.Match(details)
|
||||
Expect(result).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should not match when URI has no diffuser files and no backend preference", func() {
|
||||
details := Details{
|
||||
URI: "https://example.com/model.bin",
|
||||
}
|
||||
|
||||
result := importer.Match(details)
|
||||
Expect(result).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should not match when backend preference is different", func() {
|
||||
preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
|
||||
details := Details{
|
||||
URI: "https://example.com/model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
result := importer.Match(details)
|
||||
Expect(result).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should return false when JSON preferences are invalid", func() {
|
||||
preferences := json.RawMessage(`invalid json`)
|
||||
details := Details{
|
||||
URI: "https://example.com/model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
result := importer.Match(details)
|
||||
Expect(result).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
Context("Import", func() {
|
||||
It("should import model config with default name and description", func() {
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-diffuser-model",
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.Name).To(Equal("my-diffuser-model"))
|
||||
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/test/my-diffuser-model"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: diffusers"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: https://huggingface.co/test/my-diffuser-model"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusionPipeline"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("enable_parameters: negative_prompt,num_inference_steps"))
|
||||
})
|
||||
|
||||
It("should import model config with custom name and description from preferences", func() {
|
||||
preferences := json.RawMessage(`{"name": "custom-diffuser", "description": "Custom diffuser model"}`)
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.Name).To(Equal("custom-diffuser"))
|
||||
Expect(modelConfig.Description).To(Equal("Custom diffuser model"))
|
||||
})
|
||||
|
||||
It("should use custom pipeline_type from preferences", func() {
|
||||
preferences := json.RawMessage(`{"pipeline_type": "StableDiffusion3Pipeline"}`)
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusion3Pipeline"))
|
||||
})
|
||||
|
||||
It("should use default pipeline_type when not specified", func() {
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusionPipeline"))
|
||||
})
|
||||
|
||||
It("should use custom scheduler_type from preferences", func() {
|
||||
preferences := json.RawMessage(`{"scheduler_type": "k_dpmpp_2m"}`)
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("scheduler_type: k_dpmpp_2m"))
|
||||
})
|
||||
|
||||
It("should use cuda setting from preferences", func() {
|
||||
preferences := json.RawMessage(`{"cuda": true}`)
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("cuda: true"))
|
||||
})
|
||||
|
||||
It("should use custom enable_parameters from preferences", func() {
|
||||
preferences := json.RawMessage(`{"enable_parameters": "num_inference_steps,guidance_scale"}`)
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("enable_parameters: num_inference_steps,guidance_scale"))
|
||||
})
|
||||
|
||||
It("should use custom backend from preferences", func() {
|
||||
preferences := json.RawMessage(`{"backend": "diffusers"}`)
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: diffusers"))
|
||||
})
|
||||
|
||||
It("should handle invalid JSON preferences", func() {
|
||||
preferences := json.RawMessage(`invalid json`)
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
Preferences: preferences,
|
||||
}
|
||||
|
||||
_, err := importer.Import(details)
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
|
||||
It("should extract filename correctly from URI with path", func() {
|
||||
details := importers.Details{
|
||||
URI: "https://huggingface.co/test/path/to/model",
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.Name).To(Equal("model"))
|
||||
})
|
||||
|
||||
It("should include known_usecases as image in config", func() {
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("known_usecases:"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("- image"))
|
||||
})
|
||||
|
||||
It("should include diffusers configuration in config", func() {
|
||||
details := Details{
|
||||
URI: "https://huggingface.co/test/my-model",
|
||||
}
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("diffusers:"))
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -20,6 +20,7 @@ var defaultImporters = []Importer{
|
||||
&MLXImporter{},
|
||||
&VLLMImporter{},
|
||||
&TransformersImporter{},
|
||||
&DiffuserImporter{},
|
||||
}
|
||||
|
||||
type Details struct {
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"dario.cat/mergo"
|
||||
@@ -293,13 +294,24 @@ func GetLocalModelConfiguration(basePath string, name string) (*ModelConfig, err
|
||||
return ReadConfigFile[ModelConfig](galleryFile)
|
||||
}
|
||||
|
||||
func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
|
||||
additionalFiles := []string{}
|
||||
func listModelFiles(systemState *system.SystemState, name string) ([]string, error) {
|
||||
|
||||
configFile := filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", name))
|
||||
if err := utils.VerifyPath(configFile, systemState.Model.ModelsPath); err != nil {
|
||||
return fmt.Errorf("failed to verify path %s: %w", configFile, err)
|
||||
return nil, fmt.Errorf("failed to verify path %s: %w", configFile, err)
|
||||
}
|
||||
|
||||
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
|
||||
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
|
||||
|
||||
galleryFile := filepath.Join(systemState.Model.ModelsPath, galleryFileName(name))
|
||||
if err := utils.VerifyPath(galleryFile, systemState.Model.ModelsPath); err != nil {
|
||||
return nil, fmt.Errorf("failed to verify path %s: %w", galleryFile, err)
|
||||
}
|
||||
|
||||
additionalFiles := []string{}
|
||||
allFiles := []string{}
|
||||
|
||||
// Galleryname is the name of the model in this case
|
||||
dat, err := os.ReadFile(configFile)
|
||||
if err == nil {
|
||||
@@ -307,7 +319,7 @@ func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
|
||||
|
||||
err = yaml.Unmarshal(dat, &modelConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
if modelConfig.Model != "" {
|
||||
additionalFiles = append(additionalFiles, modelConfig.ModelFileName())
|
||||
@@ -318,26 +330,15 @@ func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
|
||||
}
|
||||
}
|
||||
|
||||
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
|
||||
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
|
||||
|
||||
galleryFile := filepath.Join(systemState.Model.ModelsPath, galleryFileName(name))
|
||||
if err := utils.VerifyPath(galleryFile, systemState.Model.ModelsPath); err != nil {
|
||||
return fmt.Errorf("failed to verify path %s: %w", galleryFile, err)
|
||||
}
|
||||
|
||||
var filesToRemove []string
|
||||
|
||||
// Delete all the files associated to the model
|
||||
// read the model config
|
||||
galleryconfig, err := ReadConfigFile[ModelConfig](galleryFile)
|
||||
if err == nil && galleryconfig != nil {
|
||||
for _, f := range galleryconfig.Files {
|
||||
fullPath := filepath.Join(systemState.Model.ModelsPath, f.Filename)
|
||||
if err := utils.VerifyPath(fullPath, systemState.Model.ModelsPath); err != nil {
|
||||
return fmt.Errorf("failed to verify path %s: %w", fullPath, err)
|
||||
return allFiles, fmt.Errorf("failed to verify path %s: %w", fullPath, err)
|
||||
}
|
||||
filesToRemove = append(filesToRemove, fullPath)
|
||||
allFiles = append(allFiles, fullPath)
|
||||
}
|
||||
} else {
|
||||
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
|
||||
@@ -346,18 +347,68 @@ func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
|
||||
for _, f := range additionalFiles {
|
||||
fullPath := filepath.Join(filepath.Join(systemState.Model.ModelsPath, f))
|
||||
if err := utils.VerifyPath(fullPath, systemState.Model.ModelsPath); err != nil {
|
||||
return fmt.Errorf("failed to verify path %s: %w", fullPath, err)
|
||||
return allFiles, fmt.Errorf("failed to verify path %s: %w", fullPath, err)
|
||||
}
|
||||
filesToRemove = append(filesToRemove, fullPath)
|
||||
allFiles = append(allFiles, fullPath)
|
||||
}
|
||||
|
||||
filesToRemove = append(filesToRemove, galleryFile)
|
||||
allFiles = append(allFiles, galleryFile)
|
||||
|
||||
// skip duplicates
|
||||
filesToRemove = utils.Unique(filesToRemove)
|
||||
allFiles = utils.Unique(allFiles)
|
||||
|
||||
return allFiles, nil
|
||||
}
|
||||
|
||||
func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
|
||||
configFile := filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", name))
|
||||
|
||||
filesToRemove, err := listModelFiles(systemState, name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
allOtherFiles := []string{}
|
||||
// Get all files of all other models
|
||||
fi, err := os.ReadDir(systemState.Model.ModelsPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, f := range fi {
|
||||
if f.IsDir() {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(f.Name(), "._gallery_") {
|
||||
continue
|
||||
}
|
||||
if !strings.HasSuffix(f.Name(), ".yaml") && !strings.HasSuffix(f.Name(), ".yml") {
|
||||
continue
|
||||
}
|
||||
if f.Name() == fmt.Sprintf("%s.yaml", name) || f.Name() == fmt.Sprintf("%s.yml", name) {
|
||||
continue
|
||||
}
|
||||
|
||||
name := strings.TrimSuffix(f.Name(), ".yaml")
|
||||
name = strings.TrimSuffix(name, ".yml")
|
||||
|
||||
log.Debug().Msgf("Checking file %s", f.Name())
|
||||
files, err := listModelFiles(systemState, name)
|
||||
if err != nil {
|
||||
log.Debug().Err(err).Msgf("failed to list files for model %s", f.Name())
|
||||
continue
|
||||
}
|
||||
allOtherFiles = append(allOtherFiles, files...)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Files to remove: %+v", filesToRemove)
|
||||
log.Debug().Msgf("All other files: %+v", allOtherFiles)
|
||||
|
||||
// Removing files
|
||||
for _, f := range filesToRemove {
|
||||
if slices.Contains(allOtherFiles, f) {
|
||||
log.Debug().Msgf("Skipping file %s because it is part of another model", f)
|
||||
continue
|
||||
}
|
||||
if e := os.Remove(f); e != nil {
|
||||
log.Error().Err(e).Msgf("failed to remove file %s", f)
|
||||
}
|
||||
|
||||
@@ -183,5 +183,98 @@ var _ = Describe("Model test", func() {
|
||||
_, err = InstallModel(context.TODO(), systemState, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
|
||||
It("does not delete shared model files when one config is deleted", func() {
|
||||
tempdir, err := os.MkdirTemp("", "test")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
defer os.RemoveAll(tempdir)
|
||||
|
||||
systemState, err := system.GetSystemState(
|
||||
system.WithModelPath(tempdir),
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Create a shared model file
|
||||
sharedModelFile := filepath.Join(tempdir, "shared_model.bin")
|
||||
err = os.WriteFile(sharedModelFile, []byte("fake model content"), 0600)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Create first model configuration
|
||||
config1 := `name: model1
|
||||
model: shared_model.bin`
|
||||
err = os.WriteFile(filepath.Join(tempdir, "model1.yaml"), []byte(config1), 0600)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Create first model's gallery file
|
||||
galleryConfig1 := ModelConfig{
|
||||
Name: "model1",
|
||||
Files: []File{
|
||||
{Filename: "shared_model.bin"},
|
||||
},
|
||||
}
|
||||
galleryData1, err := yaml.Marshal(galleryConfig1)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
err = os.WriteFile(filepath.Join(tempdir, "._gallery_model1.yaml"), galleryData1, 0600)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Create second model configuration sharing the same model file
|
||||
config2 := `name: model2
|
||||
model: shared_model.bin`
|
||||
err = os.WriteFile(filepath.Join(tempdir, "model2.yaml"), []byte(config2), 0600)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Create second model's gallery file
|
||||
galleryConfig2 := ModelConfig{
|
||||
Name: "model2",
|
||||
Files: []File{
|
||||
{Filename: "shared_model.bin"},
|
||||
},
|
||||
}
|
||||
galleryData2, err := yaml.Marshal(galleryConfig2)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
err = os.WriteFile(filepath.Join(tempdir, "._gallery_model2.yaml"), galleryData2, 0600)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Verify both configurations exist
|
||||
_, err = os.Stat(filepath.Join(tempdir, "model1.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Verify the shared model file exists
|
||||
_, err = os.Stat(sharedModelFile)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Delete the first model
|
||||
err = DeleteModelFromSystem(systemState, "model1")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Verify the first configuration is deleted
|
||||
_, err = os.Stat(filepath.Join(tempdir, "model1.yaml"))
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
|
||||
|
||||
// Verify the shared model file still exists (not deleted because model2 still uses it)
|
||||
_, err = os.Stat(sharedModelFile)
|
||||
Expect(err).ToNot(HaveOccurred(), "shared model file should not be deleted when used by other configs")
|
||||
|
||||
// Verify the second configuration still exists
|
||||
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Now delete the second model
|
||||
err = DeleteModelFromSystem(systemState, "model2")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
// Verify the second configuration is deleted
|
||||
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
|
||||
|
||||
// Verify the shared model file is now deleted (no more references)
|
||||
_, err = os.Stat(sharedModelFile)
|
||||
Expect(err).To(HaveOccurred(), "shared model file should be deleted when no configs reference it")
|
||||
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -208,7 +208,7 @@ func API(application *application.Application) (*echo.Echo, error) {
|
||||
routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application.TemplatesEvaluator())
|
||||
routes.RegisterOpenAIRoutes(e, requestExtractor, application)
|
||||
if !application.ApplicationConfig().DisableWebUI {
|
||||
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
|
||||
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application)
|
||||
routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
|
||||
}
|
||||
routes.RegisterJINARoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
@@ -1199,6 +1199,9 @@ parameters:
|
||||
|
||||
Context("Config file", func() {
|
||||
BeforeEach(func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("run this test only on linux")
|
||||
}
|
||||
modelPath := os.Getenv("MODELS_PATH")
|
||||
backendPath := os.Getenv("BACKENDS_PATH")
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
@@ -145,7 +145,7 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
|
||||
}
|
||||
|
||||
// Set defaults
|
||||
modelConfig.SetDefaults()
|
||||
modelConfig.SetDefaults(appConfig.ToConfigLoaderOptions()...)
|
||||
|
||||
// Validate the configuration
|
||||
if valid, _ := modelConfig.Validate(); !valid {
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"net"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -105,7 +105,10 @@ func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
|
||||
fragment = fragment.AddMessage(message.Role, message.StringContent)
|
||||
}
|
||||
|
||||
port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:]
|
||||
_, port, err := net.SplitHostPort(appConfig.APIAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
apiKey := ""
|
||||
if len(appConfig.ApiKeys) > 0 {
|
||||
apiKey = appConfig.ApiKeys[0]
|
||||
|
||||
340
core/http/endpoints/localai/settings.go
Normal file
340
core/http/endpoints/localai/settings.go
Normal file
@@ -0,0 +1,340 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type SettingsResponse struct {
|
||||
Success bool `json:"success"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
type RuntimeSettings struct {
|
||||
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
|
||||
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"`
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
Threads *int `json:"threads,omitempty"`
|
||||
ContextSize *int `json:"context_size,omitempty"`
|
||||
F16 *bool `json:"f16,omitempty"`
|
||||
Debug *bool `json:"debug,omitempty"`
|
||||
CORS *bool `json:"cors,omitempty"`
|
||||
CSRF *bool `json:"csrf,omitempty"`
|
||||
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
|
||||
P2PToken *string `json:"p2p_token,omitempty"`
|
||||
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
|
||||
Federated *bool `json:"federated,omitempty"`
|
||||
Galleries *[]config.Gallery `json:"galleries,omitempty"`
|
||||
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
|
||||
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
|
||||
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
|
||||
ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys
|
||||
}
|
||||
|
||||
// GetSettingsEndpoint returns current settings with precedence (env > file > defaults)
|
||||
func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
appConfig := app.ApplicationConfig()
|
||||
startupConfig := app.StartupConfig()
|
||||
|
||||
if startupConfig == nil {
|
||||
// Fallback if startup config not available
|
||||
startupConfig = appConfig
|
||||
}
|
||||
|
||||
settings := RuntimeSettings{}
|
||||
|
||||
// Set all current values (using pointers for RuntimeSettings)
|
||||
watchdogIdle := appConfig.WatchDogIdle
|
||||
watchdogBusy := appConfig.WatchDogBusy
|
||||
watchdogEnabled := appConfig.WatchDog
|
||||
singleBackend := appConfig.SingleBackend
|
||||
parallelBackendRequests := appConfig.ParallelBackendRequests
|
||||
threads := appConfig.Threads
|
||||
contextSize := appConfig.ContextSize
|
||||
f16 := appConfig.F16
|
||||
debug := appConfig.Debug
|
||||
cors := appConfig.CORS
|
||||
csrf := appConfig.CSRF
|
||||
corsAllowOrigins := appConfig.CORSAllowOrigins
|
||||
p2pToken := appConfig.P2PToken
|
||||
p2pNetworkID := appConfig.P2PNetworkID
|
||||
federated := appConfig.Federated
|
||||
galleries := appConfig.Galleries
|
||||
backendGalleries := appConfig.BackendGalleries
|
||||
autoloadGalleries := appConfig.AutoloadGalleries
|
||||
autoloadBackendGalleries := appConfig.AutoloadBackendGalleries
|
||||
apiKeys := appConfig.ApiKeys
|
||||
|
||||
settings.WatchdogIdleEnabled = &watchdogIdle
|
||||
settings.WatchdogBusyEnabled = &watchdogBusy
|
||||
settings.WatchdogEnabled = &watchdogEnabled
|
||||
settings.SingleBackend = &singleBackend
|
||||
settings.ParallelBackendRequests = ¶llelBackendRequests
|
||||
settings.Threads = &threads
|
||||
settings.ContextSize = &contextSize
|
||||
settings.F16 = &f16
|
||||
settings.Debug = &debug
|
||||
settings.CORS = &cors
|
||||
settings.CSRF = &csrf
|
||||
settings.CORSAllowOrigins = &corsAllowOrigins
|
||||
settings.P2PToken = &p2pToken
|
||||
settings.P2PNetworkID = &p2pNetworkID
|
||||
settings.Federated = &federated
|
||||
settings.Galleries = &galleries
|
||||
settings.BackendGalleries = &backendGalleries
|
||||
settings.AutoloadGalleries = &autoloadGalleries
|
||||
settings.AutoloadBackendGalleries = &autoloadBackendGalleries
|
||||
settings.ApiKeys = &apiKeys
|
||||
|
||||
var idleTimeout, busyTimeout string
|
||||
if appConfig.WatchDogIdleTimeout > 0 {
|
||||
idleTimeout = appConfig.WatchDogIdleTimeout.String()
|
||||
} else {
|
||||
idleTimeout = "15m" // default
|
||||
}
|
||||
if appConfig.WatchDogBusyTimeout > 0 {
|
||||
busyTimeout = appConfig.WatchDogBusyTimeout.String()
|
||||
} else {
|
||||
busyTimeout = "5m" // default
|
||||
}
|
||||
settings.WatchdogIdleTimeout = &idleTimeout
|
||||
settings.WatchdogBusyTimeout = &busyTimeout
|
||||
return c.JSON(http.StatusOK, settings)
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateSettingsEndpoint updates settings, saves to file, and applies immediately
|
||||
func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
appConfig := app.ApplicationConfig()
|
||||
startupConfig := app.StartupConfig()
|
||||
|
||||
if startupConfig == nil {
|
||||
// Fallback if startup config not available
|
||||
startupConfig = appConfig
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(c.Request().Body)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to read request body: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
var settings RuntimeSettings
|
||||
if err := json.Unmarshal(body, &settings); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to parse JSON: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// Validate timeouts if provided
|
||||
if settings.WatchdogIdleTimeout != nil {
|
||||
_, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Invalid watchdog_idle_timeout format: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
if settings.WatchdogBusyTimeout != nil {
|
||||
_, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Invalid watchdog_busy_timeout format: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Save to file
|
||||
if appConfig.DynamicConfigsDir == "" {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "DynamicConfigsDir is not set",
|
||||
})
|
||||
}
|
||||
|
||||
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
||||
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to marshal settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to write settings file: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// Apply settings immediately, checking env var overrides per field
|
||||
watchdogChanged := false
|
||||
if settings.WatchdogEnabled != nil {
|
||||
appConfig.WatchDog = *settings.WatchdogEnabled
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogIdleEnabled != nil {
|
||||
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
|
||||
if appConfig.WatchDogIdle {
|
||||
appConfig.WatchDog = true
|
||||
}
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogBusyEnabled != nil {
|
||||
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
|
||||
if appConfig.WatchDogBusy {
|
||||
appConfig.WatchDog = true
|
||||
}
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogIdleTimeout != nil {
|
||||
dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout)
|
||||
appConfig.WatchDogIdleTimeout = dur
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogBusyTimeout != nil {
|
||||
dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout)
|
||||
appConfig.WatchDogBusyTimeout = dur
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.SingleBackend != nil {
|
||||
appConfig.SingleBackend = *settings.SingleBackend
|
||||
}
|
||||
if settings.ParallelBackendRequests != nil {
|
||||
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
}
|
||||
if settings.Threads != nil {
|
||||
appConfig.Threads = *settings.Threads
|
||||
}
|
||||
if settings.ContextSize != nil {
|
||||
appConfig.ContextSize = *settings.ContextSize
|
||||
}
|
||||
if settings.F16 != nil {
|
||||
appConfig.F16 = *settings.F16
|
||||
}
|
||||
if settings.Debug != nil {
|
||||
appConfig.Debug = *settings.Debug
|
||||
}
|
||||
if settings.CORS != nil {
|
||||
appConfig.CORS = *settings.CORS
|
||||
}
|
||||
if settings.CSRF != nil {
|
||||
appConfig.CSRF = *settings.CSRF
|
||||
}
|
||||
if settings.CORSAllowOrigins != nil {
|
||||
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
|
||||
}
|
||||
if settings.P2PToken != nil {
|
||||
appConfig.P2PToken = *settings.P2PToken
|
||||
}
|
||||
if settings.P2PNetworkID != nil {
|
||||
appConfig.P2PNetworkID = *settings.P2PNetworkID
|
||||
}
|
||||
if settings.Federated != nil {
|
||||
appConfig.Federated = *settings.Federated
|
||||
}
|
||||
if settings.Galleries != nil {
|
||||
appConfig.Galleries = *settings.Galleries
|
||||
}
|
||||
if settings.BackendGalleries != nil {
|
||||
appConfig.BackendGalleries = *settings.BackendGalleries
|
||||
}
|
||||
if settings.AutoloadGalleries != nil {
|
||||
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
|
||||
}
|
||||
if settings.AutoloadBackendGalleries != nil {
|
||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
if settings.ApiKeys != nil {
|
||||
// API keys from env vars (startup) should be kept, runtime settings keys are added
|
||||
// Combine startup keys (env vars) with runtime settings keys
|
||||
envKeys := startupConfig.ApiKeys
|
||||
runtimeKeys := *settings.ApiKeys
|
||||
// Merge: env keys first (they take precedence), then runtime keys
|
||||
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
|
||||
|
||||
// Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication
|
||||
// The runtime_settings.json is the unified config file. If api_keys.json exists,
|
||||
// it will be loaded first, but runtime_settings.json takes precedence and deduplicates.
|
||||
}
|
||||
|
||||
// Restart watchdog if settings changed
|
||||
if watchdogChanged {
|
||||
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil {
|
||||
if err := app.StopWatchdog(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to stop watchdog")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to stop watchdog: " + err.Error(),
|
||||
})
|
||||
}
|
||||
} else {
|
||||
if err := app.RestartWatchdog(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to restart watchdog")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to restart watchdog: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restart P2P if P2P settings changed
|
||||
p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil
|
||||
if p2pChanged {
|
||||
if settings.P2PToken != nil && *settings.P2PToken == "" {
|
||||
// stop P2P
|
||||
if err := app.StopP2P(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to stop P2P")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to stop P2P: " + err.Error(),
|
||||
})
|
||||
}
|
||||
} else {
|
||||
if settings.P2PToken != nil && *settings.P2PToken == "0" {
|
||||
// generate a token if users sets 0 (disabled)
|
||||
token := p2p.GenerateToken(60, 60)
|
||||
settings.P2PToken = &token
|
||||
appConfig.P2PToken = token
|
||||
}
|
||||
// Stop existing P2P
|
||||
if err := app.RestartP2P(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to stop P2P")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to stop P2P: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return c.JSON(http.StatusOK, SettingsResponse{
|
||||
Success: true,
|
||||
Message: "Settings updated successfully",
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -43,17 +43,18 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
||||
processingModels, taskTypes := opcache.GetStatus()
|
||||
|
||||
summary := map[string]interface{}{
|
||||
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"BaseURL": middleware.BaseURL(c),
|
||||
"Models": modelsWithoutConfig,
|
||||
"ModelsConfig": modelConfigs,
|
||||
"GalleryConfig": galleryConfigs,
|
||||
"ApplicationConfig": appConfig,
|
||||
"ProcessingModels": processingModels,
|
||||
"TaskTypes": taskTypes,
|
||||
"LoadedModels": loadedModelsMap,
|
||||
"InstalledBackends": installedBackends,
|
||||
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"BaseURL": middleware.BaseURL(c),
|
||||
"Models": modelsWithoutConfig,
|
||||
"ModelsConfig": modelConfigs,
|
||||
"GalleryConfig": galleryConfigs,
|
||||
"ApplicationConfig": appConfig,
|
||||
"ProcessingModels": processingModels,
|
||||
"TaskTypes": taskTypes,
|
||||
"LoadedModels": loadedModelsMap,
|
||||
"InstalledBackends": installedBackends,
|
||||
"DisableRuntimeSettings": appConfig.DisableRuntimeSettings,
|
||||
}
|
||||
|
||||
contentType := c.Request().Header.Get("Content-Type")
|
||||
|
||||
268
core/http/endpoints/openai/inpainting.go
Normal file
268
core/http/endpoints/openai/inpainting.go
Normal file
@@ -0,0 +1,268 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
// InpaintingEndpoint handles POST /v1/images/inpainting
|
||||
//
|
||||
// Swagger / OpenAPI docstring (swaggo):
|
||||
// @Summary Image inpainting
|
||||
// @Description Perform image inpainting. Accepts multipart/form-data with `image` and `mask` files.
|
||||
// @Tags images
|
||||
// @Accept multipart/form-data
|
||||
// @Produce application/json
|
||||
// @Param model formData string true "Model identifier"
|
||||
// @Param prompt formData string true "Text prompt guiding the generation"
|
||||
// @Param steps formData int false "Number of inference steps (default 25)"
|
||||
// @Param image formData file true "Original image file"
|
||||
// @Param mask formData file true "Mask image file (white = area to inpaint)"
|
||||
// @Success 200 {object} schema.OpenAIResponse
|
||||
// @Failure 400 {object} map[string]string
|
||||
// @Failure 500 {object} map[string]string
|
||||
// @Router /v1/images/inpainting [post]
|
||||
func InpaintingEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
// Parse basic form values
|
||||
modelName := c.FormValue("model")
|
||||
prompt := c.FormValue("prompt")
|
||||
stepsStr := c.FormValue("steps")
|
||||
|
||||
if modelName == "" || prompt == "" {
|
||||
log.Error().Msg("Inpainting Endpoint - missing model or prompt")
|
||||
return echo.ErrBadRequest
|
||||
}
|
||||
|
||||
// steps default
|
||||
steps := 25
|
||||
if stepsStr != "" {
|
||||
if v, err := strconv.Atoi(stepsStr); err == nil {
|
||||
steps = v
|
||||
}
|
||||
}
|
||||
|
||||
// Get uploaded files
|
||||
imageFile, err := c.FormFile("image")
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("Inpainting Endpoint - missing image file")
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "missing image file")
|
||||
}
|
||||
maskFile, err := c.FormFile("mask")
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("Inpainting Endpoint - missing mask file")
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "missing mask file")
|
||||
}
|
||||
|
||||
// Read files into memory (small files expected)
|
||||
imgSrc, err := imageFile.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer imgSrc.Close()
|
||||
imgBytes, err := io.ReadAll(imgSrc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
maskSrc, err := maskFile.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer maskSrc.Close()
|
||||
maskBytes, err := io.ReadAll(maskSrc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create JSON with base64 fields expected by backend
|
||||
b64Image := base64.StdEncoding.EncodeToString(imgBytes)
|
||||
b64Mask := base64.StdEncoding.EncodeToString(maskBytes)
|
||||
|
||||
// get model config from context (middleware set it)
|
||||
cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
||||
if !ok || cfg == nil {
|
||||
log.Error().Msg("Inpainting Endpoint - model config not found in context")
|
||||
return echo.ErrBadRequest
|
||||
}
|
||||
|
||||
// Use the GeneratedContentDir so the generated PNG is placed where the
|
||||
// HTTP static handler serves `/generated-images`.
|
||||
tmpDir := appConfig.GeneratedContentDir
|
||||
// Ensure the directory exists
|
||||
if err := os.MkdirAll(tmpDir, 0750); err != nil {
|
||||
log.Error().Err(err).Msgf("Inpainting Endpoint - failed to create generated content dir: %s", tmpDir)
|
||||
return echo.NewHTTPError(http.StatusInternalServerError, "failed to prepare storage")
|
||||
}
|
||||
id := uuid.New().String()
|
||||
jsonPath := filepath.Join(tmpDir, fmt.Sprintf("inpaint_%s.json", id))
|
||||
jsonFile := map[string]string{
|
||||
"image": b64Image,
|
||||
"mask_image": b64Mask,
|
||||
}
|
||||
jf, err := os.CreateTemp(tmpDir, "inpaint_")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// setup cleanup on error; if everything succeeds we set success = true
|
||||
success := false
|
||||
var dst string
|
||||
var origRef string
|
||||
var maskRef string
|
||||
defer func() {
|
||||
if !success {
|
||||
// Best-effort cleanup; log any failures
|
||||
if jf != nil {
|
||||
if cerr := jf.Close(); cerr != nil {
|
||||
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file in cleanup")
|
||||
}
|
||||
if name := jf.Name(); name != "" {
|
||||
if rerr := os.Remove(name); rerr != nil && !os.IsNotExist(rerr) {
|
||||
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove temp json file %s in cleanup", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
if jsonPath != "" {
|
||||
if rerr := os.Remove(jsonPath); rerr != nil && !os.IsNotExist(rerr) {
|
||||
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove json file %s in cleanup", jsonPath)
|
||||
}
|
||||
}
|
||||
if dst != "" {
|
||||
if rerr := os.Remove(dst); rerr != nil && !os.IsNotExist(rerr) {
|
||||
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove dst file %s in cleanup", dst)
|
||||
}
|
||||
}
|
||||
if origRef != "" {
|
||||
if rerr := os.Remove(origRef); rerr != nil && !os.IsNotExist(rerr) {
|
||||
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove orig ref file %s in cleanup", origRef)
|
||||
}
|
||||
}
|
||||
if maskRef != "" {
|
||||
if rerr := os.Remove(maskRef); rerr != nil && !os.IsNotExist(rerr) {
|
||||
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove mask ref file %s in cleanup", maskRef)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// write original image and mask to disk as ref images so backends that
|
||||
// accept reference image files can use them (maintainer request).
|
||||
origTmp, err := os.CreateTemp(tmpDir, "refimg_")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := origTmp.Write(imgBytes); err != nil {
|
||||
_ = origTmp.Close()
|
||||
_ = os.Remove(origTmp.Name())
|
||||
return err
|
||||
}
|
||||
if cerr := origTmp.Close(); cerr != nil {
|
||||
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close orig temp file")
|
||||
}
|
||||
origRef = origTmp.Name()
|
||||
|
||||
maskTmp, err := os.CreateTemp(tmpDir, "refmask_")
|
||||
if err != nil {
|
||||
// cleanup origTmp on error
|
||||
_ = os.Remove(origRef)
|
||||
return err
|
||||
}
|
||||
if _, err := maskTmp.Write(maskBytes); err != nil {
|
||||
_ = maskTmp.Close()
|
||||
_ = os.Remove(maskTmp.Name())
|
||||
_ = os.Remove(origRef)
|
||||
return err
|
||||
}
|
||||
if cerr := maskTmp.Close(); cerr != nil {
|
||||
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close mask temp file")
|
||||
}
|
||||
maskRef = maskTmp.Name()
|
||||
// write JSON
|
||||
enc := json.NewEncoder(jf)
|
||||
if err := enc.Encode(jsonFile); err != nil {
|
||||
if cerr := jf.Close(); cerr != nil {
|
||||
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file after encode error")
|
||||
}
|
||||
return err
|
||||
}
|
||||
if cerr := jf.Close(); cerr != nil {
|
||||
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file")
|
||||
}
|
||||
// rename to desired name
|
||||
if err := os.Rename(jf.Name(), jsonPath); err != nil {
|
||||
return err
|
||||
}
|
||||
// prepare dst
|
||||
outTmp, err := os.CreateTemp(tmpDir, "out_")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cerr := outTmp.Close(); cerr != nil {
|
||||
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close out temp file")
|
||||
}
|
||||
dst = outTmp.Name() + ".png"
|
||||
if err := os.Rename(outTmp.Name(), dst); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Determine width/height default
|
||||
width := 512
|
||||
height := 512
|
||||
|
||||
// Call backend image generation via indirection so tests can stub it
|
||||
// Note: ImageGenerationFunc will call into the loaded model's GenerateImage which expects src JSON
|
||||
// Also pass ref images (orig + mask) so backends that support ref images can use them.
|
||||
refImages := []string{origRef, maskRef}
|
||||
fn, err := backend.ImageGenerationFunc(height, width, 0, steps, 0, prompt, "", jsonPath, dst, ml, *cfg, appConfig, refImages)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Execute generation function (blocking)
|
||||
if err := fn(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// On success, build response URL using BaseURL middleware helper and
|
||||
// the same `generated-images` prefix used by the server static mount.
|
||||
baseURL := middleware.BaseURL(c)
|
||||
|
||||
// Build response using url.JoinPath for correct URL escaping
|
||||
imgPath, err := url.JoinPath(baseURL, "generated-images", filepath.Base(dst))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Data: []schema.Item{{
|
||||
URL: imgPath,
|
||||
}},
|
||||
}
|
||||
|
||||
// mark success so defer cleanup will not remove output files
|
||||
success = true
|
||||
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
}
|
||||
107
core/http/endpoints/openai/inpainting_test.go
Normal file
107
core/http/endpoints/openai/inpainting_test.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func makeMultipartRequest(t *testing.T, fields map[string]string, files map[string][]byte) (*http.Request, string) {
|
||||
b := &bytes.Buffer{}
|
||||
w := multipart.NewWriter(b)
|
||||
for k, v := range fields {
|
||||
_ = w.WriteField(k, v)
|
||||
}
|
||||
for fname, content := range files {
|
||||
fw, err := w.CreateFormFile(fname, fname+".png")
|
||||
require.NoError(t, err)
|
||||
_, err = fw.Write(content)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.NoError(t, w.Close())
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/images/inpainting", b)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
return req, w.FormDataContentType()
|
||||
}
|
||||
|
||||
func TestInpainting_MissingFiles(t *testing.T) {
|
||||
e := echo.New()
|
||||
// handler requires cl, ml, appConfig but this test verifies missing files early
|
||||
h := InpaintingEndpoint(nil, nil, config.NewApplicationConfig())
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/images/inpainting", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
c := e.NewContext(req, rec)
|
||||
|
||||
err := h(c)
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
func TestInpainting_HappyPath(t *testing.T) {
|
||||
// Setup temp generated content dir
|
||||
tmpDir, err := os.MkdirTemp("", "gencontent")
|
||||
require.NoError(t, err)
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
appConf := config.NewApplicationConfig(config.WithGeneratedContentDir(tmpDir))
|
||||
|
||||
// stub the backend.ImageGenerationFunc
|
||||
orig := backend.ImageGenerationFunc
|
||||
backend.ImageGenerationFunc = func(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig, refImages []string) (func() error, error) {
|
||||
fn := func() error {
|
||||
// write a fake png file to dst
|
||||
return os.WriteFile(dst, []byte("PNGDATA"), 0644)
|
||||
}
|
||||
return fn, nil
|
||||
}
|
||||
defer func() { backend.ImageGenerationFunc = orig }()
|
||||
|
||||
// prepare multipart request with image and mask
|
||||
fields := map[string]string{"model": "dreamshaper-8-inpainting", "prompt": "A test"}
|
||||
files := map[string][]byte{"image": []byte("IMAGEDATA"), "mask": []byte("MASKDATA")}
|
||||
reqBuf, _ := makeMultipartRequest(t, fields, files)
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
e := echo.New()
|
||||
c := e.NewContext(reqBuf, rec)
|
||||
|
||||
// set a minimal model config in context as handler expects
|
||||
c.Set(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG, &config.ModelConfig{Backend: "diffusers"})
|
||||
|
||||
h := InpaintingEndpoint(nil, nil, appConf)
|
||||
|
||||
// call handler
|
||||
err = h(c)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, http.StatusOK, rec.Code)
|
||||
|
||||
// verify response body contains generated-images path
|
||||
body := rec.Body.String()
|
||||
require.Contains(t, body, "generated-images")
|
||||
|
||||
// confirm the file was created in tmpDir
|
||||
// parse out filename from response (naive search)
|
||||
// find "generated-images/" and extract until closing quote or brace
|
||||
idx := bytes.Index(rec.Body.Bytes(), []byte("generated-images/"))
|
||||
require.True(t, idx >= 0)
|
||||
rest := rec.Body.Bytes()[idx:]
|
||||
end := bytes.IndexAny(rest, "\",}\n")
|
||||
if end == -1 {
|
||||
end = len(rest)
|
||||
}
|
||||
fname := string(rest[len("generated-images/"):end])
|
||||
// ensure file exists
|
||||
_, err = os.Stat(filepath.Join(tmpDir, fname))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"net"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -75,7 +75,11 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
|
||||
fragment = fragment.AddMessage(message.Role, message.StringContent)
|
||||
}
|
||||
|
||||
port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:]
|
||||
_, port, err := net.SplitHostPort(appConfig.APIAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
apiKey := ""
|
||||
if appConfig.ApiKeys != nil {
|
||||
apiKey = appConfig.ApiKeys[0]
|
||||
@@ -104,11 +108,11 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
|
||||
log.Debug().Msgf("[model agent] [model: %s] Reasoning: %s", config.Name, s)
|
||||
}),
|
||||
cogito.WithToolCallBack(func(t *cogito.ToolChoice) bool {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Tool call: %s, reasoning: %s, arguments: %+v", t.Name, t.Reasoning, t.Arguments)
|
||||
log.Debug().Msgf("[model agent] [model: %s] Tool call: %s, reasoning: %s, arguments: %+v", config.Name, t.Name, t.Reasoning, t.Arguments)
|
||||
return true
|
||||
}),
|
||||
cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
|
||||
log.Debug().Msgf("[model agent] [model: %s] Tool call result: %s, tool arguments: %+v", t.Name, t.Result, t.ToolArguments)
|
||||
log.Debug().Msgf("[model agent] [model: %s] Tool call result: %s, result: %s, tool arguments: %+v", config.Name, t.Name, t.Result, t.ToolArguments)
|
||||
}),
|
||||
)
|
||||
|
||||
|
||||
@@ -55,6 +55,11 @@ func (re *RequestExtractor) setModelNameFromRequest(c echo.Context) {
|
||||
model = c.QueryParam("model")
|
||||
}
|
||||
|
||||
// Check FormValue for multipart/form-data requests (e.g., /v1/images/inpainting)
|
||||
if model == "" {
|
||||
model = c.FormValue("model")
|
||||
}
|
||||
|
||||
if model == "" {
|
||||
// Set model from bearer token, if available
|
||||
auth := c.Request().Header.Get("Authorization")
|
||||
|
||||
@@ -140,7 +140,8 @@ func RegisterOpenAIRoutes(app *echo.Echo,
|
||||
// images
|
||||
imageHandler := openai.ImageEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
imageMiddleware := []echo.MiddlewareFunc{
|
||||
re.BuildConstantDefaultModelNameMiddleware("stablediffusion"),
|
||||
// Default: use the first available image generation model
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_IMAGE)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
@@ -155,6 +156,11 @@ func RegisterOpenAIRoutes(app *echo.Echo,
|
||||
app.POST("/v1/images/generations", imageHandler, imageMiddleware...)
|
||||
app.POST("/images/generations", imageHandler, imageMiddleware...)
|
||||
|
||||
// inpainting endpoint (image + mask) - reuse same middleware config as images
|
||||
inpaintingHandler := openai.InpaintingEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
app.POST("/v1/images/inpainting", inpaintingHandler, imageMiddleware...)
|
||||
app.POST("/images/inpainting", inpaintingHandler, imageMiddleware...)
|
||||
|
||||
// videos (OpenAI-compatible endpoints mapped to LocalAI video handler)
|
||||
videoHandler := openai.VideoEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
videoMiddleware := []echo.MiddlewareFunc{
|
||||
|
||||
@@ -23,6 +23,17 @@ func RegisterUIRoutes(app *echo.Echo,
|
||||
app.GET("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
|
||||
app.GET("/manage", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
|
||||
|
||||
if !appConfig.DisableRuntimeSettings {
|
||||
// Settings page
|
||||
app.GET("/settings", func(c echo.Context) error {
|
||||
summary := map[string]interface{}{
|
||||
"Title": "LocalAI - Settings",
|
||||
"BaseURL": middleware.BaseURL(c),
|
||||
}
|
||||
return c.Render(200, "views/settings", summary)
|
||||
})
|
||||
}
|
||||
|
||||
// P2P
|
||||
app.GET("/p2p/", func(c echo.Context) error {
|
||||
summary := map[string]interface{}{
|
||||
|
||||
@@ -12,8 +12,10 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -21,7 +23,7 @@ import (
|
||||
)
|
||||
|
||||
// RegisterUIAPIRoutes registers JSON API routes for the web UI
|
||||
func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) {
|
||||
func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache, applicationInstance *application.Application) {
|
||||
|
||||
// Operations API - Get all current operations (models + backends)
|
||||
app.GET("/api/operations", func(c echo.Context) error {
|
||||
@@ -264,17 +266,17 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
installedModelsCount := len(modelConfigs) + len(modelsWithoutConfig)
|
||||
|
||||
return c.JSON(200, map[string]interface{}{
|
||||
"models": modelsJSON,
|
||||
"repositories": appConfig.Galleries,
|
||||
"allTags": tags,
|
||||
"processingModels": processingModelsData,
|
||||
"taskTypes": taskTypes,
|
||||
"availableModels": totalModels,
|
||||
"installedModels": installedModelsCount,
|
||||
"currentPage": pageNum,
|
||||
"totalPages": totalPages,
|
||||
"prevPage": prevPage,
|
||||
"nextPage": nextPage,
|
||||
"models": modelsJSON,
|
||||
"repositories": appConfig.Galleries,
|
||||
"allTags": tags,
|
||||
"processingModels": processingModelsData,
|
||||
"taskTypes": taskTypes,
|
||||
"availableModels": totalModels,
|
||||
"installedModels": installedModelsCount,
|
||||
"currentPage": pageNum,
|
||||
"totalPages": totalPages,
|
||||
"prevPage": prevPage,
|
||||
"nextPage": nextPage,
|
||||
})
|
||||
})
|
||||
|
||||
@@ -802,4 +804,10 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
if !appConfig.DisableRuntimeSettings {
|
||||
// Settings API
|
||||
app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance))
|
||||
app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance))
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -128,6 +128,9 @@
|
||||
audioFiles: [],
|
||||
textFiles: [],
|
||||
attachedFiles: [],
|
||||
mcpMode: false,
|
||||
mcpAvailable: false,
|
||||
mcpModels: {},
|
||||
currentPlaceholder: 'Send a message...',
|
||||
placeholderIndex: 0,
|
||||
charIndex: 0,
|
||||
@@ -163,6 +166,8 @@
|
||||
init() {
|
||||
window.currentPlaceholderText = this.currentPlaceholder;
|
||||
this.startTypingAnimation();
|
||||
// Build MCP models map from data attributes
|
||||
this.buildMCPModelsMap();
|
||||
// Select first model by default
|
||||
this.$nextTick(() => {
|
||||
const select = this.$el.querySelector('select');
|
||||
@@ -171,9 +176,43 @@
|
||||
const firstModelOption = select.options[1];
|
||||
if (firstModelOption && firstModelOption.value) {
|
||||
this.selectedModel = firstModelOption.value;
|
||||
this.checkMCPAvailability();
|
||||
}
|
||||
}
|
||||
});
|
||||
// Watch for changes to selectedModel to update MCP availability
|
||||
this.$watch('selectedModel', () => {
|
||||
this.checkMCPAvailability();
|
||||
});
|
||||
},
|
||||
buildMCPModelsMap() {
|
||||
const select = this.$el.querySelector('select');
|
||||
if (!select) return;
|
||||
this.mcpModels = {};
|
||||
for (let i = 0; i < select.options.length; i++) {
|
||||
const option = select.options[i];
|
||||
if (option.value) {
|
||||
const hasMcpAttr = option.getAttribute('data-has-mcp');
|
||||
this.mcpModels[option.value] = hasMcpAttr === 'true';
|
||||
}
|
||||
}
|
||||
// Debug: uncomment to see the MCP models map
|
||||
// console.log('MCP Models Map:', this.mcpModels);
|
||||
},
|
||||
checkMCPAvailability() {
|
||||
if (!this.selectedModel) {
|
||||
this.mcpAvailable = false;
|
||||
this.mcpMode = false;
|
||||
return;
|
||||
}
|
||||
// Check MCP availability from the map
|
||||
const hasMCP = this.mcpModels[this.selectedModel] === true;
|
||||
this.mcpAvailable = hasMCP;
|
||||
// Debug: uncomment to see what's happening
|
||||
// console.log('MCP Check:', { model: this.selectedModel, hasMCP, mcpAvailable: this.mcpAvailable, map: this.mcpModels });
|
||||
if (!hasMCP) {
|
||||
this.mcpMode = false;
|
||||
}
|
||||
},
|
||||
startTypingAnimation() {
|
||||
if (this.isTyping) return;
|
||||
@@ -268,24 +307,50 @@
|
||||
}
|
||||
}
|
||||
}">
|
||||
<!-- Model Selector -->
|
||||
<!-- Model Selector with MCP Toggle -->
|
||||
<div class="mb-4">
|
||||
<label class="block text-sm font-medium text-[#94A3B8] mb-2">Select Model</label>
|
||||
<select
|
||||
x-model="selectedModel"
|
||||
class="w-full bg-[#1E293B] text-[#E5E7EB] border border-[#38BDF8]/20 focus:border-[#38BDF8] focus:ring-2 focus:ring-[#38BDF8]/50 rounded-lg p-3 appearance-none"
|
||||
required
|
||||
>
|
||||
<option value="" disabled class="text-[#94A3B8]">Select a model to chat with...</option>
|
||||
{{ range .ModelsConfig }}
|
||||
{{ $cfg := . }}
|
||||
{{ range .KnownUsecaseStrings }}
|
||||
{{ if eq . "FLAG_CHAT" }}
|
||||
<option value="{{$cfg.Name}}" class="bg-[#1E293B] text-[#E5E7EB]">{{$cfg.Name}}</option>
|
||||
<div class="flex items-center gap-3">
|
||||
<select
|
||||
x-model="selectedModel"
|
||||
@change="$nextTick(() => checkMCPAvailability())"
|
||||
class="flex-1 bg-[#1E293B] text-[#E5E7EB] border border-[#38BDF8]/20 focus:border-[#38BDF8] focus:ring-2 focus:ring-[#38BDF8]/50 rounded-lg p-3 appearance-none"
|
||||
required
|
||||
>
|
||||
<option value="" disabled class="text-[#94A3B8]">Select a model to chat with...</option>
|
||||
{{ range .ModelsConfig }}
|
||||
{{ $cfg := . }}
|
||||
{{ $hasMCP := or (ne $cfg.MCP.Servers "") (ne $cfg.MCP.Stdio "") }}
|
||||
{{ range .KnownUsecaseStrings }}
|
||||
{{ if eq . "FLAG_CHAT" }}
|
||||
<option value="{{$cfg.Name}}" data-has-mcp="{{if $hasMCP}}true{{else}}false{{end}}" class="bg-[#1E293B] text-[#E5E7EB]">{{$cfg.Name}}</option>
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
</select>
|
||||
{{ end }}
|
||||
</select>
|
||||
|
||||
<!-- Compact MCP Toggle - Show only if MCP is available for selected model -->
|
||||
<div
|
||||
x-show="mcpAvailable"
|
||||
class="flex items-center gap-2 px-3 py-2 text-xs rounded text-[#E5E7EB] bg-[#1E293B] border border-[#38BDF8]/20 whitespace-nowrap">
|
||||
<i class="fa-solid fa-plug text-[#38BDF8] text-sm"></i>
|
||||
<span class="text-[#94A3B8]">MCP</span>
|
||||
<label class="relative inline-flex items-center cursor-pointer ml-1">
|
||||
<input type="checkbox" id="index_mcp_toggle" class="sr-only peer" x-model="mcpMode">
|
||||
<div class="w-9 h-5 bg-[#101827] peer-focus:outline-none peer-focus:ring-2 peer-focus:ring-[#38BDF8]/30 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-[#1E293B] after:border after:rounded-full after:h-4 after:w-4 after:transition-all peer-checked:bg-[#38BDF8]"></div>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- MCP Mode Notification - Compact tooltip style -->
|
||||
<div
|
||||
x-show="mcpMode && mcpAvailable"
|
||||
class="mt-2 p-2 bg-[#38BDF8]/10 border border-[#38BDF8]/30 rounded text-[#94A3B8] text-xs">
|
||||
<div class="flex items-start space-x-2">
|
||||
<i class="fa-solid fa-info-circle text-[#38BDF8] mt-0.5 text-xs"></i>
|
||||
<p class="text-[#94A3B8]">Non-streaming mode active. Responses may take longer to process.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Input Bar -->
|
||||
@@ -476,12 +541,20 @@ function startChat(event) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Get MCP mode from checkbox (if available)
|
||||
let mcpMode = false;
|
||||
const mcpToggle = document.getElementById('index_mcp_toggle');
|
||||
if (mcpToggle && mcpToggle.checked) {
|
||||
mcpMode = true;
|
||||
}
|
||||
|
||||
// Store message and files in localStorage for chat page to pick up
|
||||
const chatData = {
|
||||
message: message,
|
||||
imageFiles: [],
|
||||
audioFiles: [],
|
||||
textFiles: []
|
||||
textFiles: [],
|
||||
mcpMode: mcpMode
|
||||
};
|
||||
|
||||
// Convert files to base64 for storage
|
||||
|
||||
@@ -66,6 +66,14 @@
|
||||
<i class="fas fa-cogs mr-1.5 text-[10px]"></i>
|
||||
<span>Backend Gallery</span>
|
||||
</a>
|
||||
|
||||
{{ if not .DisableRuntimeSettings }}
|
||||
<a href="/settings"
|
||||
class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#38BDF8]/20 text-[#E5E7EB] py-1.5 px-3 rounded text-xs font-medium transition-colors">
|
||||
<i class="fas fa-cog mr-1.5 text-[10px]"></i>
|
||||
<span>Settings</span>
|
||||
</a>
|
||||
{{ end }}
|
||||
</div>
|
||||
|
||||
<!-- Models Section -->
|
||||
|
||||
@@ -299,6 +299,7 @@
|
||||
<option value="mlx-vlm">mlx-vlm</option>
|
||||
<option value="transformers">transformers</option>
|
||||
<option value="vllm">vllm</option>
|
||||
<option value="diffusers">diffusers</option>
|
||||
</select>
|
||||
<p class="mt-1 text-xs text-gray-400">
|
||||
Force a specific backend. Leave empty to auto-detect from URI.
|
||||
@@ -401,6 +402,71 @@
|
||||
Model type for transformers backend. Examples: AutoModelForCausalLM, SentenceTransformer, Mamba, MusicgenForConditionalGeneration. Leave empty to use default (AutoModelForCausalLM).
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- Pipeline Type (Diffusers) -->
|
||||
<div x-show="commonPreferences.backend === 'diffusers'">
|
||||
<label class="block text-sm font-medium text-gray-300 mb-2">
|
||||
<i class="fas fa-stream mr-2"></i>Pipeline Type
|
||||
</label>
|
||||
<input
|
||||
x-model="commonPreferences.pipeline_type"
|
||||
type="text"
|
||||
placeholder="StableDiffusionPipeline (for diffusers backend)"
|
||||
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
|
||||
:disabled="isSubmitting">
|
||||
<p class="mt-1 text-xs text-gray-400">
|
||||
Pipeline type for diffusers backend. Examples: StableDiffusionPipeline, StableDiffusion3Pipeline, FluxPipeline. Leave empty to use default (StableDiffusionPipeline).
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- Scheduler Type (Diffusers) -->
|
||||
<div x-show="commonPreferences.backend === 'diffusers'">
|
||||
<label class="block text-sm font-medium text-gray-300 mb-2">
|
||||
<i class="fas fa-clock mr-2"></i>Scheduler Type
|
||||
</label>
|
||||
<input
|
||||
x-model="commonPreferences.scheduler_type"
|
||||
type="text"
|
||||
placeholder="k_dpmpp_2m (optional)"
|
||||
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
|
||||
:disabled="isSubmitting">
|
||||
<p class="mt-1 text-xs text-gray-400">
|
||||
Scheduler type for diffusers backend. Examples: k_dpmpp_2m, euler_a, ddim. Leave empty to use model default.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- Enable Parameters (Diffusers) -->
|
||||
<div x-show="commonPreferences.backend === 'diffusers'">
|
||||
<label class="block text-sm font-medium text-gray-300 mb-2">
|
||||
<i class="fas fa-cogs mr-2"></i>Enable Parameters
|
||||
</label>
|
||||
<input
|
||||
x-model="commonPreferences.enable_parameters"
|
||||
type="text"
|
||||
placeholder="negative_prompt,num_inference_steps (comma-separated)"
|
||||
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
|
||||
:disabled="isSubmitting">
|
||||
<p class="mt-1 text-xs text-gray-400">
|
||||
Enabled parameters for diffusers backend (comma-separated). Leave empty to use default (negative_prompt,num_inference_steps).
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- CUDA (Diffusers) -->
|
||||
<div x-show="commonPreferences.backend === 'diffusers'">
|
||||
<label class="flex items-center cursor-pointer">
|
||||
<input
|
||||
x-model="commonPreferences.cuda"
|
||||
type="checkbox"
|
||||
class="w-5 h-5 rounded bg-gray-900/90 border-gray-700/70 text-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all cursor-pointer"
|
||||
:disabled="isSubmitting">
|
||||
<span class="ml-3 text-sm font-medium text-gray-300">
|
||||
<i class="fas fa-microchip mr-2"></i>CUDA
|
||||
</span>
|
||||
</label>
|
||||
<p class="mt-1 ml-8 text-xs text-gray-400">
|
||||
Enable CUDA support for GPU acceleration with diffusers backend.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Custom Preferences -->
|
||||
@@ -658,7 +724,11 @@ function importModel() {
|
||||
quantizations: '',
|
||||
mmproj_quantizations: '',
|
||||
embeddings: false,
|
||||
type: ''
|
||||
type: '',
|
||||
pipeline_type: '',
|
||||
scheduler_type: '',
|
||||
enable_parameters: '',
|
||||
cuda: false
|
||||
},
|
||||
isSubmitting: false,
|
||||
currentJobId: null,
|
||||
@@ -733,6 +803,18 @@ function importModel() {
|
||||
if (this.commonPreferences.type && this.commonPreferences.type.trim()) {
|
||||
prefsObj.type = this.commonPreferences.type.trim();
|
||||
}
|
||||
if (this.commonPreferences.pipeline_type && this.commonPreferences.pipeline_type.trim()) {
|
||||
prefsObj.pipeline_type = this.commonPreferences.pipeline_type.trim();
|
||||
}
|
||||
if (this.commonPreferences.scheduler_type && this.commonPreferences.scheduler_type.trim()) {
|
||||
prefsObj.scheduler_type = this.commonPreferences.scheduler_type.trim();
|
||||
}
|
||||
if (this.commonPreferences.enable_parameters && this.commonPreferences.enable_parameters.trim()) {
|
||||
prefsObj.enable_parameters = this.commonPreferences.enable_parameters.trim();
|
||||
}
|
||||
if (this.commonPreferences.cuda) {
|
||||
prefsObj.cuda = true;
|
||||
}
|
||||
|
||||
// Add custom preferences (can override common ones)
|
||||
this.preferences.forEach(pref => {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
<nav class="bg-[#101827] shadow-2xl border-b border-[#1E293B]">
|
||||
<div class="container mx-auto px-4 py-3">
|
||||
<div class="container mx-auto px-4 py-2">
|
||||
<div class="flex items-center justify-between">
|
||||
<div class="flex items-center">
|
||||
<!-- Logo Image -->
|
||||
<a href="./" class="flex items-center group">
|
||||
<img src="static/logo_horizontal.png"
|
||||
alt="LocalAI Logo"
|
||||
class="h-14 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125 group-hover:drop-shadow-[0_0_8px_rgba(56,189,248,0.5)]">
|
||||
class="h-10 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125 group-hover:drop-shadow-[0_0_8px_rgba(56,189,248,0.5)]">
|
||||
</a>
|
||||
</div>
|
||||
|
||||
|
||||
653
core/http/views/settings.html
Normal file
653
core/http/views/settings.html
Normal file
@@ -0,0 +1,653 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
|
||||
<body class="bg-[#101827] text-[#E5E7EB]">
|
||||
<div class="flex flex-col min-h-screen" x-data="settingsDashboard()">
|
||||
|
||||
{{template "views/partials/navbar" .}}
|
||||
|
||||
<!-- Notifications -->
|
||||
<div class="fixed top-20 right-4 z-50 space-y-2" style="max-width: 400px;">
|
||||
<template x-for="notification in notifications" :key="notification.id">
|
||||
<div x-show="true"
|
||||
x-transition:enter="transition ease-out duration-200"
|
||||
x-transition:enter-start="opacity-0"
|
||||
x-transition:enter-end="opacity-100"
|
||||
x-transition:leave="transition ease-in duration-150"
|
||||
x-transition:leave-start="opacity-100"
|
||||
x-transition:leave-end="opacity-0"
|
||||
:class="notification.type === 'error' ? 'bg-red-500' : 'bg-green-500'"
|
||||
class="rounded-lg p-4 text-white flex items-start space-x-3">
|
||||
<div class="flex-shrink-0">
|
||||
<i :class="notification.type === 'error' ? 'fas fa-exclamation-circle' : 'fas fa-check-circle'" class="text-xl"></i>
|
||||
</div>
|
||||
<div class="flex-1 min-w-0">
|
||||
<p class="text-sm font-medium break-words" x-text="notification.message"></p>
|
||||
</div>
|
||||
<button @click="dismissNotification(notification.id)" class="flex-shrink-0 text-white hover:opacity-80 transition-opacity">
|
||||
<i class="fas fa-times"></i>
|
||||
</button>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<div class="container mx-auto px-4 py-6 flex-grow max-w-4xl">
|
||||
<!-- Header -->
|
||||
<div class="mb-6">
|
||||
<div class="flex items-center justify-between mb-2">
|
||||
<h1 class="text-2xl font-semibold text-[#E5E7EB]">
|
||||
Application Settings
|
||||
</h1>
|
||||
<a href="/manage"
|
||||
class="inline-flex items-center text-[#94A3B8] hover:text-[#E5E7EB] transition-colors">
|
||||
<i class="fas fa-arrow-left mr-2 text-sm"></i>
|
||||
<span class="text-sm">Back to Manage</span>
|
||||
</a>
|
||||
</div>
|
||||
<p class="text-sm text-[#94A3B8]">Configure watchdog and backend request settings</p>
|
||||
</div>
|
||||
|
||||
<!-- Settings Form -->
|
||||
<form @submit.prevent="saveSettings()" class="space-y-6">
|
||||
<!-- Watchdog Settings Section -->
|
||||
<div class="bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg p-6">
|
||||
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
|
||||
<i class="fas fa-shield-alt mr-2 text-[#38BDF8] text-sm"></i>
|
||||
Watchdog Settings
|
||||
</h2>
|
||||
<p class="text-xs text-[#94A3B8] mb-4">
|
||||
Configure automatic monitoring and management of backend processes
|
||||
</p>
|
||||
|
||||
<div class="space-y-4">
|
||||
<!-- Enable Watchdog -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Enable Watchdog</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Enable automatic monitoring of backend processes</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.watchdog_enabled"
|
||||
@change="updateWatchdogEnabled()"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Enable Idle Check -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Enable Idle Check</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Automatically stop backends that are idle for too long</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.watchdog_idle_enabled"
|
||||
:disabled="!settings.watchdog_enabled"
|
||||
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Idle Timeout -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Idle Timeout</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Time before an idle backend is stopped (e.g., 15m, 1h)</p>
|
||||
<input type="text" x-model="settings.watchdog_idle_timeout"
|
||||
:disabled="!settings.watchdog_idle_enabled"
|
||||
placeholder="15m"
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#38BDF8]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#38BDF8]/50"
|
||||
:class="!settings.watchdog_idle_enabled ? 'opacity-50 cursor-not-allowed' : ''">
|
||||
</div>
|
||||
|
||||
<!-- Enable Busy Check -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Enable Busy Check</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Automatically stop backends that are busy for too long (stuck processes)</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.watchdog_busy_enabled"
|
||||
:disabled="!settings.watchdog_enabled"
|
||||
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Busy Timeout -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Busy Timeout</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Time before a busy backend is stopped (e.g., 5m, 30m)</p>
|
||||
<input type="text" x-model="settings.watchdog_busy_timeout"
|
||||
:disabled="!settings.watchdog_busy_enabled"
|
||||
placeholder="5m"
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#38BDF8]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#38BDF8]/50"
|
||||
:class="!settings.watchdog_busy_enabled ? 'opacity-50 cursor-not-allowed' : ''">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Backend Request Settings Section -->
|
||||
<div class="bg-[#1E293B] border border-[#8B5CF6]/20 rounded-lg p-6">
|
||||
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
|
||||
<i class="fas fa-cogs mr-2 text-[#8B5CF6] text-sm"></i>
|
||||
Backend Request Settings
|
||||
</h2>
|
||||
<p class="text-xs text-[#94A3B8] mb-4">
|
||||
Configure how backends handle multiple requests
|
||||
</p>
|
||||
|
||||
<div class="space-y-4">
|
||||
<!-- Single Backend Mode -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Single Backend Mode</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Allow only one backend to be active at a time</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.single_backend"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#8B5CF6]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#8B5CF6]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Parallel Backend Requests -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Parallel Backend Requests</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Enable backends to handle multiple requests in parallel (if supported)</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.parallel_backend_requests"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#8B5CF6]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#8B5CF6]"></div>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Performance Settings Section -->
|
||||
<div class="bg-[#1E293B] border border-[#10B981]/20 rounded-lg p-6">
|
||||
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
|
||||
<i class="fas fa-tachometer-alt mr-2 text-[#10B981] text-sm"></i>
|
||||
Performance Settings
|
||||
</h2>
|
||||
<p class="text-xs text-[#94A3B8] mb-4">
|
||||
Configure default performance parameters for models
|
||||
</p>
|
||||
|
||||
<div class="space-y-4">
|
||||
<!-- Threads -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Default Threads</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Number of threads to use for model inference (0 = auto)</p>
|
||||
<input type="number" x-model="settings.threads"
|
||||
min="0"
|
||||
placeholder="0"
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#10B981]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#10B981]/50">
|
||||
</div>
|
||||
|
||||
<!-- Context Size -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Default Context Size</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Default context window size for models</p>
|
||||
<input type="number" x-model="settings.context_size"
|
||||
min="0"
|
||||
placeholder="512"
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#10B981]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#10B981]/50">
|
||||
</div>
|
||||
|
||||
<!-- F16 -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">F16 Precision</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Use 16-bit floating point precision</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.f16"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#10B981]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#10B981]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Debug -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Debug Mode</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Enable debug logging</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.debug"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#10B981]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#10B981]"></div>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- API Settings Section -->
|
||||
<div class="bg-[#1E293B] border border-[#F59E0B]/20 rounded-lg p-6">
|
||||
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
|
||||
<i class="fas fa-globe mr-2 text-[#F59E0B] text-sm"></i>
|
||||
API Settings
|
||||
</h2>
|
||||
<p class="text-xs text-[#94A3B8] mb-4">
|
||||
Configure CORS and CSRF protection
|
||||
</p>
|
||||
|
||||
<div class="space-y-4">
|
||||
<!-- CORS -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Enable CORS</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Enable Cross-Origin Resource Sharing</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.cors"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#F59E0B]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#F59E0B]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- CORS Allow Origins -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">CORS Allow Origins</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Comma-separated list of allowed origins</p>
|
||||
<input type="text" x-model="settings.cors_allow_origins"
|
||||
placeholder="*"
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#F59E0B]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#F59E0B]/50">
|
||||
</div>
|
||||
|
||||
<!-- CSRF -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Enable CSRF Protection</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Enable Cross-Site Request Forgery protection</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.csrf"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#F59E0B]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#F59E0B]"></div>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- P2P Settings Section -->
|
||||
<div class="bg-[#1E293B] border border-[#EC4899]/20 rounded-lg p-6">
|
||||
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
|
||||
<i class="fas fa-network-wired mr-2 text-[#EC4899] text-sm"></i>
|
||||
P2P Settings
|
||||
</h2>
|
||||
<p class="text-xs text-[#94A3B8] mb-4">
|
||||
Configure peer-to-peer networking
|
||||
</p>
|
||||
|
||||
<div class="space-y-4">
|
||||
<!-- P2P Token -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">P2P Token</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Authentication token for P2P network (set to 0 to generate a new token)</p>
|
||||
<input type="text" x-model="settings.p2p_token"
|
||||
placeholder=""
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#EC4899]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#EC4899]/50">
|
||||
</div>
|
||||
|
||||
<!-- P2P Network ID -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">P2P Network ID</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Network identifier for P2P connections</p>
|
||||
<input type="text" x-model="settings.p2p_network_id"
|
||||
placeholder=""
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#EC4899]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#EC4899]/50">
|
||||
</div>
|
||||
|
||||
<!-- Federated -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Federated Mode</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Enable federated instance mode</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.federated"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#EC4899]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#EC4899]"></div>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- API Keys Settings Section -->
|
||||
<div class="bg-[#1E293B] border border-[#EF4444]/20 rounded-lg p-6">
|
||||
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
|
||||
<i class="fas fa-key mr-2 text-[#EF4444] text-sm"></i>
|
||||
API Keys
|
||||
</h2>
|
||||
<p class="text-xs text-[#94A3B8] mb-4">
|
||||
Manage API keys for authentication. Keys from environment variables are always included.
|
||||
</p>
|
||||
|
||||
<div class="space-y-4">
|
||||
<!-- API Keys List -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">API Keys</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">List of API keys (one per line or comma-separated)</p>
|
||||
<textarea x-model="settings.api_keys_text"
|
||||
rows="4"
|
||||
placeholder="sk-1234567890abcdef, sk-0987654321fedcba"
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#EF4444]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#EF4444]/50"></textarea>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Note: API keys are sensitive. Handle with care.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Gallery Settings Section -->
|
||||
<div class="bg-[#1E293B] border border-[#6366F1]/20 rounded-lg p-6">
|
||||
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
|
||||
<i class="fas fa-images mr-2 text-[#6366F1] text-sm"></i>
|
||||
Gallery Settings
|
||||
</h2>
|
||||
<p class="text-xs text-[#94A3B8] mb-4">
|
||||
Configure model and backend galleries
|
||||
</p>
|
||||
|
||||
<div class="space-y-4">
|
||||
<!-- Autoload Galleries -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Autoload Galleries</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Automatically load model galleries on startup</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.autoload_galleries"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#6366F1]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#6366F1]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Autoload Backend Galleries -->
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[#E5E7EB]">Autoload Backend Galleries</label>
|
||||
<p class="text-xs text-[#94A3B8] mt-1">Automatically load backend galleries on startup</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.autoload_backend_galleries"
|
||||
class="sr-only peer">
|
||||
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#6366F1]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#6366F1]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Galleries (JSON) -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Model Galleries (JSON)</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Array of gallery objects with 'url' and 'name' fields</p>
|
||||
<textarea x-model="settings.galleries_json"
|
||||
rows="4"
|
||||
placeholder='[{"url": "https://example.com", "name": "Example Gallery"}]'
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#6366F1]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#6366F1]/50"></textarea>
|
||||
</div>
|
||||
|
||||
<!-- Backend Galleries (JSON) -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Backend Galleries (JSON)</label>
|
||||
<p class="text-xs text-[#94A3B8] mb-2">Array of backend gallery objects with 'url' and 'name' fields</p>
|
||||
<textarea x-model="settings.backend_galleries_json"
|
||||
rows="4"
|
||||
placeholder='[{"url": "https://example.com", "name": "Example Backend Gallery"}]'
|
||||
class="w-full px-3 py-2 bg-[#101827] border border-[#6366F1]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#6366F1]/50"></textarea>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Source Info -->
|
||||
<div class="bg-yellow-500/10 border border-yellow-500/20 rounded-lg p-4" x-show="sourceInfo">
|
||||
<div class="flex items-start">
|
||||
<i class="fas fa-info-circle text-yellow-400 mr-2 mt-0.5"></i>
|
||||
<div class="flex-1">
|
||||
<p class="text-sm text-yellow-300 font-medium mb-1">Configuration Source</p>
|
||||
<p class="text-xs text-yellow-200" x-text="'Settings are currently loaded from: ' + sourceInfo"></p>
|
||||
<p class="text-xs text-yellow-200 mt-1" x-show="sourceInfo === 'env'">
|
||||
Environment variables take precedence. To modify settings via the UI, unset the relevant environment variables first.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Save Button -->
|
||||
<div class="flex justify-end">
|
||||
<button type="submit"
|
||||
:disabled="saving"
|
||||
class="inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/90 disabled:opacity-50 disabled:cursor-not-allowed text-white py-2 px-6 rounded-lg font-medium transition-colors">
|
||||
<i class="fas fa-save mr-2" :class="saving ? 'fa-spin fa-spinner' : ''"></i>
|
||||
<span x-text="saving ? 'Saving...' : 'Save Settings'"></span>
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
{{template "views/partials/footer" .}}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Alpine.js component backing the Application Settings page.
// Loads the current configuration from GET /api/settings, binds it to the
// form fields, and persists edits via POST /api/settings.
function settingsDashboard() {
    return {
        // Toast notifications currently on screen: [{ id, message, type }].
        notifications: [],
        // Monotonic id source for notifications. The previous Date.now()
        // scheme could produce duplicate ids for two notifications created
        // in the same millisecond, making dismissNotification() remove both.
        _notificationSeq: 0,
        // Form model mirroring the server-side settings payload.
        settings: {
            watchdog_enabled: false,
            watchdog_idle_enabled: false,
            watchdog_busy_enabled: false,
            watchdog_idle_timeout: '15m',
            watchdog_busy_timeout: '5m',
            single_backend: false,
            parallel_backend_requests: false,
            threads: 0,
            context_size: 0,
            f16: false,
            debug: false,
            cors: false,
            csrf: false,
            cors_allow_origins: '',
            p2p_token: '',
            p2p_network_id: '',
            federated: false,
            autoload_galleries: false,
            autoload_backend_galleries: false,
            galleries_json: '[]',
            backend_galleries_json: '[]',
            api_keys_text: ''
        },
        // Where the active configuration came from (e.g. 'env', 'default');
        // drives the yellow "Configuration Source" banner.
        sourceInfo: '',
        // True while a save request is in flight; disables the submit button.
        saving: false,

        init() {
            this.loadSettings();
        },

        // Fetch current settings and populate the form. JSON-valued fields
        // are pretty-printed into the textarea-backed *_json strings; API
        // keys are joined one per line.
        async loadSettings() {
            try {
                const response = await fetch('/api/settings');
                const data = await response.json();

                if (response.ok) {
                    this.settings = {
                        watchdog_enabled: data.watchdog_enabled,
                        watchdog_idle_enabled: data.watchdog_idle_enabled,
                        watchdog_busy_enabled: data.watchdog_busy_enabled,
                        watchdog_idle_timeout: data.watchdog_idle_timeout || '15m',
                        watchdog_busy_timeout: data.watchdog_busy_timeout || '5m',
                        single_backend: data.single_backend,
                        parallel_backend_requests: data.parallel_backend_requests,
                        threads: data.threads || 0,
                        context_size: data.context_size || 0,
                        f16: data.f16 || false,
                        debug: data.debug || false,
                        cors: data.cors || false,
                        csrf: data.csrf || false,
                        cors_allow_origins: data.cors_allow_origins || '',
                        p2p_token: data.p2p_token || '',
                        p2p_network_id: data.p2p_network_id || '',
                        federated: data.federated || false,
                        autoload_galleries: data.autoload_galleries || false,
                        autoload_backend_galleries: data.autoload_backend_galleries || false,
                        galleries_json: JSON.stringify(data.galleries || [], null, 2),
                        backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2),
                        api_keys_text: (data.api_keys || []).join('\n')
                    };
                    this.sourceInfo = data.source || 'default';
                } else {
                    this.addNotification('Failed to load settings: ' + (data.error || 'Unknown error'), 'error');
                }
            } catch (error) {
                console.error('Error loading settings:', error);
                this.addNotification('Failed to load settings: ' + error.message, 'error');
            }
        },

        // Keep dependent toggles consistent: the idle/busy checks cannot
        // remain active while the watchdog itself is disabled.
        updateWatchdogEnabled() {
            if (!this.settings.watchdog_enabled) {
                this.settings.watchdog_idle_enabled = false;
                this.settings.watchdog_busy_enabled = false;
            }
        },

        // Validate and POST the form state to /api/settings, then reload so
        // the UI reflects the authoritative (possibly env-overridden) values.
        async saveSettings() {
            if (this.saving) return;

            this.saving = true;

            try {
                const payload = {};

                // Copy each known field into the payload (guards kept so a
                // missing field is simply omitted rather than sent as undefined).
                if (this.settings.watchdog_enabled !== undefined) {
                    payload.watchdog_enabled = this.settings.watchdog_enabled;
                }
                if (this.settings.watchdog_idle_enabled !== undefined) {
                    payload.watchdog_idle_enabled = this.settings.watchdog_idle_enabled;
                }
                if (this.settings.watchdog_busy_enabled !== undefined) {
                    payload.watchdog_busy_enabled = this.settings.watchdog_busy_enabled;
                }
                if (this.settings.watchdog_idle_timeout) {
                    payload.watchdog_idle_timeout = this.settings.watchdog_idle_timeout;
                }
                if (this.settings.watchdog_busy_timeout) {
                    payload.watchdog_busy_timeout = this.settings.watchdog_busy_timeout;
                }
                if (this.settings.single_backend !== undefined) {
                    payload.single_backend = this.settings.single_backend;
                }
                if (this.settings.parallel_backend_requests !== undefined) {
                    payload.parallel_backend_requests = this.settings.parallel_backend_requests;
                }
                if (this.settings.threads !== undefined) {
                    // Explicit radix: avoid legacy octal/implementation quirks.
                    payload.threads = parseInt(this.settings.threads, 10) || 0;
                }
                if (this.settings.context_size !== undefined) {
                    payload.context_size = parseInt(this.settings.context_size, 10) || 0;
                }
                if (this.settings.f16 !== undefined) {
                    payload.f16 = this.settings.f16;
                }
                if (this.settings.debug !== undefined) {
                    payload.debug = this.settings.debug;
                }
                if (this.settings.cors !== undefined) {
                    payload.cors = this.settings.cors;
                }
                if (this.settings.csrf !== undefined) {
                    payload.csrf = this.settings.csrf;
                }
                if (this.settings.cors_allow_origins !== undefined) {
                    payload.cors_allow_origins = this.settings.cors_allow_origins;
                }
                if (this.settings.p2p_token !== undefined) {
                    payload.p2p_token = this.settings.p2p_token;
                }
                if (this.settings.p2p_network_id !== undefined) {
                    payload.p2p_network_id = this.settings.p2p_network_id;
                }
                if (this.settings.federated !== undefined) {
                    payload.federated = this.settings.federated;
                }
                if (this.settings.autoload_galleries !== undefined) {
                    payload.autoload_galleries = this.settings.autoload_galleries;
                }
                if (this.settings.autoload_backend_galleries !== undefined) {
                    payload.autoload_backend_galleries = this.settings.autoload_backend_galleries;
                }
                // Parse API keys from text (split by newline or comma, trim whitespace, filter empty)
                if (this.settings.api_keys_text !== undefined) {
                    const keys = this.settings.api_keys_text
                        .split(/[\n,]/)
                        .map(k => k.trim())
                        .filter(k => k.length > 0);
                    if (keys.length > 0) {
                        payload.api_keys = keys;
                    } else {
                        // If empty, send empty array to clear keys
                        payload.api_keys = [];
                    }
                }
                // Parse galleries JSON; abort the save (without POSTing) on
                // invalid JSON so the server never sees a half-formed payload.
                if (this.settings.galleries_json) {
                    try {
                        payload.galleries = JSON.parse(this.settings.galleries_json);
                    } catch (e) {
                        this.addNotification('Invalid galleries JSON: ' + e.message, 'error');
                        this.saving = false;
                        return;
                    }
                }
                if (this.settings.backend_galleries_json) {
                    try {
                        payload.backend_galleries = JSON.parse(this.settings.backend_galleries_json);
                    } catch (e) {
                        this.addNotification('Invalid backend galleries JSON: ' + e.message, 'error');
                        this.saving = false;
                        return;
                    }
                }

                const response = await fetch('/api/settings', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify(payload)
                });

                const data = await response.json();

                if (response.ok && data.success) {
                    this.addNotification('Settings saved successfully!', 'success');
                    // Reload settings to get updated source info
                    setTimeout(() => this.loadSettings(), 1000);
                } else {
                    this.addNotification('Failed to save settings: ' + (data.error || 'Unknown error'), 'error');
                }
            } catch (error) {
                console.error('Error saving settings:', error);
                this.addNotification('Failed to save settings: ' + error.message, 'error');
            } finally {
                this.saving = false;
            }
        },

        // Show a toast for 5 seconds. `type` selects the styling
        // ('success' or 'error').
        addNotification(message, type = 'success') {
            const id = ++this._notificationSeq;
            this.notifications.push({ id, message, type });
            setTimeout(() => this.dismissNotification(id), 5000);
        },

        // Remove a toast by id (no-op if already dismissed).
        dismissNotification(id) {
            this.notifications = this.notifications.filter(n => n.id !== id);
        }
    }
}
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
102
core/p2p/sync.go
102
core/p2p/sync.go
@@ -1,102 +0,0 @@
|
||||
package p2p
|
||||
|
||||
import (
|
||||
"context"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
|
||||
"github.com/mudler/edgevpn/pkg/node"
|
||||
zlog "github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func syncState(ctx context.Context, n *node.Node, app *application.Application) error {
|
||||
zlog.Debug().Msg("[p2p-sync] Syncing state")
|
||||
|
||||
whatWeHave := []string{}
|
||||
for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() {
|
||||
whatWeHave = append(whatWeHave, model.Name)
|
||||
}
|
||||
|
||||
ledger, _ := n.Ledger()
|
||||
currentData := ledger.CurrentData()
|
||||
zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData)
|
||||
data, exists := ledger.GetKey("shared_state", "models")
|
||||
if !exists {
|
||||
ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave)
|
||||
zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave)
|
||||
}
|
||||
|
||||
models := []string{}
|
||||
if err := data.Unmarshal(&models); err != nil {
|
||||
zlog.Warn().Err(err).Msg("error unmarshalling models")
|
||||
return nil
|
||||
}
|
||||
|
||||
zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models)
|
||||
|
||||
// Sync with our state
|
||||
whatIsNotThere := []string{}
|
||||
for _, model := range whatWeHave {
|
||||
if !slices.Contains(models, model) {
|
||||
whatIsNotThere = append(whatIsNotThere, model)
|
||||
}
|
||||
}
|
||||
if len(whatIsNotThere) > 0 {
|
||||
zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...))
|
||||
ledger.AnnounceUpdate(
|
||||
ctx,
|
||||
1*time.Minute,
|
||||
"shared_state",
|
||||
"models",
|
||||
append(models, whatIsNotThere...),
|
||||
)
|
||||
}
|
||||
|
||||
// Check if we have a model that is not in our state, otherwise install it
|
||||
for _, model := range models {
|
||||
if slices.Contains(whatWeHave, model) {
|
||||
zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model)
|
||||
continue
|
||||
}
|
||||
|
||||
// we install model
|
||||
zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model)
|
||||
|
||||
uuid, err := uuid.NewUUID()
|
||||
if err != nil {
|
||||
zlog.Error().Err(err).Msg("error generating UUID")
|
||||
continue
|
||||
}
|
||||
|
||||
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
|
||||
ID: uuid.String(),
|
||||
GalleryElementName: model,
|
||||
Galleries: app.ApplicationConfig().Galleries,
|
||||
BackendGalleries: app.ApplicationConfig().BackendGalleries,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func Sync(ctx context.Context, n *node.Node, app *application.Application) error {
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(1 * time.Minute):
|
||||
if err := syncState(ctx, n, app); err != nil {
|
||||
zlog.Error().Err(err).Msg("error syncing state")
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
@@ -48,12 +48,15 @@ curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}'
|
||||
|
||||
For more flexible memory management, LocalAI provides watchdog mechanisms that automatically unload models based on their activity state. This allows multiple models to be loaded simultaneously, but automatically frees memory when models become inactive or stuck.
|
||||
|
||||
> **Note:** Watchdog settings can be configured via the [Runtime Settings]({{%relref "features/runtime-settings#watchdog-settings" %}}) web interface, which allows you to adjust settings without restarting the application.
|
||||
|
||||
### Idle Watchdog
|
||||
|
||||
The idle watchdog monitors models that haven't been used for a specified period and automatically unloads them to free VRAM.
|
||||
|
||||
#### Configuration
|
||||
|
||||
Via environment variables or CLI:
|
||||
```bash
|
||||
LOCALAI_WATCHDOG_IDLE=true ./local-ai
|
||||
|
||||
@@ -62,12 +65,15 @@ LOCALAI_WATCHDOG_IDLE=true LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m ./local-ai
|
||||
./local-ai --enable-watchdog-idle --watchdog-idle-timeout=10m
|
||||
```
|
||||
|
||||
Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Idle Enabled" with your desired timeout.
|
||||
|
||||
### Busy Watchdog
|
||||
|
||||
The busy watchdog monitors models that have been processing requests for an unusually long time and terminates them if they exceed a threshold. This is useful for detecting and recovering from stuck or hung backends.
|
||||
|
||||
#### Configuration
|
||||
|
||||
Via environment variables or CLI:
|
||||
```bash
|
||||
LOCALAI_WATCHDOG_BUSY=true ./local-ai
|
||||
|
||||
@@ -76,6 +82,8 @@ LOCALAI_WATCHDOG_BUSY=true LOCALAI_WATCHDOG_BUSY_TIMEOUT=10m ./local-ai
|
||||
./local-ai --enable-watchdog-busy --watchdog-busy-timeout=10m
|
||||
```
|
||||
|
||||
Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Busy Enabled" with your desired timeout.
|
||||
|
||||
### Combined Configuration
|
||||
|
||||
You can enable both watchdogs simultaneously for comprehensive memory management:
|
||||
|
||||
@@ -32,6 +32,7 @@ LocalAI provides a comprehensive set of features for running AI models locally.
|
||||
- **[Stores](stores/)** - Vector similarity search for embeddings
|
||||
- **[Model Gallery](model-gallery/)** - Browse and install pre-configured models
|
||||
- **[Backends](backends/)** - Learn about available backends and how to manage them
|
||||
- **[Runtime Settings](runtime-settings/)** - Configure application settings via web UI without restarting
|
||||
|
||||
## Getting Started
|
||||
|
||||
|
||||
@@ -33,12 +33,18 @@ Navigate the WebUI interface in the "Models" section from the navbar at the top.
|
||||
|
||||
## Add other galleries
|
||||
|
||||
You can add other galleries by setting the `GALLERIES` environment variable. The `GALLERIES` environment variable is a list of JSON objects, where each object has a `name` and a `url` field. The `name` field is the name of the gallery, and the `url` field is the URL of the gallery's index file, for example:
|
||||
You can add other galleries by:
|
||||
|
||||
1. **Using the Web UI**: Navigate to the [Runtime Settings]({{%relref "features/runtime-settings#gallery-settings" %}}) page and configure galleries through the interface.
|
||||
|
||||
2. **Using Environment Variables**: Set the `GALLERIES` environment variable. The `GALLERIES` environment variable is a list of JSON objects, where each object has a `name` and a `url` field. The `name` field is the name of the gallery, and the `url` field is the URL of the gallery's index file, for example:
|
||||
|
||||
```json
|
||||
GALLERIES=[{"name":"<GALLERY_NAME>", "url":"<GALLERY_URL"}]
|
||||
```
|
||||
|
||||
3. **Using Configuration Files**: Add galleries to `runtime_settings.json` in the `LOCALAI_CONFIG_DIR` directory.
|
||||
|
||||
The models in the gallery will be automatically indexed and available for installation.
|
||||
|
||||
## API Reference
|
||||
|
||||
180
docs/content/features/runtime-settings.md
Normal file
180
docs/content/features/runtime-settings.md
Normal file
@@ -0,0 +1,180 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "⚙️ Runtime Settings"
|
||||
weight = 25
|
||||
url = '/features/runtime-settings'
|
||||
+++
|
||||
|
||||
LocalAI provides a web-based interface for managing application settings at runtime. These settings can be configured through the web UI and are automatically persisted to a configuration file, allowing changes to take effect immediately without requiring a restart.
|
||||
|
||||
## Accessing Runtime Settings
|
||||
|
||||
Navigate to the **Settings** page from the management interface at `http://localhost:8080/manage`. The settings page provides a comprehensive interface for configuring various aspects of LocalAI.
|
||||
|
||||
## Available Settings
|
||||
|
||||
### Watchdog Settings
|
||||
|
||||
The watchdog monitors backend activity and can automatically stop idle or overly busy models to free up resources.
|
||||
|
||||
- **Watchdog Enabled**: Master switch to enable/disable the watchdog
|
||||
- **Watchdog Idle Enabled**: Enable stopping backends that are idle longer than the idle timeout
|
||||
- **Watchdog Busy Enabled**: Enable stopping backends that are busy longer than the busy timeout
|
||||
- **Watchdog Idle Timeout**: Duration threshold for idle backends (default: `15m`)
|
||||
- **Watchdog Busy Timeout**: Duration threshold for busy backends (default: `5m`)
|
||||
|
||||
Changes to watchdog settings are applied immediately by restarting the watchdog service.
|
||||
|
||||
### Backend Configuration
|
||||
|
||||
- **Single Backend**: Allow only one backend to run at a time
|
||||
- **Parallel Backend Requests**: Enable backends to handle multiple requests in parallel if supported
|
||||
|
||||
### Performance Settings
|
||||
|
||||
- **Threads**: Number of threads used for parallel computation (recommended: number of physical cores)
|
||||
- **Context Size**: Default context size for models (default: `512`)
|
||||
- **F16**: Enable GPU acceleration using 16-bit floating point
|
||||
|
||||
### Debug and Logging
|
||||
|
||||
- **Debug Mode**: Enable debug logging (deprecated, use log-level instead)
|
||||
|
||||
### API Security
|
||||
|
||||
- **CORS**: Enable Cross-Origin Resource Sharing
|
||||
- **CORS Allow Origins**: Comma-separated list of allowed CORS origins
|
||||
- **CSRF**: Enable CSRF protection middleware
|
||||
- **API Keys**: Manage API keys for authentication (one per line or comma-separated)
|
||||
|
||||
### P2P Settings
|
||||
|
||||
Configure peer-to-peer networking for distributed inference:
|
||||
|
||||
- **P2P Token**: Authentication token for P2P network
|
||||
- **P2P Network ID**: Network identifier for P2P connections
|
||||
- **Federated Mode**: Enable federated mode for P2P network
|
||||
|
||||
Changes to P2P settings automatically restart the P2P stack with the new configuration.
|
||||
|
||||
### Gallery Settings
|
||||
|
||||
Manage model and backend galleries:
|
||||
|
||||
- **Model Galleries**: JSON array of gallery objects with `url` and `name` fields
|
||||
- **Backend Galleries**: JSON array of backend gallery objects
|
||||
- **Autoload Galleries**: Automatically load model galleries on startup
|
||||
- **Autoload Backend Galleries**: Automatically load backend galleries on startup
|
||||
|
||||
## Configuration Persistence
|
||||
|
||||
All settings are automatically saved to `runtime_settings.json` in the `LOCALAI_CONFIG_DIR` directory (default: `BASEPATH/configuration`). This file is watched for changes, so modifications made directly to the file will also be applied at runtime.
|
||||
|
||||
## Environment Variable Precedence
|
||||
|
||||
Environment variables take precedence over settings configured via the web UI or configuration files. If a setting is controlled by an environment variable, it cannot be modified through the web interface. The settings page will indicate when a setting is controlled by an environment variable.
|
||||
|
||||
The precedence order is:
|
||||
1. **Environment variables** (highest priority)
|
||||
2. **Configuration files** (`runtime_settings.json`, `api_keys.json`)
|
||||
3. **Default values** (lowest priority)
|
||||
|
||||
## Example Configuration
|
||||
|
||||
The `runtime_settings.json` file follows this structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"watchdog_enabled": true,
|
||||
"watchdog_idle_enabled": true,
|
||||
"watchdog_busy_enabled": false,
|
||||
"watchdog_idle_timeout": "15m",
|
||||
"watchdog_busy_timeout": "5m",
|
||||
"single_backend": false,
|
||||
"parallel_backend_requests": true,
|
||||
"threads": 8,
|
||||
"context_size": 2048,
|
||||
"f16": false,
|
||||
"debug": false,
|
||||
"cors": true,
|
||||
"csrf": false,
|
||||
"cors_allow_origins": "*",
|
||||
"p2p_token": "",
|
||||
"p2p_network_id": "",
|
||||
"federated": false,
|
||||
"galleries": [
|
||||
{
|
||||
"url": "github:mudler/LocalAI/gallery/index.yaml@master",
|
||||
"name": "localai"
|
||||
}
|
||||
],
|
||||
"backend_galleries": [
|
||||
{
|
||||
"url": "github:mudler/LocalAI/backend/index.yaml@master",
|
||||
"name": "localai"
|
||||
}
|
||||
],
|
||||
"autoload_galleries": true,
|
||||
"autoload_backend_galleries": true,
|
||||
"api_keys": []
|
||||
}
|
||||
```
|
||||
|
||||
## API Keys Management
|
||||
|
||||
API keys can be managed through the runtime settings interface. Keys can be entered one per line or comma-separated.
|
||||
|
||||
**Important Notes:**
|
||||
- API keys from environment variables are always included and cannot be removed via the UI
|
||||
- Runtime API keys are stored in `runtime_settings.json`
|
||||
- For backward compatibility, API keys can also be managed via `api_keys.json`
|
||||
- Empty arrays will clear all runtime API keys (but preserve environment variable keys)
|
||||
|
||||
## Dynamic Configuration
|
||||
|
||||
The runtime settings system supports dynamic configuration file watching. When `LOCALAI_CONFIG_DIR` is set, LocalAI monitors the following files for changes:
|
||||
|
||||
- `runtime_settings.json` - Unified runtime settings
|
||||
- `api_keys.json` - API keys (for backward compatibility)
|
||||
- `external_backends.json` - External backend configurations
|
||||
|
||||
Changes to these files are automatically detected and applied without requiring a restart.
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use Environment Variables for Production**: For production deployments, use environment variables for critical settings to ensure they cannot be accidentally changed via the web UI.
|
||||
|
||||
2. **Backup Configuration Files**: Before making significant changes, consider backing up your `runtime_settings.json` file.
|
||||
|
||||
3. **Monitor Resource Usage**: When enabling watchdog features, monitor your system to ensure the timeout values are appropriate for your workload.
|
||||
|
||||
4. **Secure API Keys**: API keys are sensitive information. Ensure proper file permissions on configuration files (they should be readable only by the LocalAI process).
|
||||
|
||||
5. **Test Changes**: Some settings (like watchdog timeouts) may require testing to find optimal values for your specific use case.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Settings Not Applying
|
||||
|
||||
If settings are not being applied:
|
||||
1. Check if the setting is controlled by an environment variable
|
||||
2. Verify the `LOCALAI_CONFIG_DIR` is set correctly
|
||||
3. Check file permissions on `runtime_settings.json`
|
||||
4. Review application logs for configuration errors
|
||||
|
||||
### Watchdog Not Working
|
||||
|
||||
If the watchdog is not functioning:
|
||||
1. Ensure "Watchdog Enabled" is turned on
|
||||
2. Verify at least one of the idle or busy watchdogs is enabled
|
||||
3. Check that timeout values are reasonable for your workload
|
||||
4. Review logs for watchdog-related messages
|
||||
|
||||
### P2P Not Starting
|
||||
|
||||
If P2P is not starting:
|
||||
1. Verify the P2P token is set (non-empty)
|
||||
2. Check network connectivity
|
||||
3. Ensure the P2P network ID matches across nodes (if using federated mode)
|
||||
4. Review logs for P2P-related errors
|
||||
|
||||
@@ -1,23 +1,46 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "Install and Run Models"
|
||||
weight = 4
|
||||
icon = "rocket_launch"
|
||||
title = "Setting Up Models"
|
||||
weight = 2
|
||||
icon = "hub"
|
||||
description = "Learn how to install, configure, and manage models in LocalAI"
|
||||
+++
|
||||
|
||||
To install models with LocalAI, you can:
|
||||
This section covers everything you need to know about installing and configuring models in LocalAI. You'll learn multiple methods to get models running.
|
||||
|
||||
- Browse the Model Gallery from the Web Interface and install models with a couple of clicks. For more details, refer to the [Gallery Documentation]({{% relref "features/model-gallery" %}}).
|
||||
- Specify a model from the LocalAI gallery during startup, e.g., `local-ai run <model_gallery_name>`.
|
||||
- Use a URI to specify a model file (e.g., `huggingface://...`, `oci://`, or `ollama://`) when starting LocalAI, e.g., `local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`.
|
||||
- Specify a URL to a model configuration file when starting LocalAI, e.g., `local-ai run https://gist.githubusercontent.com/.../phi-2.yaml`.
|
||||
- Manually install the models by copying the files into the models directory (`--models`).
|
||||
## Prerequisites
|
||||
|
||||
## Run and Install Models via the Gallery
|
||||
- LocalAI installed and running (see [Quickstart]({{% relref "getting-started/quickstart" %}}) if you haven't set it up yet)
|
||||
- Basic understanding of command line usage
|
||||
|
||||
To run models available in the LocalAI gallery, you can use the WebUI or specify the model name when starting LocalAI. Models can be found in the gallery via the Web interface, the [model gallery](https://models.localai.io), or the CLI with: `local-ai models list`.
|
||||
## Method 1: Using the Model Gallery (Easiest)
|
||||
|
||||
To install a model from the gallery, use the model name as the URI. For example, to run LocalAI with the Hermes model, execute:
|
||||
The Model Gallery is the simplest way to install models. It provides pre-configured models ready to use.
|
||||
|
||||
### Via WebUI
|
||||
|
||||
1. Open the LocalAI WebUI at `http://localhost:8080`
|
||||
2. Navigate to the "Models" tab
|
||||
3. Browse available models
|
||||
4. Click "Install" on any model you want
|
||||
5. Wait for installation to complete
|
||||
|
||||
For more details, refer to the [Gallery Documentation]({{% relref "features/model-gallery" %}}).
|
||||
|
||||
### Via CLI
|
||||
|
||||
```bash
|
||||
# List available models
|
||||
local-ai models list
|
||||
|
||||
# Install a specific model
|
||||
local-ai models install llama-3.2-1b-instruct:q4_k_m
|
||||
|
||||
# Start LocalAI with a model from the gallery
|
||||
local-ai run llama-3.2-1b-instruct:q4_k_m
|
||||
```
|
||||
|
||||
To run models available in the LocalAI gallery, you can use the model name as the URI. For example, to run LocalAI with the Hermes model, execute:
|
||||
|
||||
```bash
|
||||
local-ai run hermes-2-theta-llama-3-8b
|
||||
@@ -31,7 +54,82 @@ local-ai models install hermes-2-theta-llama-3-8b
|
||||
|
||||
Note: The galleries available in LocalAI can be customized to point to a different URL or a local directory. For more information on how to setup your own gallery, see the [Gallery Documentation]({{% relref "features/model-gallery" %}}).
|
||||
|
||||
## Run Models via URI
|
||||
### Browse Online
|
||||
|
||||
Visit [models.localai.io](https://models.localai.io) to browse all available models in your browser.
|
||||
|
||||
## Method 1.5: Import Models via WebUI
|
||||
|
||||
The WebUI provides a powerful model import interface that supports both simple and advanced configuration:
|
||||
|
||||
### Simple Import Mode
|
||||
|
||||
1. Open the LocalAI WebUI at `http://localhost:8080`
|
||||
2. Click "Import Model"
|
||||
3. Enter the model URI (e.g., `https://huggingface.co/Qwen/Qwen3-VL-8B-Instruct-GGUF`)
|
||||
4. Optionally configure preferences:
|
||||
- Backend selection
|
||||
- Model name
|
||||
- Description
|
||||
- Quantizations
|
||||
- Embeddings support
|
||||
- Custom preferences
|
||||
5. Click "Import Model" to start the import process
|
||||
|
||||
### Advanced Import Mode
|
||||
|
||||
For full control over model configuration:
|
||||
|
||||
1. In the WebUI, click "Import Model"
|
||||
2. Toggle to "Advanced Mode"
|
||||
3. Edit the YAML configuration directly in the code editor
|
||||
4. Use the "Validate" button to check your configuration
|
||||
5. Click "Create" or "Update" to save
|
||||
|
||||
The advanced editor includes:
|
||||
- Syntax highlighting
|
||||
- YAML validation
|
||||
- Format and copy tools
|
||||
- Full configuration options
|
||||
|
||||
This is especially useful for:
|
||||
- Custom model configurations
|
||||
- Fine-tuning model parameters
|
||||
- Setting up complex model setups
|
||||
- Editing existing model configurations
|
||||
|
||||
## Method 2: Installing from Hugging Face
|
||||
|
||||
LocalAI can directly install models from Hugging Face:
|
||||
|
||||
```bash
|
||||
# Install and run a model from Hugging Face
|
||||
local-ai run huggingface://TheBloke/phi-2-GGUF
|
||||
```
|
||||
|
||||
The format is: `huggingface://<repository>/<model-file>` (<model-file> is optional)
|
||||
|
||||
### Examples
|
||||
|
||||
```bash
|
||||
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
|
||||
```
|
||||
|
||||
## Method 3: Installing from OCI Registries
|
||||
|
||||
### Ollama Registry
|
||||
|
||||
```bash
|
||||
local-ai run ollama://gemma:2b
|
||||
```
|
||||
|
||||
### Standard OCI Registry
|
||||
|
||||
```bash
|
||||
local-ai run oci://localai/phi-2:latest
|
||||
```
|
||||
|
||||
### Run Models via URI
|
||||
|
||||
To run models via URI, specify a URI to a model file or a configuration file when starting LocalAI. Valid syntax includes:
|
||||
|
||||
@@ -51,18 +149,45 @@ local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
|
||||
local-ai run oci://localai/phi-2:latest
|
||||
```
|
||||
|
||||
## Run Models Manually
|
||||
## Method 4: Manual Installation
|
||||
|
||||
Follow these steps to manually run models using LocalAI:
|
||||
For full control, you can manually download and configure models.
|
||||
|
||||
1. **Prepare Your Model and Configuration Files**:
|
||||
Ensure you have a model file and, if necessary, a configuration YAML file. Customize model defaults and settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "advanced" %}}).
|
||||
### Step 1: Download a Model
|
||||
|
||||
2. **GPU Acceleration**:
|
||||
For instructions on GPU acceleration, visit the [GPU Acceleration]({{% relref "features/gpu-acceleration" %}}) page.
|
||||
Download a GGUF model file. Popular sources:
|
||||
|
||||
3. **Run LocalAI**:
|
||||
Choose one of the following methods to run LocalAI:
|
||||
- [Hugging Face](https://huggingface.co/models?search=gguf)
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
mkdir -p models
|
||||
|
||||
wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf \
|
||||
-O models/phi-2.Q4_K_M.gguf
|
||||
```
|
||||
|
||||
### Step 2: Create a Configuration File (Optional)
|
||||
|
||||
Create a YAML file to configure the model:
|
||||
|
||||
```yaml
|
||||
# models/phi-2.yaml
|
||||
name: phi-2
|
||||
parameters:
|
||||
model: phi-2.Q4_K_M.gguf
|
||||
temperature: 0.7
|
||||
context_size: 2048
|
||||
threads: 4
|
||||
backend: llama-cpp
|
||||
```
|
||||
|
||||
Customize model defaults and settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "advanced" %}}).
|
||||
|
||||
### Step 3: Run LocalAI
|
||||
|
||||
Choose one of the following methods to run LocalAI:
|
||||
|
||||
{{< tabs >}}
|
||||
{{% tab title="Docker" %}}
|
||||
@@ -74,7 +199,6 @@ cp your-model.gguf models/
|
||||
|
||||
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
|
||||
|
||||
|
||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "your-model.gguf",
|
||||
"prompt": "A long time ago in a galaxy far, far away",
|
||||
@@ -121,10 +245,8 @@ git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI
|
||||
|
||||
|
||||
cp your-model.gguf models/
|
||||
|
||||
|
||||
docker compose up -d --pull always
|
||||
|
||||
curl http://localhost:8080/v1/models
|
||||
@@ -154,6 +276,11 @@ For Kubernetes deployment, see the [Kubernetes installation guide]({{% relref "i
|
||||
|
||||
LocalAI binary releases are available on [GitHub](https://github.com/go-skynet/LocalAI/releases).
|
||||
|
||||
```bash
|
||||
# With binary
|
||||
local-ai --models-path ./models
|
||||
```
|
||||
|
||||
{{% notice tip %}}
|
||||
If installing on macOS, you might encounter a message saying:
|
||||
|
||||
@@ -174,4 +301,115 @@ For instructions on building LocalAI from source, see the [Build from Source gui
|
||||
{{% /tab %}}
|
||||
{{< /tabs >}}
|
||||
|
||||
### GPU Acceleration
|
||||
|
||||
For instructions on GPU acceleration, visit the [GPU Acceleration]({{% relref "features/gpu-acceleration" %}}) page.
|
||||
|
||||
For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations).
|
||||
|
||||
## Understanding Model Files
|
||||
|
||||
### File Formats
|
||||
|
||||
- **GGUF**: Modern format, recommended for most use cases
|
||||
- **GGML**: Older format, still supported but deprecated
|
||||
|
||||
### Quantization Levels
|
||||
|
||||
Models come in different quantization levels (quality vs. size trade-off):
|
||||
|
||||
| Quantization | Size | Quality | Use Case |
|
||||
|-------------|------|---------|----------|
|
||||
| Q8_0 | Largest | Highest | Best quality, requires more RAM |
|
||||
| Q6_K | Large | Very High | High quality |
|
||||
| Q4_K_M | Medium | High | Balanced (recommended) |
|
||||
| Q4_K_S | Small | Medium | Lower RAM usage |
|
||||
| Q2_K | Smallest | Lower | Minimal RAM, lower quality |
|
||||
|
||||
### Choosing the Right Model
|
||||
|
||||
Consider:
|
||||
|
||||
- **RAM available**: Larger models need more RAM
|
||||
- **Use case**: Different models excel at different tasks
|
||||
- **Speed**: Smaller quantizations are faster
|
||||
- **Quality**: Higher quantizations produce better output
|
||||
|
||||
## Model Configuration
|
||||
|
||||
### Basic Configuration
|
||||
|
||||
Create a YAML file in your models directory:
|
||||
|
||||
```yaml
|
||||
name: my-model
|
||||
parameters:
|
||||
model: model.gguf
|
||||
temperature: 0.7
|
||||
top_p: 0.9
|
||||
context_size: 2048
|
||||
threads: 4
|
||||
backend: llama-cpp
|
||||
```
|
||||
|
||||
### Advanced Configuration
|
||||
|
||||
See the [Model Configuration]({{% relref "advanced/model-configuration" %}}) guide for all available options.
|
||||
|
||||
## Managing Models
|
||||
|
||||
### List Installed Models
|
||||
|
||||
```bash
|
||||
# Via API
|
||||
curl http://localhost:8080/v1/models
|
||||
|
||||
# Via CLI
|
||||
local-ai models list
|
||||
```
|
||||
|
||||
### Remove Models
|
||||
|
||||
Simply delete the model file and configuration from your models directory:
|
||||
|
||||
```bash
|
||||
rm models/model-name.gguf
|
||||
rm models/model-name.yaml # if exists
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Model Not Loading
|
||||
|
||||
1. **Check backend**: Ensure the required backend is installed
|
||||
|
||||
```bash
|
||||
local-ai backends list
|
||||
local-ai backends install llama-cpp # if needed
|
||||
```
|
||||
|
||||
2. **Check logs**: Enable debug mode
|
||||
|
||||
```bash
|
||||
DEBUG=true local-ai
|
||||
```
|
||||
|
||||
3. **Verify file**: Ensure the model file is not corrupted
|
||||
|
||||
### Out of Memory
|
||||
|
||||
- Use a smaller quantization (Q4_K_S or Q2_K)
|
||||
- Reduce `context_size` in configuration
|
||||
- Close other applications to free RAM
|
||||
|
||||
### Wrong Backend
|
||||
|
||||
Check the [Compatibility Table]({{% relref "reference/compatibility-table" %}}) to ensure you're using the correct backend for your model.
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Start small**: Begin with smaller models to test your setup
|
||||
2. **Use quantized models**: Q4_K_M is a good balance for most use cases
|
||||
3. **Organize models**: Keep your models directory organized
|
||||
4. **Backup configurations**: Save your YAML configurations
|
||||
5. **Monitor resources**: Watch RAM and disk usage
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "Quickstart"
|
||||
weight = 3
|
||||
weight = 1
|
||||
url = '/basics/getting_started/'
|
||||
icon = "rocket_launch"
|
||||
+++
|
||||
|
||||
@@ -8,7 +8,7 @@ url: '/installation/docker/'
|
||||
{{% notice tip %}}
|
||||
**Recommended Installation Method**
|
||||
|
||||
Docker is the recommended way to install LocalAI as it works across all platforms (Linux, macOS, Windows) and provides the easiest setup experience.
|
||||
Docker is the recommended way to install LocalAI and provides the easiest setup experience.
|
||||
{{% /notice %}}
|
||||
|
||||
LocalAI provides Docker images that work with Docker, Podman, and other container engines. These images are available on [Docker Hub](https://hub.docker.com/r/localai/localai) and [Quay.io](https://quay.io/repository/go-skynet/local-ai).
|
||||
|
||||
@@ -24,7 +24,7 @@ Complete reference for all LocalAI command-line interface (CLI) parameters and e
|
||||
| `--models-path` | `BASEPATH/models` | Path containing models used for inferencing | `$LOCALAI_MODELS_PATH`, `$MODELS_PATH` |
|
||||
| `--generated-content-path` | `/tmp/generated/content` | Location for assets generated by backends (e.g. stablediffusion, images, audio, videos) | `$LOCALAI_GENERATED_CONTENT_PATH`, `$GENERATED_CONTENT_PATH` |
|
||||
| `--upload-path` | `/tmp/localai/upload` | Path to store uploads from files API | `$LOCALAI_UPLOAD_PATH`, `$UPLOAD_PATH` |
|
||||
| `--localai-config-dir` | `BASEPATH/configuration` | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | `$LOCALAI_CONFIG_DIR` |
|
||||
| `--localai-config-dir` | `BASEPATH/configuration` | Directory for dynamic loading of certain configuration files (currently runtime_settings.json, api_keys.json, and external_backends.json). See [Runtime Settings]({{%relref "features/runtime-settings" %}}) for web-based configuration. | `$LOCALAI_CONFIG_DIR` |
|
||||
| `--localai-config-dir-poll-interval` | | Time duration to poll the LocalAI Config Dir if your system has broken fsnotify events (example: `1m`) | `$LOCALAI_CONFIG_DIR_POLL_INTERVAL` |
|
||||
| `--models-config-file` | | YAML file containing a list of model backend configs (alias: `--config-file`) | `$LOCALAI_MODELS_CONFIG_FILE`, `$CONFIG_FILE` |
|
||||
|
||||
@@ -80,6 +80,7 @@ For more information on VRAM management, see [VRAM and Memory Management]({{%rel
|
||||
| `--upload-limit` | `15` | Default upload-limit in MB | `$LOCALAI_UPLOAD_LIMIT`, `$UPLOAD_LIMIT` |
|
||||
| `--api-keys` | | List of API Keys to enable API authentication. When this is set, all requests must be authenticated with one of these API keys | `$LOCALAI_API_KEY`, `$API_KEY` |
|
||||
| `--disable-webui` | `false` | Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface | `$LOCALAI_DISABLE_WEBUI`, `$DISABLE_WEBUI` |
|
||||
| `--disable-runtime-settings` | `false` | Disables the runtime settings feature. When set to true, the server will not load runtime settings from the `runtime_settings.json` file and the settings web interface will be disabled | `$LOCALAI_DISABLE_RUNTIME_SETTINGS`, `$DISABLE_RUNTIME_SETTINGS` |
|
||||
| `--disable-gallery-endpoint` | `false` | Disable the gallery endpoints | `$LOCALAI_DISABLE_GALLERY_ENDPOINT`, `$DISABLE_GALLERY_ENDPOINT` |
|
||||
| `--disable-metrics-endpoint` | `false` | Disable the `/metrics` endpoint | `$LOCALAI_DISABLE_METRICS_ENDPOINT`, `$DISABLE_METRICS_ENDPOINT` |
|
||||
| `--machine-tag` | | If not empty, add that string to Machine-Tag header in each response. Useful to track response from different machines using multiple P2P federated nodes | `$LOCALAI_MACHINE_TAG`, `$MACHINE_TAG` |
|
||||
|
||||
@@ -59,4 +59,8 @@
|
||||
{{- template "_internal/google_analytics.html" . -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
<!-- Landing page assets (lotusdocs) -->
|
||||
{{- if .IsHome -}}
|
||||
{{- partial "landing-head.html" . -}}
|
||||
{{- end -}}
|
||||
</head>
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
<p>© 2023-2025 <a href="https://mudler.pm">Ettore Di Giacinto</a></p>
|
||||
|
||||
|
||||
|
||||
22
docs/static/install.sh
vendored
22
docs/static/install.sh
vendored
@@ -363,7 +363,7 @@ install_container_toolkit_apt() {
|
||||
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
|
||||
$SUDO tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
|
||||
$SUDO sudo apt-get update && $SUDO apt-get install -y nvidia-container-toolkit
|
||||
$SUDO apt-get update && $SUDO apt-get install -y nvidia-container-toolkit
|
||||
}
|
||||
|
||||
# ref: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-zypper
|
||||
@@ -877,6 +877,16 @@ if [ "$OS" = "Darwin" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
SUDO=
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
# Running as root, no need for sudo
|
||||
if ! available sudo; then
|
||||
fatal "This script requires superuser permissions. Please re-run as root."
|
||||
fi
|
||||
|
||||
SUDO="sudo"
|
||||
fi
|
||||
|
||||
if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then
|
||||
HAS_AMD=true
|
||||
fi
|
||||
@@ -889,16 +899,6 @@ if check_gpu lspci intel || check_gpu lshw intel; then
|
||||
HAS_INTEL=true
|
||||
fi
|
||||
|
||||
SUDO=
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
# Running as root, no need for sudo
|
||||
if ! available sudo; then
|
||||
fatal "This script requires superuser permissions. Please re-run as root."
|
||||
fi
|
||||
|
||||
SUDO="sudo"
|
||||
fi
|
||||
|
||||
PACKAGE_MANAGER=
|
||||
for PACKAGE_MANAGER in dnf yum apt-get; do
|
||||
if available $PACKAGE_MANAGER; then
|
||||
|
||||
28
go.mod
28
go.mod
@@ -53,8 +53,7 @@ require (
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.60.0
|
||||
go.opentelemetry.io/otel/metric v1.38.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.38.0
|
||||
google.golang.org/grpc v1.76.0
|
||||
google.golang.org/protobuf v1.36.10
|
||||
google.golang.org/grpc v1.77.0
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
oras.land/oras-go/v2 v2.6.0
|
||||
@@ -66,6 +65,7 @@ require (
|
||||
github.com/stretchr/testify v1.11.1 // indirect
|
||||
github.com/swaggo/files/v2 v2.0.2 // indirect
|
||||
github.com/valyala/fasttemplate v1.2.2 // indirect
|
||||
google.golang.org/protobuf v1.36.10 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -141,15 +141,15 @@ require (
|
||||
github.com/wlynxg/anet v0.0.5 // indirect
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
|
||||
go.uber.org/mock v0.5.2 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.3
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/image v0.25.0 // indirect
|
||||
golang.org/x/net v0.46.0 // indirect; indirect (for websocket)
|
||||
golang.org/x/oauth2 v0.30.0 // indirect
|
||||
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 // indirect
|
||||
golang.org/x/net v0.47.0 // indirect; indirect (for websocket)
|
||||
golang.org/x/oauth2 v0.32.0 // indirect
|
||||
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8 // indirect
|
||||
golang.org/x/time v0.12.0 // indirect
|
||||
)
|
||||
|
||||
@@ -313,19 +313,19 @@ require (
|
||||
go.uber.org/fx v1.24.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.27.0 // indirect
|
||||
golang.org/x/crypto v0.43.0 // indirect
|
||||
golang.org/x/crypto v0.45.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 // indirect
|
||||
golang.org/x/mod v0.28.0 // indirect
|
||||
golang.org/x/sync v0.17.0 // indirect
|
||||
golang.org/x/sys v0.37.0 // indirect
|
||||
golang.org/x/term v0.36.0 // indirect
|
||||
golang.org/x/text v0.30.0 // indirect
|
||||
golang.org/x/tools v0.37.0 // indirect
|
||||
golang.org/x/mod v0.29.0 // indirect
|
||||
golang.org/x/sync v0.18.0 // indirect
|
||||
golang.org/x/sys v0.38.0 // indirect
|
||||
golang.org/x/term v0.37.0 // indirect
|
||||
golang.org/x/text v0.31.0 // indirect
|
||||
golang.org/x/tools v0.38.0 // indirect
|
||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
|
||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb // indirect
|
||||
golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
|
||||
gonum.org/v1/gonum v0.16.0 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect
|
||||
gopkg.in/fsnotify.v1 v1.4.7 // indirect
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
|
||||
howett.net/plist v1.0.2-0.20250314012144-ee69052608d9 // indirect
|
||||
|
||||
60
go.sum
60
go.sum
@@ -667,8 +667,8 @@ github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
||||
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
|
||||
github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
|
||||
github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
|
||||
@@ -827,8 +827,8 @@ github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQ
|
||||
go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA=
|
||||
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
|
||||
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
|
||||
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
|
||||
@@ -886,8 +886,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
|
||||
golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
|
||||
golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
|
||||
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
|
||||
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
|
||||
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
|
||||
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
|
||||
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
|
||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 h1:bsqhLWFR6G6xiQcb+JoGqdKdRU6WzPWmK8E0jxTjzo4=
|
||||
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
||||
@@ -905,8 +905,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U=
|
||||
golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI=
|
||||
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
|
||||
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
@@ -930,14 +930,14 @@ golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
|
||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
|
||||
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
|
||||
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
|
||||
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
|
||||
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
|
||||
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
|
||||
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
|
||||
golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
|
||||
golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
@@ -949,8 +949,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
|
||||
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
|
||||
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
@@ -982,10 +982,10 @@ golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
|
||||
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 h1:dHQOQddU4YHS5gY33/6klKjq7Gp3WwMyOXGNp5nzRj8=
|
||||
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053/go.mod h1:+nZKN+XVh4LCiA9DV3ywrzN4gumyCnKjau3NGb9SGoE=
|
||||
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
|
||||
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8 h1:LvzTn0GQhWuvKH/kVRS3R3bVAsdQWI7hvfLHGgh9+lU=
|
||||
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8/go.mod h1:Pi4ztBfryZoJEkyFTI5/Ocsu2jXyDr6iSdgJiYE/uwE=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
@@ -993,8 +993,8 @@ golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
|
||||
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
|
||||
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
|
||||
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
|
||||
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
|
||||
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
|
||||
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
@@ -1004,8 +1004,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
|
||||
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
|
||||
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
||||
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
||||
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
|
||||
@@ -1027,8 +1027,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
|
||||
golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
|
||||
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
|
||||
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
@@ -1056,10 +1056,10 @@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn
|
||||
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
||||
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
||||
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250804133106-a7a43d27e69b h1:ULiyYQ0FdsJhwwZUwbaXpZF5yUE3h+RA+gxvBu37ucc=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250804133106-a7a43d27e69b/go.mod h1:oDOGiMSXHL4sDTJvFvIB9nRQCGdLP1o/iVaqQK8zB+M=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b h1:zPKJod4w6F1+nRGDI9ubnXYhU9NSWoFAijkHkUXeTK8=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8/go.mod h1:fDMmzKV90WSg1NbozdqrE64fkuTv6mlq2zxo9ad+3yo=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 h1:M1rk8KBnUsBDg1oPGHNCxG4vc1f49epmTO7xscSajMk=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk=
|
||||
google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
|
||||
google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio=
|
||||
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
|
||||
@@ -1068,8 +1068,8 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac
|
||||
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
|
||||
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
|
||||
google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A=
|
||||
google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c=
|
||||
google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM=
|
||||
google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
|
||||
@@ -44,6 +44,10 @@ func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
|
||||
ml.wd = wd
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) GetWatchDog() *WatchDog {
|
||||
return ml.wd
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
|
||||
return utils.ExistsInPath(ml.ModelPath, s)
|
||||
}
|
||||
|
||||
@@ -51,6 +51,7 @@ func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy
|
||||
func (wd *WatchDog) Shutdown() {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
log.Info().Msg("[WatchDog] Shutting down watchdog")
|
||||
wd.stop <- true
|
||||
}
|
||||
|
||||
|
||||
@@ -634,6 +634,83 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/images/inpainting": {
|
||||
"post": {
|
||||
"description": "Perform image inpainting. Accepts multipart/form-data with ` + "`" + `image` + "`" + ` and ` + "`" + `mask` + "`" + ` files.",
|
||||
"consumes": [
|
||||
"multipart/form-data"
|
||||
],
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"images"
|
||||
],
|
||||
"summary": "Image inpainting",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Model identifier",
|
||||
"name": "model",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Text prompt guiding the generation",
|
||||
"name": "prompt",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Number of inference steps (default 25)",
|
||||
"name": "steps",
|
||||
"in": "formData"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"description": "Original image file",
|
||||
"name": "image",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"description": "Mask image file (white = area to inpaint)",
|
||||
"name": "mask",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.OpenAIResponse"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/mcp/chat/completions": {
|
||||
"post": {
|
||||
"summary": "Stream MCP chat completions with reasoning, tool calls, and results",
|
||||
|
||||
@@ -627,6 +627,83 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/images/inpainting": {
|
||||
"post": {
|
||||
"description": "Perform image inpainting. Accepts multipart/form-data with `image` and `mask` files.",
|
||||
"consumes": [
|
||||
"multipart/form-data"
|
||||
],
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"images"
|
||||
],
|
||||
"summary": "Image inpainting",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Model identifier",
|
||||
"name": "model",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Text prompt guiding the generation",
|
||||
"name": "prompt",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Number of inference steps (default 25)",
|
||||
"name": "steps",
|
||||
"in": "formData"
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"description": "Original image file",
|
||||
"name": "image",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"description": "Mask image file (white = area to inpaint)",
|
||||
"name": "mask",
|
||||
"in": "formData",
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.OpenAIResponse"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/mcp/chat/completions": {
|
||||
"post": {
|
||||
"summary": "Stream MCP chat completions with reasoning, tool calls, and results",
|
||||
|
||||
@@ -1197,6 +1197,59 @@ paths:
|
||||
schema:
|
||||
$ref: '#/definitions/schema.OpenAIResponse'
|
||||
summary: Creates an image given a prompt.
|
||||
/v1/images/inpainting:
|
||||
post:
|
||||
consumes:
|
||||
- multipart/form-data
|
||||
description: Perform image inpainting. Accepts multipart/form-data with `image`
|
||||
and `mask` files.
|
||||
parameters:
|
||||
- description: Model identifier
|
||||
in: formData
|
||||
name: model
|
||||
required: true
|
||||
type: string
|
||||
- description: Text prompt guiding the generation
|
||||
in: formData
|
||||
name: prompt
|
||||
required: true
|
||||
type: string
|
||||
- description: Number of inference steps (default 25)
|
||||
in: formData
|
||||
name: steps
|
||||
type: integer
|
||||
- description: Original image file
|
||||
in: formData
|
||||
name: image
|
||||
required: true
|
||||
type: file
|
||||
- description: Mask image file (white = area to inpaint)
|
||||
in: formData
|
||||
name: mask
|
||||
required: true
|
||||
type: file
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
schema:
|
||||
$ref: '#/definitions/schema.OpenAIResponse'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
summary: Image inpainting
|
||||
tags:
|
||||
- images
|
||||
/v1/mcp/chat/completions:
|
||||
post:
|
||||
parameters:
|
||||
|
||||
Reference in New Issue
Block a user