deterministic builds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
refactor, macOS fixes
2026-05-19 22:29:54 -04:00 · 2026-04-01 19:45:31 +00:00 · 2026-04-01 19:42:16 +00:00 · 2026-04-01 17:57:03 +00:00
119 changed files with 617 additions and 8500 deletions
--- a/.github/gallery-agent/agent.go
+++ b/.github/gallery-agent/agent.go
@@ -133,7 +133,6 @@ func getRealReadme(ctx context.Context, repository string) (string, error) {
 	result, err := cogito.ExecuteTools(llm, fragment,
 		cogito.WithIterations(3),
 		cogito.WithMaxAttempts(3),
-		cogito.DisableSinkState,
 		cogito.WithTools(&HFReadmeTool{client: hfapi.NewClient()}))
 	if err != nil {
 		return "", err
--- a/.github/gallery-agent/gallery.go
+++ b/.github/gallery-agent/gallery.go
@@ -79,20 +79,7 @@ func generateYAMLEntry(model ProcessedModel, quantization string) string {
 	description = cleanTextContent(description)
 	formattedDescription := formatTextContent(description)

-	// Strip name and description from config file since they are
-	// already present at the gallery entry level and should not
-	// appear under overrides.
-	configFileContent := modelConfig.ConfigFile
-	var cfgMap map[string]any
-	if err := yaml.Unmarshal([]byte(configFileContent), &cfgMap); err == nil {
-		delete(cfgMap, "name")
-		delete(cfgMap, "description")
-		if cleaned, err := yaml.Marshal(cfgMap); err == nil {
-			configFileContent = string(cleaned)
-		}
-	}
-
-	configFile := formatTextContent(configFileContent)
+	configFile := formatTextContent(modelConfig.ConfigFile)

 	filesYAML, _ := yaml.Marshal(modelConfig.Files)

--- a/.github/gallery-agent/testing.go
+++ b/.github/gallery-agent/testing.go
@@ -17,7 +17,7 @@ func runSyntheticMode() error {
 	fmt.Printf("Generating %d synthetic models for testing...\n", numModels)

 	var models []ProcessedModel
-	for range numModels {
+	for i := range numModels {
 		model := generator.GenerateProcessedModel()
 		models = append(models, model)
 		fmt.Printf("Generated synthetic model: %s\n", model.ModelID)
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -14,6 +14,10 @@ jobs:
            variable: "LLAMA_VERSION"
            branch: "master"
            file: "backend/cpp/llama-cpp/Makefile"
+          - repository: "TheTom/llama-cpp-turboquant"
+            variable: "TURBOQUANT_VERSION"
+            branch: "feature/turboquant-kv-cache"
+            file: "backend/cpp/llama-cpp/Makefile"
          - repository: "ggml-org/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
@@ -63,6 +67,3 @@ jobs:
          branch: "update/${{ matrix.variable }}"
          body: ${{ steps.bump.outputs.message }}
          signoff: true
-
-
-
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -55,7 +55,7 @@ jobs:
      - name: Run gallery agent
        env:
          #OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
-          OPENAI_MODEL: Qwen3.5-2B-GGUF
+          OPENAI_MODE: Qwen3.5-2B-GGUF
          OPENAI_BASE_URL: "http://localhost:8080"
          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
          #OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
--- a/README.md
+++ b/README.md
@@ -42,38 +42,16 @@ Created and maintained by [Ettore Di Giacinto](https://github.com/mudler).

 > [:book: Documentation](https://localai.io/) | [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) | [💻 Quickstart](https://localai.io/basics/getting_started/) | [🖼️ Models](https://models.localai.io/) | [❓FAQ](https://localai.io/faq/)

-## Guided tour
+## Screenshots
+
+### Chat, Model gallery

 https://github.com/user-attachments/assets/08cbb692-57da-48f7-963d-2e7b43883c18

-<details>
-
-<summary>
-Click to see more!
-</summary>
-
-#### User and auth
-
-https://github.com/user-attachments/assets/228fa9ad-81a3-4d43-bfb9-31557e14a36c
-
-#### Agents
+### Agents

 https://github.com/user-attachments/assets/6270b331-e21d-4087-a540-6290006b381a

-#### Usage metrics per user
-
-https://github.com/user-attachments/assets/cbb03379-23b4-4e3d-bd26-d152f057007f
-
-#### Fine-tuning and Quantization
-
-https://github.com/user-attachments/assets/5ba4ace9-d3df-4795-b7d4-b0b404ea71ee
-
-#### WebRTC
-
-https://github.com/user-attachments/assets/ed88e34c-fed3-4b83-8a67-4716a9feeb7b
-
-</details>
-
 ## Quickstart

 ### macOS
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,7 +1,9 @@

-LLAMA_VERSION?=b8635075ffe27b135c49afb9a8b5c434bd42c502
+LLAMA_VERSION?=0fcb3760b2b9a3a496ef14621a7e4dad7a8df90f
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

+TURBOQUANT_VERSION?=8ad0f00e9a38df6c29fc10363341dde300f92ae4
+
 CMAKE_ARGS?=
 BUILD_TYPE?=
 NATIVE?=false
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -40,41 +40,45 @@ using grpc::ServerBuilder;
 using grpc::ServerContext;
 using grpc::Status;

-// gRPC bearer token auth for distributed mode.
+// gRPC bearer token auth via AuthMetadataProcessor for distributed mode.
 // Reads LOCALAI_GRPC_AUTH_TOKEN from the environment. When set, rejects
 // requests without a matching "authorization: Bearer <token>" metadata header.
+class TokenAuthMetadataProcessor : public grpc::AuthMetadataProcessor {
+public:
+    explicit TokenAuthMetadataProcessor(const std::string& token) : token_(token) {}

-// Cached auth token — empty means auth is disabled.
-static std::string g_grpc_auth_token;
+    bool IsBlocking() const override { return false; }

-// Minimal constant-time comparison (avoids OpenSSL dependency)
-static int ct_memcmp(const void* a, const void* b, size_t n) {
-    const unsigned char* pa = static_cast<const unsigned char*>(a);
-    const unsigned char* pb = static_cast<const unsigned char*>(b);
-    unsigned char result = 0;
-    for (size_t i = 0; i < n; i++) {
-        result |= pa[i] ^ pb[i];
-    }
-    return result;
-}
-
-// Returns OK when auth is disabled or the token matches.
-static grpc::Status checkAuth(grpc::ServerContext* context) {
-    if (g_grpc_auth_token.empty()) {
-        return grpc::Status::OK;
-    }
-    auto metadata = context->client_metadata();
-    auto it = metadata.find("authorization");
-    if (it != metadata.end()) {
-        std::string expected = "Bearer " + g_grpc_auth_token;
-        std::string got(it->second.data(), it->second.size());
-        if (expected.size() == got.size() &&
-            ct_memcmp(expected.data(), got.data(), expected.size()) == 0) {
-            return grpc::Status::OK;
+    grpc::Status Process(const InputMetadata& auth_metadata,
+                         grpc::AuthContext* /*context*/,
+                         OutputMetadata* /*consumed_auth_metadata*/,
+                         OutputMetadata* /*response_metadata*/) override {
+        auto it = auth_metadata.find("authorization");
+        if (it != auth_metadata.end()) {
+            std::string expected = "Bearer " + token_;
+            std::string got(it->second.data(), it->second.size());
+            // Constant-time comparison
+            if (expected.size() == got.size() && ct_memcmp(expected.data(), got.data(), expected.size()) == 0) {
+                return grpc::Status::OK;
+            }
        }
+        return grpc::Status(grpc::StatusCode::UNAUTHENTICATED, "invalid token");
    }
-    return grpc::Status(grpc::StatusCode::UNAUTHENTICATED, "invalid token");
-}
+
+private:
+    std::string token_;
+
+    // Minimal constant-time comparison (avoids OpenSSL dependency)
+    static int ct_memcmp(const void* a, const void* b, size_t n) {
+        const unsigned char* pa = static_cast<const unsigned char*>(a);
+        const unsigned char* pb = static_cast<const unsigned char*>(b);
+        unsigned char result = 0;
+        for (size_t i = 0; i < n; i++) {
+            result |= pa[i] ^ pb[i];
+        }
+        return result;
+    }
+};

 // END LocalAI

@@ -753,17 +757,13 @@ private:
 public:
    BackendServiceImpl(server_context& ctx) : ctx_server(ctx) {}

-    grpc::Status Health(ServerContext* context, const backend::HealthMessage* /*request*/, backend::Reply* reply) override {
-        auto auth = checkAuth(context);
-        if (!auth.ok()) return auth;
+    grpc::Status Health(ServerContext* /*context*/, const backend::HealthMessage* /*request*/, backend::Reply* reply) override {
        // Implement Health RPC
        reply->set_message("OK");
        return Status::OK;
    }

-    grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) override {
-        auto auth = checkAuth(context);
-        if (!auth.ok()) return auth;
+    grpc::Status LoadModel(ServerContext* /*context*/, const backend::ModelOptions* request, backend::Result* result) override {
        // Implement LoadModel RPC
        common_params params;
        params_parse(ctx_server, request, params);
@@ -962,8 +962,6 @@ public:
    }

    grpc::Status PredictStream(grpc::ServerContext* context, const backend::PredictOptions* request, grpc::ServerWriter<backend::Reply>* writer) override {
-        auto auth = checkAuth(context);
-        if (!auth.ok()) return auth;
        if (params_base.model.path.empty()) {
            return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
        }
@@ -1608,18 +1606,8 @@ public:
        auto attach_chat_deltas = [](backend::Reply & reply, server_task_result * raw_result) {
            // Try streaming partial result first
            auto* partial = dynamic_cast<server_task_result_cmpl_partial*>(raw_result);
-            if (partial) {
-                if (!partial->oaicompat_msg_diffs.empty()) {
-                    populate_chat_deltas_from_diffs(reply, partial->oaicompat_msg_diffs);
-                } else if (partial->is_updated) {
-                    // Autoparser is active but hasn't classified this chunk yet
-                    // (PEG parser warming up). Clear the raw message so the Go
-                    // side doesn't try to parse partial tag tokens (e.g. "<|channel>"
-                    // before the full "<|channel>thought\n" is received).
-                    // This matches llama.cpp server behavior which only emits SSE
-                    // chunks when the parser produces diffs.
-                    reply.set_message("");
-                }
+            if (partial && !partial->oaicompat_msg_diffs.empty()) {
+                populate_chat_deltas_from_diffs(reply, partial->oaicompat_msg_diffs);
                return;
            }
            // Try final result
@@ -1677,8 +1665,6 @@ public:
    }

    grpc::Status Predict(ServerContext* context, const backend::PredictOptions* request, backend::Reply* reply) override {
-         auto auth = checkAuth(context);
-         if (!auth.ok()) return auth;
         if (params_base.model.path.empty()) {
             return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
         }
@@ -2397,8 +2383,6 @@ public:
    }

    grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) override {
-        auto auth = checkAuth(context);
-        if (!auth.ok()) return auth;
        if (params_base.model.path.empty()) {
            return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
        }
@@ -2579,9 +2563,7 @@ public:
        return grpc::Status::OK;
    }

-    grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response) override {
-        auto auth = checkAuth(context);
-        if (!auth.ok()) return auth;
+    grpc::Status TokenizeString(ServerContext* /*context*/, const backend::PredictOptions* request, backend::TokenizationResponse* response) override {
        if (params_base.model.path.empty()) {
            return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
        }
@@ -2821,14 +2803,19 @@ int main(int argc, char** argv) {
    BackendServiceImpl service(ctx_server);

    ServerBuilder builder;
-    builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
-
-    // Initialize bearer token auth if LOCALAI_GRPC_AUTH_TOKEN is set
+    // Add bearer token auth via AuthMetadataProcessor if LOCALAI_GRPC_AUTH_TOKEN is set
    const char* auth_token = std::getenv("LOCALAI_GRPC_AUTH_TOKEN");
+    std::shared_ptr<grpc::ServerCredentials> creds;
    if (auth_token != nullptr && auth_token[0] != '\0') {
-        g_grpc_auth_token = auth_token;
+        creds = grpc::InsecureServerCredentials();
+        creds->SetAuthMetadataProcessor(
+            std::make_shared<TokenAuthMetadataProcessor>(auth_token));
        std::cout << "gRPC auth enabled via LOCALAI_GRPC_AUTH_TOKEN" << std::endl;
+    } else {
+        creds = grpc::InsecureServerCredentials();
    }
+
+    builder.AddListeningPort(server_address, creds);
    builder.RegisterService(&service);
    builder.SetMaxMessageSize(50 * 1024 * 1024); // 50MB
    builder.SetMaxSendMessageSize(50 * 1024 * 1024); // 50MB
--- a/backend/cpp/llama-cpp/patches/sources.yaml
+++ b/backend/cpp/llama-cpp/patches/sources.yaml
@@ -0,0 +1,14 @@
+# Patch sources for the llama-cpp backend.
+# Each source declares a fork whose commits are extracted as patches
+# and applied on top of upstream llama.cpp during the build.
+# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
+#
+# version_var: Makefile variable with the pinned fork commit SHA
+# base_var:    Makefile variable with the upstream base commit SHA
+# Both are read from version_file (relative to backend dir) to compute the diff.
+sources:
+  - name: turboquant
+    repo: https://github.com/TheTom/llama-cpp-turboquant.git
+    version_var: TURBOQUANT_VERSION
+    base_var: LLAMA_VERSION
+    version_file: Makefile
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
@@ -1,17 +1,13 @@
 #!/bin/bash
-
-## Patches
-
-## Apply patches from the `patches` directory
-if [ -d "patches" ]; then
-    for patch in $(ls patches); do
-        echo "Applying patch $patch"
-        patch -d llama.cpp/ -p1 < patches/$patch
-    done 
-fi
-
 set -e

+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$SCRIPT_DIR/../../.."
+
+## Apply patches from sources and/or local .patch files
+"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp
+
+## Copy server files into grpc-server build directory
 for file in $(ls llama.cpp/tools/server/); do
    cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
 done
@@ -28,4 +24,3 @@ else
    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
 fi
 set -e
-
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=87ecb95cbc65dc8e58e3d88f4f4a59a0939796f5
+STABLEDIFFUSION_GGML_VERSION?=09b12d5f6d51d862749e8e0ee8baac8f012089e2

 CMAKE_ARGS+=-DGGML_MAX_NAME=128

--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -36,27 +36,6 @@ type TokenUsage struct {
 	Completion             int
 	TimingPromptProcessing float64
 	TimingTokenGeneration  float64
-	ChatDeltas             []*proto.ChatDelta // per-chunk deltas from C++ autoparser (only set during streaming)
-}
-
-// HasChatDeltaContent returns true if any chat delta carries content or reasoning text.
-// Used to decide whether to prefer C++ autoparser deltas over Go-side tag extraction.
-func (t TokenUsage) HasChatDeltaContent() bool {
-	for _, d := range t.ChatDeltas {
-		if d.Content != "" || d.ReasoningContent != "" {
-			return true
-		}
-	}
-	return false
-}
-
-// ChatDeltaReasoningAndContent extracts accumulated reasoning and content from chat deltas.
-func (t TokenUsage) ChatDeltaReasoningAndContent() (reasoning, content string) {
-	for _, d := range t.ChatDeltas {
-		content += d.Content
-		reasoning += d.ReasoningContent
-	}
-	return reasoning, content
 }

 // ModelInferenceFunc is a test-friendly indirection to call model inference logic.
@@ -192,9 +171,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 					allChatDeltas = append(allChatDeltas, reply.ChatDeltas...)
 				}

-				// Attach per-chunk chat deltas to tokenUsage so the callback can use them
-				tokenUsage.ChatDeltas = reply.ChatDeltas
-
 				// Parse logprobs from reply if present (collect from last chunk that has them)
 				if len(reply.Logprobs) > 0 {
 					var parsedLogprobs schema.Logprobs
@@ -224,9 +200,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 				if len(msg) == 0 {
 					tokenCallback("", tokenUsage)
 				}
-
-				// Clear per-chunk deltas so they don't leak to the next chunk
-				tokenUsage.ChatDeltas = nil
 			})
 			if len(allChatDeltas) > 0 {
 				xlog.Debug("[ChatDeltas] streaming completed, accumulated deltas from C++ autoparser", "total_deltas", len(allChatDeltas))
--- a/core/backend/llm_test.go
+++ b/core/backend/llm_test.go
@@ -4,7 +4,6 @@ import (
 	. "github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"

 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -108,111 +107,3 @@ var _ = Describe("LLM tests", func() {
 		})
 	})
 })
-
-var _ = Describe("TokenUsage ChatDelta helpers", func() {
-	Describe("HasChatDeltaContent", func() {
-		It("should return false when ChatDeltas is nil", func() {
-			usage := TokenUsage{}
-			Expect(usage.HasChatDeltaContent()).To(BeFalse())
-		})
-
-		It("should return false when ChatDeltas is empty", func() {
-			usage := TokenUsage{ChatDeltas: []*pb.ChatDelta{}}
-			Expect(usage.HasChatDeltaContent()).To(BeFalse())
-		})
-
-		It("should return false when all deltas have empty content and reasoning", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{Content: "", ReasoningContent: ""},
-					{Content: ""},
-				},
-			}
-			Expect(usage.HasChatDeltaContent()).To(BeFalse())
-		})
-
-		It("should return true when a delta has content", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{Content: "hello"},
-				},
-			}
-			Expect(usage.HasChatDeltaContent()).To(BeTrue())
-		})
-
-		It("should return true when a delta has reasoning content", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{ReasoningContent: "thinking..."},
-				},
-			}
-			Expect(usage.HasChatDeltaContent()).To(BeTrue())
-		})
-
-		It("should return true when a delta has both content and reasoning", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{Content: "hello", ReasoningContent: "thinking..."},
-				},
-			}
-			Expect(usage.HasChatDeltaContent()).To(BeTrue())
-		})
-	})
-
-	Describe("ChatDeltaReasoningAndContent", func() {
-		It("should return empty strings when ChatDeltas is nil", func() {
-			usage := TokenUsage{}
-			reasoning, content := usage.ChatDeltaReasoningAndContent()
-			Expect(reasoning).To(BeEmpty())
-			Expect(content).To(BeEmpty())
-		})
-
-		It("should concatenate content from multiple deltas", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{Content: "Hello"},
-					{Content: " world"},
-				},
-			}
-			reasoning, content := usage.ChatDeltaReasoningAndContent()
-			Expect(content).To(Equal("Hello world"))
-			Expect(reasoning).To(BeEmpty())
-		})
-
-		It("should concatenate reasoning from multiple deltas", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{ReasoningContent: "step 1"},
-					{ReasoningContent: " step 2"},
-				},
-			}
-			reasoning, content := usage.ChatDeltaReasoningAndContent()
-			Expect(reasoning).To(Equal("step 1 step 2"))
-			Expect(content).To(BeEmpty())
-		})
-
-		It("should separate reasoning and content from mixed deltas", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{ReasoningContent: "thinking"},
-					{Content: "answer"},
-				},
-			}
-			reasoning, content := usage.ChatDeltaReasoningAndContent()
-			Expect(reasoning).To(Equal("thinking"))
-			Expect(content).To(Equal("answer"))
-		})
-
-		It("should handle deltas with both fields set", func() {
-			usage := TokenUsage{
-				ChatDeltas: []*pb.ChatDelta{
-					{Content: "a", ReasoningContent: "r1"},
-					{Content: "b", ReasoningContent: "r2"},
-				},
-			}
-			reasoning, content := usage.ChatDeltaReasoningAndContent()
-			Expect(reasoning).To(Equal("r1r2"))
-			Expect(content).To(Equal("ab"))
-		})
-	})
-})
--- a/core/cli/worker.go
+++ b/core/cli/worker.go
@@ -512,9 +512,11 @@ func (s *backendSupervisor) stopBackend(backend string) {

 	// Network I/O outside the lock
 	client := grpc.NewClientWithToken(bp.addr, false, nil, false, s.cmd.RegistrationToken)
-	xlog.Debug("Calling Free() before stopping backend", "backend", backend)
-	if err := client.Free(context.Background()); err != nil {
-		xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
+	if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
+		xlog.Debug("Calling Free() before stopping backend", "backend", backend)
+		if err := freeFunc.Free(context.Background()); err != nil {
+			xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
+		}
 	}

 	xlog.Info("Stopping backend process", "backend", backend, "addr", bp.addr)
@@ -690,13 +692,13 @@ func (s *backendSupervisor) subscribeLifecycleEvents() {

 	// backend.delete — stop backend + delete files (request-reply)
 	s.nats.SubscribeReply(messaging.SubjectNodeBackendDelete(s.nodeID), func(data []byte, reply func([]byte)) {
+		xlog.Info("Received NATS backend.delete event")
 		var req messaging.BackendDeleteRequest
 		if err := json.Unmarshal(data, &req); err != nil {
 			resp := messaging.BackendDeleteReply{Success: false, Error: fmt.Sprintf("invalid request: %v", err)}
 			replyJSON(reply, resp)
 			return
 		}
-		xlog.Info("Received NATS backend.delete event", "backend", req.Backend)

 		// Stop if running this backend
 		if s.isRunning(req.Backend) {
@@ -772,8 +774,10 @@ func (s *backendSupervisor) subscribeLifecycleEvents() {
 		if targetAddr != "" {
 			// Best-effort gRPC Free()
 			client := grpc.NewClientWithToken(targetAddr, false, nil, false, s.cmd.RegistrationToken)
-			if err := client.Free(context.Background()); err != nil {
-				xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
+			if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
+				if err := freeFunc.Free(context.Background()); err != nil {
+					xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
+				}
 			}
 		}

--- a/core/config/inference_defaults.json
+++ b/core/config/inference_defaults.json
@@ -14,7 +14,6 @@
    "qwen2-vl": {"min_p":0.1,"repeat_penalty":1,"temperature":1.5,"top_k":-1,"top_p":0.95},
    "qwen2": {"min_p":0,"repeat_penalty":1,"temperature":0.7,"top_k":20,"top_p":0.8},
    "qwq": {"min_p":0,"repeat_penalty":1,"temperature":0.6,"top_k":40,"top_p":0.95},
-    "gemma-4": {"min_p":0,"presence_penalty":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
    "gemma-3n": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
    "gemma-3": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
    "medgemma": {"min_p":0,"repeat_penalty":1,"temperature":1,"top_k":64,"top_p":0.95},
@@ -54,5 +53,5 @@
    "grok": {"min_p":0.01,"repeat_penalty":1,"temperature":1,"top_k":-1,"top_p":0.95},
    "mimo": {"min_p":0.01,"repeat_penalty":1,"temperature":0.7,"top_k":-1,"top_p":0.95}
  },
-  "patterns": ["qwen3.5","qwen3-coder","qwen3-next","qwen3-vl","qwen3","qwen2.5-coder","qwen2.5-vl","qwen2.5-omni","qwen2.5-math","qwen2.5","qwen2-vl","qwen2","qwq","gemma-4","gemma-3n","gemma-3","medgemma","gemma-2","llama-4","llama-3.3","llama-3.2","llama-3.1","llama-3","phi-4","phi-3","mistral-nemo","mistral-small","mistral-large","magistral","ministral","devstral","pixtral","deepseek-r1","deepseek-v3","deepseek-ocr","glm-5","glm-4","nemotron","minimax-m2.5","minimax","gpt-oss","granite-4","kimi-k2","kimi","lfm2","smollm","olmo","falcon","ernie","seed","grok","mimo"]
+  "patterns": ["qwen3.5","qwen3-coder","qwen3-next","qwen3-vl","qwen3","qwen2.5-coder","qwen2.5-vl","qwen2.5-omni","qwen2.5-math","qwen2.5","qwen2-vl","qwen2","qwq","gemma-3n","gemma-3","medgemma","gemma-2","llama-4","llama-3.3","llama-3.2","llama-3.1","llama-3","phi-4","phi-3","mistral-nemo","mistral-small","mistral-large","magistral","ministral","devstral","pixtral","deepseek-r1","deepseek-v3","deepseek-ocr","glm-5","glm-4","nemotron","minimax-m2.5","minimax","gpt-oss","granite-4","kimi-k2","kimi","lfm2","smollm","olmo","falcon","ernie","seed","grok","mimo"]
 }
--- a/core/config/meta/build.go
+++ b/core/config/meta/build.go
@@ -1,132 +0,0 @@
-package meta
-
-import (
-	"reflect"
-	"sort"
-	"sync"
-)
-
-var (
-	cachedMetadata *ConfigMetadata
-	cacheMu        sync.RWMutex
-)
-
-// BuildConfigMetadata reflects on the given struct type (ModelConfig),
-// merges the enrichment registry, and returns the full ConfigMetadata.
-// The result is cached in memory after the first call.
-func BuildConfigMetadata(modelConfigType reflect.Type) *ConfigMetadata {
-	cacheMu.RLock()
-	if cachedMetadata != nil {
-		cacheMu.RUnlock()
-		return cachedMetadata
-	}
-	cacheMu.RUnlock()
-
-	cacheMu.Lock()
-	defer cacheMu.Unlock()
-
-	if cachedMetadata != nil {
-		return cachedMetadata
-	}
-
-	cachedMetadata = buildConfigMetadataUncached(modelConfigType, DefaultRegistry())
-	return cachedMetadata
-}
-
-// buildConfigMetadataUncached does the actual work without caching.
-func buildConfigMetadataUncached(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata {
-	fields := WalkModelConfig(modelConfigType)
-
-	for i := range fields {
-		override, ok := registry[fields[i].Path]
-		if !ok {
-			continue
-		}
-		applyOverride(&fields[i], override)
-	}
-
-	allSections := DefaultSections()
-
-	sectionOrder := make(map[string]int, len(allSections))
-	for _, s := range allSections {
-		sectionOrder[s.ID] = s.Order
-	}
-
-	sort.SliceStable(fields, func(i, j int) bool {
-		si := sectionOrder[fields[i].Section]
-		sj := sectionOrder[fields[j].Section]
-		if si != sj {
-			return si < sj
-		}
-		return fields[i].Order < fields[j].Order
-	})
-
-	usedSections := make(map[string]bool)
-	for _, f := range fields {
-		usedSections[f.Section] = true
-	}
-
-	var sections []Section
-	for _, s := range allSections {
-		if usedSections[s.ID] {
-			sections = append(sections, s)
-		}
-	}
-
-	return &ConfigMetadata{
-		Sections: sections,
-		Fields:   fields,
-	}
-}
-
-// applyOverride merges non-zero override values into the field.
-func applyOverride(f *FieldMeta, o FieldMetaOverride) {
-	if o.Section != "" {
-		f.Section = o.Section
-	}
-	if o.Label != "" {
-		f.Label = o.Label
-	}
-	if o.Description != "" {
-		f.Description = o.Description
-	}
-	if o.Component != "" {
-		f.Component = o.Component
-	}
-	if o.Placeholder != "" {
-		f.Placeholder = o.Placeholder
-	}
-	if o.Default != nil {
-		f.Default = o.Default
-	}
-	if o.Min != nil {
-		f.Min = o.Min
-	}
-	if o.Max != nil {
-		f.Max = o.Max
-	}
-	if o.Step != nil {
-		f.Step = o.Step
-	}
-	if o.Options != nil {
-		f.Options = o.Options
-	}
-	if o.AutocompleteProvider != "" {
-		f.AutocompleteProvider = o.AutocompleteProvider
-	}
-	if o.VRAMImpact {
-		f.VRAMImpact = true
-	}
-	if o.Advanced {
-		f.Advanced = true
-	}
-	if o.Order != 0 {
-		f.Order = o.Order
-	}
-}
-
-// BuildForTest builds metadata without caching, for use in tests.
-func BuildForTest(modelConfigType reflect.Type, registry map[string]FieldMetaOverride) *ConfigMetadata {
-	return buildConfigMetadataUncached(modelConfigType, registry)
-}
-
--- a/core/config/meta/build_test.go
+++ b/core/config/meta/build_test.go
@@ -1,211 +0,0 @@
-package meta_test
-
-import (
-	"reflect"
-	"testing"
-
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/config/meta"
-)
-
-func TestBuildConfigMetadata(t *testing.T) {
-	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())
-
-	if len(md.Sections) == 0 {
-		t.Fatal("expected sections, got 0")
-	}
-	if len(md.Fields) == 0 {
-		t.Fatal("expected fields, got 0")
-	}
-
-	// Verify sections are ordered
-	for i := 1; i < len(md.Sections); i++ {
-		if md.Sections[i].Order < md.Sections[i-1].Order {
-			t.Errorf("sections not ordered: %s (order=%d) before %s (order=%d)",
-				md.Sections[i-1].ID, md.Sections[i-1].Order,
-				md.Sections[i].ID, md.Sections[i].Order)
-		}
-	}
-}
-
-func TestRegistryOverrides(t *testing.T) {
-	registry := map[string]meta.FieldMetaOverride{
-		"name": {
-			Label:       "My Custom Label",
-			Description: "Custom description",
-			Component:   "textarea",
-			Order:       999,
-		},
-	}
-
-	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), registry)
-
-	byPath := make(map[string]meta.FieldMeta, len(md.Fields))
-	for _, f := range md.Fields {
-		byPath[f.Path] = f
-	}
-
-	f, ok := byPath["name"]
-	if !ok {
-		t.Fatal("field 'name' not found")
-	}
-	if f.Label != "My Custom Label" {
-		t.Errorf("expected label 'My Custom Label', got %q", f.Label)
-	}
-	if f.Description != "Custom description" {
-		t.Errorf("expected description 'Custom description', got %q", f.Description)
-	}
-	if f.Component != "textarea" {
-		t.Errorf("expected component 'textarea', got %q", f.Component)
-	}
-	if f.Order != 999 {
-		t.Errorf("expected order 999, got %d", f.Order)
-	}
-}
-
-func TestUnregisteredFieldsGetDefaults(t *testing.T) {
-	// Use empty registry - all fields should still get auto-generated metadata
-	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), map[string]meta.FieldMetaOverride{})
-
-	byPath := make(map[string]meta.FieldMeta, len(md.Fields))
-	for _, f := range md.Fields {
-		byPath[f.Path] = f
-	}
-
-	// context_size should still exist with auto-generated label
-	f, ok := byPath["context_size"]
-	if !ok {
-		t.Fatal("field 'context_size' not found")
-	}
-	if f.Label == "" {
-		t.Error("expected auto-generated label, got empty")
-	}
-	if f.UIType != "int" {
-		t.Errorf("expected UIType 'int', got %q", f.UIType)
-	}
-	if f.Component == "" {
-		t.Error("expected auto-generated component, got empty")
-	}
-}
-
-func TestDefaultRegistryOverridesApply(t *testing.T) {
-	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())
-
-	byPath := make(map[string]meta.FieldMeta, len(md.Fields))
-	for _, f := range md.Fields {
-		byPath[f.Path] = f
-	}
-
-	// Verify enriched fields got their overrides
-	tests := []struct {
-		path        string
-		label       string
-		description string
-		vramImpact  bool
-	}{
-		{"context_size", "Context Size", "Maximum context window in tokens", true},
-		{"gpu_layers", "GPU Layers", "Number of layers to offload to GPU (-1 = all)", true},
-		{"backend", "Backend", "The inference backend to use (e.g. llama-cpp, vllm, diffusers)", false},
-		{"parameters.temperature", "Temperature", "Sampling temperature (higher = more creative, lower = more deterministic)", false},
-		{"template.chat", "Chat Template", "Go template for chat completion requests", false},
-	}
-
-	for _, tt := range tests {
-		f, ok := byPath[tt.path]
-		if !ok {
-			t.Errorf("field %q not found", tt.path)
-			continue
-		}
-		if f.Label != tt.label {
-			t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label)
-		}
-		if f.Description != tt.description {
-			t.Errorf("field %q: expected description %q, got %q", tt.path, tt.description, f.Description)
-		}
-		if f.VRAMImpact != tt.vramImpact {
-			t.Errorf("field %q: expected vramImpact=%v, got %v", tt.path, tt.vramImpact, f.VRAMImpact)
-		}
-	}
-}
-
-func TestStaticOptionsFields(t *testing.T) {
-	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())
-
-	byPath := make(map[string]meta.FieldMeta, len(md.Fields))
-	for _, f := range md.Fields {
-		byPath[f.Path] = f
-	}
-
-	// Fields with static options should have Options populated and no AutocompleteProvider
-	staticFields := []string{"quantization", "cache_type_k", "cache_type_v", "diffusers.pipeline_type", "diffusers.scheduler_type"}
-	for _, path := range staticFields {
-		f, ok := byPath[path]
-		if !ok {
-			t.Errorf("field %q not found", path)
-			continue
-		}
-		if len(f.Options) == 0 {
-			t.Errorf("field %q: expected Options to be populated", path)
-		}
-		if f.AutocompleteProvider != "" {
-			t.Errorf("field %q: expected no AutocompleteProvider, got %q", path, f.AutocompleteProvider)
-		}
-	}
-}
-
-func TestDynamicProviderFields(t *testing.T) {
-	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())
-
-	byPath := make(map[string]meta.FieldMeta, len(md.Fields))
-	for _, f := range md.Fields {
-		byPath[f.Path] = f
-	}
-
-	// Fields with dynamic providers should have AutocompleteProvider and no Options
-	dynamicFields := map[string]string{
-		"backend":                meta.ProviderBackends,
-		"pipeline.llm":          meta.ProviderModelsChat,
-		"pipeline.tts":          meta.ProviderModelsTTS,
-		"pipeline.transcription": meta.ProviderModelsTranscript,
-		"pipeline.vad":          meta.ProviderModelsVAD,
-	}
-	for path, expectedProvider := range dynamicFields {
-		f, ok := byPath[path]
-		if !ok {
-			t.Errorf("field %q not found", path)
-			continue
-		}
-		if f.AutocompleteProvider != expectedProvider {
-			t.Errorf("field %q: expected AutocompleteProvider %q, got %q", path, expectedProvider, f.AutocompleteProvider)
-		}
-		if len(f.Options) != 0 {
-			t.Errorf("field %q: expected no Options, got %d", path, len(f.Options))
-		}
-	}
-}
-
-func TestVRAMImpactFields(t *testing.T) {
-	md := meta.BuildForTest(reflect.TypeOf(config.ModelConfig{}), meta.DefaultRegistry())
-
-	var vramFields []string
-	for _, f := range md.Fields {
-		if f.VRAMImpact {
-			vramFields = append(vramFields, f.Path)
-		}
-	}
-
-	if len(vramFields) == 0 {
-		t.Error("expected some VRAM impact fields, got 0")
-	}
-
-	// context_size and gpu_layers should be marked
-	expected := map[string]bool{"context_size": true, "gpu_layers": true}
-	for _, path := range vramFields {
-		if expected[path] {
-			delete(expected, path)
-		}
-	}
-	for path := range expected {
-		t.Errorf("expected VRAM impact field %q not found", path)
-	}
-}
--- a/core/config/meta/constants.go
+++ b/core/config/meta/constants.go
@@ -1,63 +0,0 @@
-package meta
-
-// Dynamic autocomplete provider constants (runtime lookup required).
-const (
-	ProviderBackends         = "backends"
-	ProviderModels           = "models"
-	ProviderModelsChat       = "models:chat"
-	ProviderModelsTTS        = "models:tts"
-	ProviderModelsTranscript = "models:transcript"
-	ProviderModelsVAD        = "models:vad"
-)
-
-// Static option lists embedded directly in field metadata.
-
-var QuantizationOptions = []FieldOption{
-	{Value: "q4_0", Label: "Q4_0"},
-	{Value: "q4_1", Label: "Q4_1"},
-	{Value: "q5_0", Label: "Q5_0"},
-	{Value: "q5_1", Label: "Q5_1"},
-	{Value: "q8_0", Label: "Q8_0"},
-	{Value: "q2_K", Label: "Q2_K"},
-	{Value: "q3_K_S", Label: "Q3_K_S"},
-	{Value: "q3_K_M", Label: "Q3_K_M"},
-	{Value: "q3_K_L", Label: "Q3_K_L"},
-	{Value: "q4_K_S", Label: "Q4_K_S"},
-	{Value: "q4_K_M", Label: "Q4_K_M"},
-	{Value: "q5_K_S", Label: "Q5_K_S"},
-	{Value: "q5_K_M", Label: "Q5_K_M"},
-	{Value: "q6_K", Label: "Q6_K"},
-}
-
-var CacheTypeOptions = []FieldOption{
-	{Value: "f16", Label: "F16"},
-	{Value: "f32", Label: "F32"},
-	{Value: "q8_0", Label: "Q8_0"},
-	{Value: "q4_0", Label: "Q4_0"},
-	{Value: "q4_1", Label: "Q4_1"},
-	{Value: "q5_0", Label: "Q5_0"},
-	{Value: "q5_1", Label: "Q5_1"},
-}
-
-var DiffusersPipelineOptions = []FieldOption{
-	{Value: "StableDiffusionPipeline", Label: "StableDiffusionPipeline"},
-	{Value: "StableDiffusionImg2ImgPipeline", Label: "StableDiffusionImg2ImgPipeline"},
-	{Value: "StableDiffusionXLPipeline", Label: "StableDiffusionXLPipeline"},
-	{Value: "StableDiffusionXLImg2ImgPipeline", Label: "StableDiffusionXLImg2ImgPipeline"},
-	{Value: "StableDiffusionDepth2ImgPipeline", Label: "StableDiffusionDepth2ImgPipeline"},
-	{Value: "DiffusionPipeline", Label: "DiffusionPipeline"},
-	{Value: "StableVideoDiffusionPipeline", Label: "StableVideoDiffusionPipeline"},
-}
-
-var DiffusersSchedulerOptions = []FieldOption{
-	{Value: "ddim", Label: "DDIM"},
-	{Value: "ddpm", Label: "DDPM"},
-	{Value: "pndm", Label: "PNDM"},
-	{Value: "lms", Label: "LMS"},
-	{Value: "euler", Label: "Euler"},
-	{Value: "euler_a", Label: "Euler A"},
-	{Value: "dpm_multistep", Label: "DPM Multistep"},
-	{Value: "dpm_singlestep", Label: "DPM Singlestep"},
-	{Value: "heun", Label: "Heun"},
-	{Value: "unipc", Label: "UniPC"},
-}
--- a/core/config/meta/reflect.go
+++ b/core/config/meta/reflect.go
@@ -1,241 +0,0 @@
-package meta
-
-import (
-	"reflect"
-	"strings"
-	"unicode"
-)
-
-// WalkModelConfig uses reflection to discover all exported, YAML-tagged fields
-// in the given struct type (expected to be config.ModelConfig) and returns a
-// slice of FieldMeta with sensible defaults derived from the type information.
-func WalkModelConfig(t reflect.Type) []FieldMeta {
-	if t.Kind() == reflect.Pointer {
-		t = t.Elem()
-	}
-	var fields []FieldMeta
-	walkStruct(t, "", &fields)
-	return fields
-}
-
-// walkStruct recursively walks a struct type, collecting FieldMeta entries.
-// prefix is the dot-path prefix for nested structs (e.g. "function.grammar.").
-func walkStruct(t reflect.Type, prefix string, out *[]FieldMeta) {
-	if t.Kind() == reflect.Pointer {
-		t = t.Elem()
-	}
-	if t.Kind() != reflect.Struct {
-		return
-	}
-
-	for sf := range t.Fields() {
-		if !sf.IsExported() {
-			continue
-		}
-
-		yamlTag := sf.Tag.Get("yaml")
-		if yamlTag == "-" {
-			continue
-		}
-
-		yamlKey, opts := parseTag(yamlTag)
-
-		// Handle inline embedding (e.g. LLMConfig `yaml:",inline"`)
-		if opts.contains("inline") {
-			ft := sf.Type
-			if ft.Kind() == reflect.Pointer {
-				ft = ft.Elem()
-			}
-			if ft.Kind() == reflect.Struct {
-				walkStruct(ft, prefix, out)
-			}
-			continue
-		}
-
-		// If no yaml key and it's an embedded struct without inline, skip unknown pattern
-		if yamlKey == "" {
-			ft := sf.Type
-			if ft.Kind() == reflect.Pointer {
-				ft = ft.Elem()
-			}
-			// Anonymous struct without yaml tag - treat as inline
-			if sf.Anonymous && ft.Kind() == reflect.Struct {
-				walkStruct(ft, prefix, out)
-				continue
-			}
-			// Named field without yaml tag - skip
-			continue
-		}
-
-		ft := sf.Type
-		isPtr := ft.Kind() == reflect.Pointer
-		if isPtr {
-			ft = ft.Elem()
-		}
-
-		// Named nested struct (not a special type) -> recurse with prefix
-		if ft.Kind() == reflect.Struct && !isSpecialType(ft) {
-			nestedPrefix := prefix + yamlKey + "."
-			walkStruct(ft, nestedPrefix, out)
-			continue
-		}
-
-		// Leaf field
-		path := prefix + yamlKey
-		goType := sf.Type.String()
-		uiType, component := inferUIType(sf.Type)
-		section := inferSection(prefix)
-		label := labelFromKey(yamlKey)
-
-		*out = append(*out, FieldMeta{
-			Path:      path,
-			YAMLKey:   yamlKey,
-			GoType:    goType,
-			UIType:    uiType,
-			Pointer:   isPtr,
-			Section:   section,
-			Label:     label,
-			Component: component,
-			Order:     len(*out),
-		})
-	}
-}
-
-// isSpecialType returns true for struct types that should be treated as leaf
-// values rather than recursed into (e.g. custom JSON marshalers).
-func isSpecialType(t reflect.Type) bool {
-	if t.Kind() == reflect.Pointer {
-		t = t.Elem()
-	}
-	name := t.Name()
-	// LogprobsValue, URI types are leaf values despite being structs
-	switch name {
-	case "LogprobsValue", "URI":
-		return true
-	}
-	return false
-}
-
-// inferUIType maps a Go reflect.Type to a UI type string and default component.
-func inferUIType(t reflect.Type) (uiType, component string) {
-	if t.Kind() == reflect.Pointer {
-		t = t.Elem()
-	}
-
-	switch t.Kind() {
-	case reflect.Bool:
-		return "bool", "toggle"
-	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-		return "int", "number"
-	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
-		return "int", "number"
-	case reflect.Float32, reflect.Float64:
-		return "float", "number"
-	case reflect.String:
-		return "string", "input"
-	case reflect.Slice:
-		elem := t.Elem()
-		if elem.Kind() == reflect.String {
-			return "[]string", "string-list"
-		}
-		if elem.Kind() == reflect.Pointer {
-			elem = elem.Elem()
-		}
-		if elem.Kind() == reflect.Struct {
-			return "[]object", "json-editor"
-		}
-		return "[]any", "json-editor"
-	case reflect.Map:
-		return "map", "map-editor"
-	case reflect.Struct:
-		// Special types treated as leaves
-		if isSpecialType(t) {
-			return "bool", "toggle" // LogprobsValue
-		}
-		return "object", "json-editor"
-	default:
-		return "any", "input"
-	}
-}
-
-// inferSection determines the config section from the dot-path prefix.
-func inferSection(prefix string) string {
-	if prefix == "" {
-		return "general"
-	}
-	// Remove trailing dot
-	p := strings.TrimSuffix(prefix, ".")
-
-	// Use the top-level prefix to determine section
-	parts := strings.SplitN(p, ".", 2)
-	top := parts[0]
-
-	switch top {
-	case "parameters":
-		return "parameters"
-	case "template":
-		return "templates"
-	case "function":
-		return "functions"
-	case "reasoning":
-		return "reasoning"
-	case "diffusers":
-		return "diffusers"
-	case "tts":
-		return "tts"
-	case "pipeline":
-		return "pipeline"
-	case "grpc":
-		return "grpc"
-	case "agent":
-		return "agent"
-	case "mcp":
-		return "mcp"
-	case "feature_flags":
-		return "other"
-	case "limit_mm_per_prompt":
-		return "llm"
-	default:
-		return "other"
-	}
-}
-
-// labelFromKey converts a yaml key like "context_size" to "Context Size".
-func labelFromKey(key string) string {
-	parts := strings.Split(key, "_")
-	for i, p := range parts {
-		if len(p) > 0 {
-			runes := []rune(p)
-			runes[0] = unicode.ToUpper(runes[0])
-			parts[i] = string(runes)
-		}
-	}
-	return strings.Join(parts, " ")
-}
-
-// tagOptions is a set of comma-separated yaml tag options.
-type tagOptions string
-
-func (o tagOptions) contains(optName string) bool {
-	s := string(o)
-	for s != "" {
-		var name string
-		if name, s, _ = strings.Cut(s, ","); name == optName {
-			return true
-		}
-	}
-	return false
-}
-
-// parseTag splits a yaml struct tag into the key name and options.
-func parseTag(tag string) (string, tagOptions) {
-	if tag == "" {
-		return "", ""
-	}
-	before, after, found := strings.Cut(tag, ",")
-	if found {
-		return before, tagOptions(after)
-	}
-	return tag, ""
-}
-
--- a/core/config/meta/reflect_test.go
+++ b/core/config/meta/reflect_test.go
@@ -1,208 +0,0 @@
-package meta_test
-
-import (
-	"reflect"
-	"testing"
-
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/config/meta"
-)
-
-func TestWalkModelConfig(t *testing.T) {
-	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
-	if len(fields) == 0 {
-		t.Fatal("expected fields from ModelConfig, got 0")
-	}
-
-	// Build a lookup by path
-	byPath := make(map[string]meta.FieldMeta, len(fields))
-	for _, f := range fields {
-		byPath[f.Path] = f
-	}
-
-	// Verify some top-level fields exist
-	for _, path := range []string{"name", "backend", "cuda", "step"} {
-		if _, ok := byPath[path]; !ok {
-			t.Errorf("expected field %q not found", path)
-		}
-	}
-
-	// Verify inline LLMConfig fields appear at top level (no prefix)
-	for _, path := range []string{"context_size", "gpu_layers", "threads", "mmap"} {
-		if _, ok := byPath[path]; !ok {
-			t.Errorf("expected inline LLMConfig field %q not found", path)
-		}
-	}
-
-	// Verify nested struct fields have correct prefix
-	for _, path := range []string{
-		"template.chat",
-		"template.completion",
-		"template.use_tokenizer_template",
-		"function.grammar.parallel_calls",
-		"function.grammar.mixed_mode",
-		"diffusers.pipeline_type",
-		"diffusers.cuda",
-		"pipeline.llm",
-		"pipeline.tts",
-		"reasoning.disable",
-		"agent.max_iterations",
-		"grpc.attempts",
-	} {
-		if _, ok := byPath[path]; !ok {
-			t.Errorf("expected nested field %q not found", path)
-		}
-	}
-
-	// Verify PredictionOptions fields have parameters. prefix
-	for _, path := range []string{
-		"parameters.temperature",
-		"parameters.top_p",
-		"parameters.top_k",
-		"parameters.max_tokens",
-		"parameters.seed",
-	} {
-		if _, ok := byPath[path]; !ok {
-			t.Errorf("expected parameters field %q not found", path)
-		}
-	}
-
-	// Verify TTSConfig fields have tts. prefix
-	if _, ok := byPath["tts.voice"]; !ok {
-		t.Error("expected tts.voice field not found")
-	}
-}
-
-func TestSkipsYAMLDashFields(t *testing.T) {
-	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
-
-	byPath := make(map[string]meta.FieldMeta, len(fields))
-	for _, f := range fields {
-		byPath[f.Path] = f
-	}
-
-	// modelConfigFile has yaml:"-" tag, should be skipped
-	for _, f := range fields {
-		if f.Path == "modelConfigFile" || f.Path == "modelTemplate" {
-			t.Errorf("field %q should have been skipped (yaml:\"-\")", f.Path)
-		}
-	}
-}
-
-func TestTypeMapping(t *testing.T) {
-	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
-
-	byPath := make(map[string]meta.FieldMeta, len(fields))
-	for _, f := range fields {
-		byPath[f.Path] = f
-	}
-
-	tests := []struct {
-		path    string
-		uiType  string
-		pointer bool
-	}{
-		{"name", "string", false},
-		{"cuda", "bool", false},
-		{"context_size", "int", true},
-		{"gpu_layers", "int", true},
-		{"threads", "int", true},
-		{"f16", "bool", true},
-		{"mmap", "bool", true},
-		{"stopwords", "[]string", false},
-		{"roles", "map", false},
-		{"parameters.temperature", "float", true},
-		{"parameters.top_k", "int", true},
-		{"function.grammar.parallel_calls", "bool", false},
-	}
-
-	for _, tt := range tests {
-		f, ok := byPath[tt.path]
-		if !ok {
-			t.Errorf("field %q not found", tt.path)
-			continue
-		}
-		if f.UIType != tt.uiType {
-			t.Errorf("field %q: expected UIType %q, got %q", tt.path, tt.uiType, f.UIType)
-		}
-		if f.Pointer != tt.pointer {
-			t.Errorf("field %q: expected Pointer=%v, got %v", tt.path, tt.pointer, f.Pointer)
-		}
-	}
-}
-
-func TestSectionAssignment(t *testing.T) {
-	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
-
-	byPath := make(map[string]meta.FieldMeta, len(fields))
-	for _, f := range fields {
-		byPath[f.Path] = f
-	}
-
-	tests := []struct {
-		path    string
-		section string
-	}{
-		{"name", "general"},
-		{"backend", "general"},
-		{"context_size", "general"},   // inline LLMConfig -> no prefix -> general
-		{"parameters.temperature", "parameters"},
-		{"template.chat", "templates"},
-		{"function.grammar.parallel_calls", "functions"},
-		{"diffusers.cuda", "diffusers"},
-		{"pipeline.llm", "pipeline"},
-		{"reasoning.disable", "reasoning"},
-		{"agent.max_iterations", "agent"},
-		{"grpc.attempts", "grpc"},
-	}
-
-	for _, tt := range tests {
-		f, ok := byPath[tt.path]
-		if !ok {
-			t.Errorf("field %q not found", tt.path)
-			continue
-		}
-		if f.Section != tt.section {
-			t.Errorf("field %q: expected section %q, got %q", tt.path, tt.section, f.Section)
-		}
-	}
-}
-
-func TestLabelGeneration(t *testing.T) {
-	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
-
-	byPath := make(map[string]meta.FieldMeta, len(fields))
-	for _, f := range fields {
-		byPath[f.Path] = f
-	}
-
-	tests := []struct {
-		path  string
-		label string
-	}{
-		{"context_size", "Context Size"},
-		{"gpu_layers", "Gpu Layers"},
-		{"name", "Name"},
-		{"cuda", "Cuda"},
-	}
-
-	for _, tt := range tests {
-		f, ok := byPath[tt.path]
-		if !ok {
-			t.Errorf("field %q not found", tt.path)
-			continue
-		}
-		if f.Label != tt.label {
-			t.Errorf("field %q: expected label %q, got %q", tt.path, tt.label, f.Label)
-		}
-	}
-}
-
-func TestFieldCount(t *testing.T) {
-	fields := meta.WalkModelConfig(reflect.TypeOf(config.ModelConfig{}))
-	// We expect a large number of fields (100+) given the config complexity
-	if len(fields) < 80 {
-		t.Errorf("expected at least 80 fields, got %d", len(fields))
-	}
-	t.Logf("Total fields discovered: %d", len(fields))
-}
--- a/core/config/meta/registry.go
+++ b/core/config/meta/registry.go
@@ -1,314 +0,0 @@
-package meta
-
-// DefaultRegistry returns enrichment overrides for the ~30 most commonly used
-// config fields. Fields not listed here still appear with auto-generated
-// labels and type-inferred components.
-func DefaultRegistry() map[string]FieldMetaOverride {
-	f64 := func(v float64) *float64 { return &v }
-
-	return map[string]FieldMetaOverride{
-		// --- General ---
-		"name": {
-			Section:     "general",
-			Label:       "Model Name",
-			Description: "Unique identifier for this model configuration",
-			Component:   "input",
-			Order:       0,
-		},
-		"backend": {
-			Section:              "general",
-			Label:                "Backend",
-			Description:          "The inference backend to use (e.g. llama-cpp, vllm, diffusers)",
-			Component:            "select",
-			AutocompleteProvider: ProviderBackends,
-			Order:                1,
-		},
-		"description": {
-			Section:     "general",
-			Label:       "Description",
-			Description: "Human-readable description of what this model does",
-			Component:   "textarea",
-			Order:       2,
-		},
-		"usage": {
-			Section:     "general",
-			Label:       "Usage",
-			Description: "Usage instructions or notes",
-			Component:   "textarea",
-			Advanced:    true,
-			Order:       3,
-		},
-		"cuda": {
-			Section:     "general",
-			Label:       "CUDA",
-			Description: "Explicitly enable CUDA acceleration",
-			Order:       5,
-		},
-		"known_usecases": {
-			Section:     "general",
-			Label:       "Known Use Cases",
-			Description: "Capabilities this model supports (e.g. FLAG_CHAT, FLAG_COMPLETION)",
-			Component:   "string-list",
-			Order:       6,
-		},
-
-		// --- LLM ---
-		"context_size": {
-			Section:     "llm",
-			Label:       "Context Size",
-			Description: "Maximum context window in tokens",
-			Component:   "number",
-			VRAMImpact:  true,
-			Order:       10,
-		},
-		"gpu_layers": {
-			Section:     "llm",
-			Label:       "GPU Layers",
-			Description: "Number of layers to offload to GPU (-1 = all)",
-			Component:   "number",
-			Min:         f64(-1),
-			VRAMImpact:  true,
-			Order:       11,
-		},
-		"threads": {
-			Section:     "llm",
-			Label:       "Threads",
-			Description: "Number of CPU threads for inference",
-			Component:   "number",
-			Min:         f64(1),
-			Order:       12,
-		},
-		"f16": {
-			Section:     "llm",
-			Label:       "F16",
-			Description: "Use 16-bit floating point for key/value cache",
-			Order:       13,
-		},
-		"mmap": {
-			Section:     "llm",
-			Label:       "Memory Map",
-			Description: "Use memory-mapped files for model loading",
-			Order:       14,
-		},
-		"mmlock": {
-			Section:     "llm",
-			Label:       "Memory Lock",
-			Description: "Lock model memory to prevent swapping",
-			Advanced:    true,
-			Order:       15,
-		},
-		"low_vram": {
-			Section:     "llm",
-			Label:       "Low VRAM",
-			Description: "Optimize for systems with limited GPU memory",
-			VRAMImpact:  true,
-			Order:       16,
-		},
-		"embeddings": {
-			Section:     "llm",
-			Label:       "Embeddings",
-			Description: "Enable embedding generation mode",
-			Order:       17,
-		},
-		"quantization": {
-			Section:     "llm",
-			Label:       "Quantization",
-			Description: "Quantization method (e.g. q4_0, q5_1, q8_0)",
-			Component:   "select",
-			Options:     QuantizationOptions,
-			Advanced:    true,
-			Order:       20,
-		},
-		"flash_attention": {
-			Section:     "llm",
-			Label:       "Flash Attention",
-			Description: "Enable flash attention for faster inference",
-			Component:   "input",
-			Advanced:    true,
-			Order:       21,
-		},
-		"cache_type_k": {
-			Section:     "llm",
-			Label:       "KV Cache Type (K)",
-			Description: "Quantization type for key cache (e.g. f16, q8_0, q4_0)",
-			Component:   "select",
-			Options:     CacheTypeOptions,
-			VRAMImpact:  true,
-			Advanced:    true,
-			Order:       22,
-		},
-		"cache_type_v": {
-			Section:     "llm",
-			Label:       "KV Cache Type (V)",
-			Description: "Quantization type for value cache",
-			Component:   "select",
-			Options:     CacheTypeOptions,
-			VRAMImpact:  true,
-			Advanced:    true,
-			Order:       23,
-		},
-
-		// --- Parameters ---
-		"parameters.temperature": {
-			Section:     "parameters",
-			Label:       "Temperature",
-			Description: "Sampling temperature (higher = more creative, lower = more deterministic)",
-			Component:   "slider",
-			Min:         f64(0),
-			Max:         f64(2),
-			Step:        f64(0.05),
-			Order:       30,
-		},
-		"parameters.top_p": {
-			Section:     "parameters",
-			Label:       "Top P",
-			Description: "Nucleus sampling threshold",
-			Component:   "slider",
-			Min:         f64(0),
-			Max:         f64(1),
-			Step:        f64(0.01),
-			Order:       31,
-		},
-		"parameters.top_k": {
-			Section:     "parameters",
-			Label:       "Top K",
-			Description: "Top-K sampling: consider only the K most likely tokens",
-			Component:   "number",
-			Min:         f64(0),
-			Order:       32,
-		},
-		"parameters.max_tokens": {
-			Section:     "parameters",
-			Label:       "Max Tokens",
-			Description: "Maximum number of tokens to generate (0 = unlimited)",
-			Component:   "number",
-			Min:         f64(0),
-			Order:       33,
-		},
-		"parameters.repeat_penalty": {
-			Section:     "parameters",
-			Label:       "Repeat Penalty",
-			Description: "Penalize repeated tokens (1.0 = no penalty)",
-			Component:   "number",
-			Min:         f64(0),
-			Advanced:    true,
-			Order:       34,
-		},
-		"parameters.seed": {
-			Section:     "parameters",
-			Label:       "Seed",
-			Description: "Random seed (-1 = random)",
-			Component:   "number",
-			Advanced:    true,
-			Order:       35,
-		},
-
-		// --- Templates ---
-		"template.chat": {
-			Section:     "templates",
-			Label:       "Chat Template",
-			Description: "Go template for chat completion requests",
-			Component:   "code-editor",
-			Order:       40,
-		},
-		"template.chat_message": {
-			Section:     "templates",
-			Label:       "Chat Message Template",
-			Description: "Go template for individual chat messages",
-			Component:   "code-editor",
-			Order:       41,
-		},
-		"template.completion": {
-			Section:     "templates",
-			Label:       "Completion Template",
-			Description: "Go template for completion requests",
-			Component:   "code-editor",
-			Order:       42,
-		},
-		"template.use_tokenizer_template": {
-			Section:     "templates",
-			Label:       "Use Tokenizer Template",
-			Description: "Use the chat template from the model's tokenizer config",
-			Order:       43,
-		},
-
-		// --- Pipeline ---
-		"pipeline.llm": {
-			Section:              "pipeline",
-			Label:                "LLM Model",
-			Description:          "Model to use for LLM inference in the pipeline",
-			Component:            "model-select",
-			AutocompleteProvider: ProviderModelsChat,
-			Order:                60,
-		},
-		"pipeline.tts": {
-			Section:              "pipeline",
-			Label:                "TTS Model",
-			Description:          "Model to use for text-to-speech in the pipeline",
-			Component:            "model-select",
-			AutocompleteProvider: ProviderModelsTTS,
-			Order:                61,
-		},
-		"pipeline.transcription": {
-			Section:              "pipeline",
-			Label:                "Transcription Model",
-			Description:          "Model to use for speech-to-text in the pipeline",
-			Component:            "model-select",
-			AutocompleteProvider: ProviderModelsTranscript,
-			Order:                62,
-		},
-		"pipeline.vad": {
-			Section:              "pipeline",
-			Label:                "VAD Model",
-			Description:          "Model to use for voice activity detection in the pipeline",
-			Component:            "model-select",
-			AutocompleteProvider: ProviderModelsVAD,
-			Order:                63,
-		},
-
-		// --- Functions ---
-		"function.grammar.parallel_calls": {
-			Section:     "functions",
-			Label:       "Parallel Calls",
-			Description: "Allow the LLM to return multiple function calls in one response",
-			Order:       70,
-		},
-		"function.grammar.mixed_mode": {
-			Section:     "functions",
-			Label:       "Mixed Mode",
-			Description: "Allow the LLM to return both text and function calls",
-			Order:       71,
-		},
-		"function.grammar.disable": {
-			Section:     "functions",
-			Label:       "Disable Grammar",
-			Description: "Disable grammar-constrained generation for function calls",
-			Advanced:    true,
-			Order:       72,
-		},
-
-		// --- Diffusers ---
-		"diffusers.pipeline_type": {
-			Section:     "diffusers",
-			Label:       "Pipeline Type",
-			Description: "Diffusers pipeline type (e.g. StableDiffusionPipeline)",
-			Component:   "select",
-			Options:     DiffusersPipelineOptions,
-			Order:       80,
-		},
-		"diffusers.scheduler_type": {
-			Section:     "diffusers",
-			Label:       "Scheduler Type",
-			Description: "Noise scheduler type",
-			Component:   "select",
-			Options:     DiffusersSchedulerOptions,
-			Order:       81,
-		},
-		"diffusers.cuda": {
-			Section:     "diffusers",
-			Label:       "CUDA",
-			Description: "Enable CUDA for diffusers",
-			Order:       82,
-		},
-	}
-}
--- a/core/config/meta/types.go
+++ b/core/config/meta/types.go
@@ -1,83 +0,0 @@
-package meta
-
-// FieldMeta describes a single configuration field for UI rendering and agent discovery.
-type FieldMeta struct {
-	Path        string        `json:"path"`                  // dot-path: "context_size", "function.grammar.parallel_calls"
-	YAMLKey     string        `json:"yaml_key"`              // leaf yaml key
-	GoType      string        `json:"go_type"`               // "*int", "string", "[]string"
-	UIType      string        `json:"ui_type"`               // "string", "int", "float", "bool", "[]string", "map", "object"
-	Pointer     bool          `json:"pointer,omitempty"`     // true = nil means "not set"
-	Section     string        `json:"section"`               // "general", "llm", "templates", etc.
-	Label       string        `json:"label"`                 // human-readable label
-	Description string        `json:"description,omitempty"` // help text
-	Component   string        `json:"component"`             // "input", "number", "toggle", "select", "slider", etc.
-	Placeholder string        `json:"placeholder,omitempty"`
-	Default     any           `json:"default,omitempty"`
-	Min         *float64      `json:"min,omitempty"`
-	Max         *float64      `json:"max,omitempty"`
-	Step        *float64      `json:"step,omitempty"`
-	Options     []FieldOption `json:"options,omitempty"`
-
-	AutocompleteProvider string `json:"autocomplete_provider,omitempty"` // "backends", "models:chat", etc.
-	VRAMImpact           bool   `json:"vram_impact,omitempty"`
-	Advanced             bool   `json:"advanced,omitempty"`
-	Order                int    `json:"order"`
-}
-
-// FieldOption represents a choice in a select/enum field.
-type FieldOption struct {
-	Value string `json:"value"`
-	Label string `json:"label"`
-}
-
-// Section groups related fields in the UI.
-type Section struct {
-	ID    string `json:"id"`
-	Label string `json:"label"`
-	Icon  string `json:"icon,omitempty"`
-	Order int    `json:"order"`
-}
-
-// ConfigMetadata is the top-level response for the metadata API.
-type ConfigMetadata struct {
-	Sections []Section   `json:"sections"`
-	Fields   []FieldMeta `json:"fields"`
-}
-
-// FieldMetaOverride holds registry overrides that are merged on top of
-// the reflection-discovered defaults. Only non-zero fields override.
-type FieldMetaOverride struct {
-	Section              string
-	Label                string
-	Description          string
-	Component            string
-	Placeholder          string
-	Default              any
-	Min                  *float64
-	Max                  *float64
-	Step                 *float64
-	Options              []FieldOption
-	AutocompleteProvider string
-	VRAMImpact           bool
-	Advanced             bool
-	Order                int
-}
-
-// DefaultSections defines the well-known config sections in display order.
-func DefaultSections() []Section {
-	return []Section{
-		{ID: "general", Label: "General", Icon: "settings", Order: 0},
-		{ID: "llm", Label: "LLM", Icon: "cpu", Order: 10},
-		{ID: "parameters", Label: "Parameters", Icon: "sliders", Order: 20},
-		{ID: "templates", Label: "Templates", Icon: "file-text", Order: 30},
-		{ID: "functions", Label: "Functions / Tools", Icon: "tool", Order: 40},
-		{ID: "reasoning", Label: "Reasoning", Icon: "brain", Order: 45},
-		{ID: "diffusers", Label: "Diffusers", Icon: "image", Order: 50},
-		{ID: "tts", Label: "TTS", Icon: "volume-2", Order: 55},
-		{ID: "pipeline", Label: "Pipeline", Icon: "git-merge", Order: 60},
-		{ID: "grpc", Label: "gRPC", Icon: "server", Order: 65},
-		{ID: "agent", Label: "Agent", Icon: "bot", Order: 70},
-		{ID: "mcp", Label: "MCP", Icon: "plug", Order: 75},
-		{ID: "other", Label: "Other", Icon: "more-horizontal", Order: 100},
-	}
-}
--- a/core/gallery/backends.go
+++ b/core/gallery/backends.go
@@ -300,29 +300,14 @@ func DeleteBackendFromSystem(systemState *system.SystemState, name string) error

 	backend, ok := backends.Get(name)
 	if !ok {
-		// Not found by direct key — try matching by gallery name (metadata.Name)
-		// The UI may send gallery-style names like "localai@llama-cpp" which
-		// don't match the directory-based keys used in the backends map.
-		for _, b := range backends {
-			if b.Metadata != nil && b.Metadata.Name == name && !b.IsMeta {
-				backend = b
-				ok = true
-				break
-			}
-		}
-		if !ok {
-			return fmt.Errorf("backend %q: %w", name, ErrBackendNotFound)
-		}
+		return fmt.Errorf("backend %q: %w", name, ErrBackendNotFound)
 	}

 	if backend.IsSystem {
 		return fmt.Errorf("system backend %q cannot be deleted", name)
 	}

-	// Use the backend's actual Name (directory key) for path resolution,
-	// not the caller-supplied name which may be a gallery-style name.
-	dirName := backend.Name
-	backendDirectory := filepath.Join(systemState.Backend.BackendsPath, dirName)
+	backendDirectory := filepath.Join(systemState.Backend.BackendsPath, name)

 	// check if the backend dir exists
 	if _, err := os.Stat(backendDirectory); os.IsNotExist(err) {
@@ -340,7 +325,7 @@ func DeleteBackendFromSystem(systemState *system.SystemState, name string) error
 				if err != nil {
 					return err
 				}
-				if metadata != nil && (metadata.Alias == name || metadata.Alias == dirName) {
+				if metadata != nil && metadata.Alias == name {
 					backendDirectory = filepath.Join(systemState.Backend.BackendsPath, backend.Name())
 					foundBackend = true
 					break
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -52,42 +52,9 @@ var quietPaths = []string{"/api/operations", "/api/resources", "/healthz", "/rea
 // @license.name MIT
 // @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
 // @BasePath /
-// @schemes http https
 // @securityDefinitions.apikey BearerAuth
 // @in header
 // @name Authorization
-// @tag.name inference
-// @tag.description Chat completions, text completions, edits, and responses (OpenAI-compatible)
-// @tag.name embeddings
-// @tag.description Vector embeddings (OpenAI-compatible)
-// @tag.name audio
-// @tag.description Text-to-speech, transcription, voice activity detection, sound generation
-// @tag.name images
-// @tag.description Image generation and inpainting
-// @tag.name video
-// @tag.description Video generation from prompts
-// @tag.name detection
-// @tag.description Object detection in images
-// @tag.name tokenize
-// @tag.description Tokenization and token metrics
-// @tag.name models
-// @tag.description Model gallery browsing, installation, deletion, and listing
-// @tag.name backends
-// @tag.description Backend gallery browsing, installation, deletion, and listing
-// @tag.name config
-// @tag.description Model configuration metadata, autocomplete, PATCH updates, VRAM estimation
-// @tag.name monitoring
-// @tag.description Prometheus metrics, backend status, system information
-// @tag.name mcp
-// @tag.description Model Context Protocol — tool-augmented chat with MCP servers
-// @tag.name agent-jobs
-// @tag.description Agent task and job management
-// @tag.name p2p
-// @tag.description Peer-to-peer networking nodes and tokens
-// @tag.name rerank
-// @tag.description Document reranking
-// @tag.name instructions
-// @tag.description API instruction discovery — browse instruction areas and get endpoint guides

 func API(application *application.Application) (*echo.Echo, error) {
 	e := echo.New()
@@ -393,7 +360,7 @@ func API(application *application.Application) (*echo.Echo, error) {
 	routes.RegisterOpenResponsesRoutes(e, requestExtractor, application)
 	if !application.ApplicationConfig().DisableWebUI {
 		routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application, adminMiddleware)
-		routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), adminMiddleware)
+		routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), adminMiddleware)

 		// Serve React SPA from / with SPA fallback via 404 handler
 		reactFS, fsErr := fs.Sub(reactUI, "react-ui/dist")
--- a/core/http/endpoints/anthropic/messages.go
+++ b/core/http/endpoints/anthropic/messages.go
@@ -21,7 +21,6 @@ import (
 // MessagesEndpoint is the Anthropic Messages API endpoint
 // https://docs.anthropic.com/claude/reference/messages_post
 // @Summary Generate a message response for the given messages and model.
-// @Tags inference
 // @Param request body schema.AnthropicRequest true "query params"
 // @Success 200 {object} schema.AnthropicResponse "Response"
 // @Router /v1/messages [post]
@@ -358,7 +357,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 		// Send initial content_block_start event
 		contentBlockStart := schema.AnthropicStreamEvent{
 			Type:         "content_block_start",
-			Index:        intPtr(currentBlockIndex),
+			Index:        currentBlockIndex,
 			ContentBlock: &schema.AnthropicContentBlock{Type: "text", Text: ""},
 		}
 		sendAnthropicSSE(c, contentBlockStart)
@@ -377,7 +376,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 					if !inToolCall && currentBlockIndex == 0 {
 						sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 							Type:  "content_block_stop",
-							Index: intPtr(currentBlockIndex),
+							Index: currentBlockIndex,
 						})
 						currentBlockIndex++
 						inToolCall = true
@@ -387,7 +386,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 						tc := toolCalls[i]
 						sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 							Type:  "content_block_start",
-							Index: intPtr(currentBlockIndex),
+							Index: currentBlockIndex,
 							ContentBlock: &schema.AnthropicContentBlock{
 								Type: "tool_use",
 								ID:   fmt.Sprintf("toolu_%s_%d", id, i),
@@ -396,7 +395,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 						})
 						sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 							Type:  "content_block_delta",
-							Index: intPtr(currentBlockIndex),
+							Index: currentBlockIndex,
 							Delta: &schema.AnthropicStreamDelta{
 								Type:        "input_json_delta",
 								PartialJSON: tc.Arguments,
@@ -404,7 +403,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 						})
 						sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 							Type:  "content_block_stop",
-							Index: intPtr(currentBlockIndex),
+							Index: currentBlockIndex,
 						})
 						currentBlockIndex++
 					}
@@ -417,7 +416,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 			if !inToolCall {
 				sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 					Type:  "content_block_delta",
-					Index: intPtr(0),
+					Index: 0,
 					Delta: &schema.AnthropicStreamDelta{
 						Type: "text_delta",
 						Text: token,
@@ -517,7 +516,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 				// Close the text content block
 				sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 					Type:  "content_block_stop",
-					Index: intPtr(currentBlockIndex),
+					Index: currentBlockIndex,
 				})
 				currentBlockIndex++
 				inToolCall = true
@@ -529,7 +528,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 					}
 					sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 						Type:  "content_block_start",
-						Index: intPtr(currentBlockIndex),
+						Index: currentBlockIndex,
 						ContentBlock: &schema.AnthropicContentBlock{
 							Type: "tool_use",
 							ID:   toolCallID,
@@ -538,7 +537,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 					})
 					sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 						Type:  "content_block_delta",
-						Index: intPtr(currentBlockIndex),
+						Index: currentBlockIndex,
 						Delta: &schema.AnthropicStreamDelta{
 							Type:        "input_json_delta",
 							PartialJSON: fc.Arguments,
@@ -546,7 +545,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 					})
 					sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 						Type:  "content_block_stop",
-						Index: intPtr(currentBlockIndex),
+						Index: currentBlockIndex,
 					})
 					currentBlockIndex++
 					toolCallsEmitted++
@@ -558,7 +557,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
 		if !inToolCall {
 			sendAnthropicSSE(c, schema.AnthropicStreamEvent{
 				Type:  "content_block_stop",
-				Index: intPtr(0),
+				Index: 0,
 			})
 		}

@@ -599,8 +598,6 @@ func convertFuncsToOpenAITools(funcs functions.Functions) []functions.Tool {
 	return tools
 }

-func intPtr(i int) *int { return &i }
-
 func sendAnthropicSSE(c echo.Context, event schema.AnthropicStreamEvent) {
 	data, err := json.Marshal(event)
 	if err != nil {
--- a/core/http/endpoints/elevenlabs/soundgeneration.go
+++ b/core/http/endpoints/elevenlabs/soundgeneration.go
@@ -15,7 +15,6 @@ import (

 // SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation
 // @Summary Generates audio from the input text.
-// @Tags audio
 // @Param request body schema.ElevenLabsSoundGenerationRequest true "query params"
 // @Success 200 {string} binary	 "Response"
 // @Router /v1/sound-generation [post]
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -15,7 +15,6 @@ import (

 // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
 // @Summary Generates audio from the input text.
-// @Tags audio
 // @Param  voice-id	path string	true	"Account ID"
 // @Param request body schema.TTSRequest true "query params"
 // @Success 200 {string} binary	 "Response"
--- a/core/http/endpoints/jina/rerank.go
+++ b/core/http/endpoints/jina/rerank.go
@@ -15,7 +15,6 @@ import (

 // JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
 // @Summary Reranks a list of phrases by relevance to a given text query.
-// @Tags rerank
 // @Param request body schema.JINARerankRequest true "query params"
 // @Success 200 {object} schema.JINARerankResponse "Response"
 // @Router /v1/rerank [post]
--- a/core/http/endpoints/localai/agent_jobs.go
+++ b/core/http/endpoints/localai/agent_jobs.go
@@ -30,15 +30,6 @@ func getJobService(app *application.Application, c echo.Context) *agentpool.Agen
 	return jobSvc
 }

-// CreateTaskEndpoint creates a new agent task definition.
-// @Summary Create a new agent task
-// @Tags agent-jobs
-// @Accept json
-// @Produce json
-// @Param request body schema.Task true "Task definition"
-// @Success 201 {object} map[string]string "id"
-// @Failure 400 {object} map[string]string "error"
-// @Router /api/agent/tasks [post]
 func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		var task schema.Task
@@ -55,17 +46,6 @@ func CreateTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// UpdateTaskEndpoint updates an existing agent task.
-// @Summary Update an agent task
-// @Tags agent-jobs
-// @Accept json
-// @Produce json
-// @Param id path string true "Task ID"
-// @Param request body schema.Task true "Updated task definition"
-// @Success 200 {object} map[string]string "message"
-// @Failure 400 {object} map[string]string "error"
-// @Failure 404 {object} map[string]string "error"
-// @Router /api/agent/tasks/{id} [put]
 func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		id := c.Param("id")
@@ -85,14 +65,6 @@ func UpdateTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// DeleteTaskEndpoint deletes an agent task.
-// @Summary Delete an agent task
-// @Tags agent-jobs
-// @Produce json
-// @Param id path string true "Task ID"
-// @Success 200 {object} map[string]string "message"
-// @Failure 404 {object} map[string]string "error"
-// @Router /api/agent/tasks/{id} [delete]
 func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		id := c.Param("id")
@@ -107,13 +79,6 @@ func DeleteTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// ListTasksEndpoint lists all agent tasks for the current user.
-// @Summary List agent tasks
-// @Tags agent-jobs
-// @Produce json
-// @Param all_users query string false "Set to 'true' for admin cross-user listing"
-// @Success 200 {object} []schema.Task "tasks"
-// @Router /api/agent/tasks [get]
 func ListTasksEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		jobSvc := getJobService(app, c)
@@ -156,14 +121,6 @@ func ListTasksEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// GetTaskEndpoint returns a single agent task by ID.
-// @Summary Get an agent task
-// @Tags agent-jobs
-// @Produce json
-// @Param id path string true "Task ID"
-// @Success 200 {object} schema.Task "task"
-// @Failure 404 {object} map[string]string "error"
-// @Router /api/agent/tasks/{id} [get]
 func GetTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		id := c.Param("id")
@@ -176,15 +133,6 @@ func GetTaskEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// ExecuteJobEndpoint creates and runs a new job for a task.
-// @Summary Execute an agent job
-// @Tags agent-jobs
-// @Accept json
-// @Produce json
-// @Param request body schema.JobExecutionRequest true "Job execution request"
-// @Success 201 {object} schema.JobExecutionResponse "job created"
-// @Failure 400 {object} map[string]string "error"
-// @Router /api/agent/jobs/execute [post]
 func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		var req schema.JobExecutionRequest
@@ -220,14 +168,6 @@ func ExecuteJobEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// GetJobEndpoint returns a single job by ID.
-// @Summary Get an agent job
-// @Tags agent-jobs
-// @Produce json
-// @Param id path string true "Job ID"
-// @Success 200 {object} schema.Job "job"
-// @Failure 404 {object} map[string]string "error"
-// @Router /api/agent/jobs/{id} [get]
 func GetJobEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		id := c.Param("id")
@@ -240,16 +180,6 @@ func GetJobEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// ListJobsEndpoint lists jobs, optionally filtered by task or status.
-// @Summary List agent jobs
-// @Tags agent-jobs
-// @Produce json
-// @Param task_id query string false "Filter by task ID"
-// @Param status query string false "Filter by status (pending, running, completed, failed, cancelled)"
-// @Param limit query integer false "Max number of jobs to return"
-// @Param all_users query string false "Set to 'true' for admin cross-user listing"
-// @Success 200 {object} []schema.Job "jobs"
-// @Router /api/agent/jobs [get]
 func ListJobsEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		var taskID *string
@@ -311,15 +241,6 @@ func ListJobsEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// CancelJobEndpoint cancels a running job.
-// @Summary Cancel an agent job
-// @Tags agent-jobs
-// @Produce json
-// @Param id path string true "Job ID"
-// @Success 200 {object} map[string]string "message"
-// @Failure 400 {object} map[string]string "error"
-// @Failure 404 {object} map[string]string "error"
-// @Router /api/agent/jobs/{id}/cancel [post]
 func CancelJobEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		id := c.Param("id")
@@ -334,14 +255,6 @@ func CancelJobEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// DeleteJobEndpoint deletes a job by ID.
-// @Summary Delete an agent job
-// @Tags agent-jobs
-// @Produce json
-// @Param id path string true "Job ID"
-// @Success 200 {object} map[string]string "message"
-// @Failure 404 {object} map[string]string "error"
-// @Router /api/agent/jobs/{id} [delete]
 func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		id := c.Param("id")
@@ -356,17 +269,6 @@ func DeleteJobEndpoint(app *application.Application) echo.HandlerFunc {
 	}
 }

-// ExecuteTaskByNameEndpoint looks up a task by name and executes it.
-// @Summary Execute an agent task by name
-// @Tags agent-jobs
-// @Accept json
-// @Produce json
-// @Param name path string true "Task name"
-// @Param parameters body object false "Optional template parameters"
-// @Success 201 {object} schema.JobExecutionResponse "job created"
-// @Failure 400 {object} map[string]string "error"
-// @Failure 404 {object} map[string]string "error"
-// @Router /api/agent/tasks/{name}/execute [post]
 func ExecuteTaskByNameEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		name := c.Param("name")
--- a/core/http/endpoints/localai/api_instructions.go
+++ b/core/http/endpoints/localai/api_instructions.go
@@ -1,489 +0,0 @@
-package localai
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"sort"
-	"strings"
-	"sync"
-
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/swagger"
-	"github.com/mudler/xlog"
-)
-
-const swaggerDefsPrefix = "#/definitions/"
-
-// instructionDef is a lightweight instruction definition that maps to swagger tags.
-type instructionDef struct {
-	Name        string   `json:"name"`
-	Description string   `json:"description"`
-	Tags        []string `json:"tags"`
-	Intro       string   `json:"-"` // brief context not in swagger
-}
-
-var instructionDefs = []instructionDef{
-	{
-		Name:        "chat-inference",
-		Description: "OpenAI-compatible chat completions, text completions, and embeddings",
-		Tags:        []string{"inference", "embeddings"},
-		Intro:       "Set \"stream\": true for SSE streaming. Supports tool/function calling when the model config has function templates configured.",
-	},
-	{
-		Name:        "audio",
-		Description: "Text-to-speech, voice activity detection, transcription, and sound generation",
-		Tags:        []string{"audio"},
-	},
-	{
-		Name:        "images",
-		Description: "Image generation and inpainting",
-		Tags:        []string{"images"},
-	},
-	{
-		Name:        "model-management",
-		Description: "Browse the gallery, install, delete, and manage models and backends",
-		Tags:        []string{"models", "backends"},
-	},
-	{
-		Name:        "config-management",
-		Description: "Discover, read, and modify model configuration fields with VRAM estimation",
-		Tags:        []string{"config"},
-		Intro:       "Fields with static options include an \"options\" array in metadata. Fields with dynamic values have an \"autocomplete_provider\" for runtime lookup.",
-	},
-	{
-		Name:        "monitoring",
-		Description: "System metrics, backend status, API and backend traces, backend process logs, and system information",
-		Tags:        []string{"monitoring"},
-		Intro:       "Includes real-time backend log streaming via WebSocket at /ws/backend-logs/:modelId.",
-	},
-	{
-		Name:        "mcp",
-		Description: "Model Context Protocol — tool-augmented chat with MCP servers",
-		Tags:        []string{"mcp"},
-		Intro:       "The model's config must define MCP servers. The endpoint handles tool execution automatically.",
-	},
-	{
-		Name:        "agents",
-		Description: "Agent task and job management for CI/automation workflows",
-		Tags:        []string{"agent-jobs"},
-	},
-	{
-		Name:        "video",
-		Description: "Video generation from text prompts",
-		Tags:        []string{"video"},
-	},
-}
-
-// swaggerState holds parsed swagger spec data, initialised once.
-type swaggerState struct {
-	once  sync.Once
-	spec  map[string]any // full parsed swagger JSON
-	ready bool
-}
-
-var swState swaggerState
-
-func (s *swaggerState) init() {
-	s.once.Do(func() {
-		var spec map[string]any
-		if err := json.Unmarshal(swagger.SwaggerJSON, &spec); err != nil {
-			xlog.Error("failed to parse embedded swagger spec", "err", err)
-			return
-		}
-		s.spec = spec
-		s.ready = true
-	})
-}
-
-// filterSwaggerByTags returns a swagger fragment containing only paths whose
-// operations carry at least one of the given tags, plus the definitions they
-// reference.
-func filterSwaggerByTags(spec map[string]any, tags []string) map[string]any {
-	tagSet := make(map[string]bool, len(tags))
-	for _, t := range tags {
-		tagSet[t] = true
-	}
-
-	paths, _ := spec["paths"].(map[string]any)
-	allDefs, _ := spec["definitions"].(map[string]any)
-
-	filteredPaths := make(map[string]any)
-	for path, methods := range paths {
-		methodMap, ok := methods.(map[string]any)
-		if !ok {
-			continue
-		}
-		filteredMethods := make(map[string]any)
-		for method, opRaw := range methodMap {
-			op, ok := opRaw.(map[string]any)
-			if !ok {
-				continue
-			}
-			opTags, _ := op["tags"].([]any)
-			for _, t := range opTags {
-				if ts, ok := t.(string); ok && tagSet[ts] {
-					filteredMethods[method] = op
-					break
-				}
-			}
-		}
-		if len(filteredMethods) > 0 {
-			filteredPaths[path] = filteredMethods
-		}
-	}
-
-	// Collect all $ref definitions used by the filtered paths.
-	neededDefs := make(map[string]bool)
-	collectRefs(filteredPaths, neededDefs)
-
-	// Resolve nested refs from definitions themselves.
-	changed := true
-	for changed {
-		changed = false
-		for name := range neededDefs {
-			if def, ok := allDefs[name]; ok {
-				before := len(neededDefs)
-				collectRefs(def, neededDefs)
-				if len(neededDefs) > before {
-					changed = true
-				}
-			}
-		}
-	}
-
-	filteredDefs := make(map[string]any)
-	for name := range neededDefs {
-		if def, ok := allDefs[name]; ok {
-			filteredDefs[name] = def
-		}
-	}
-
-	result := map[string]any{
-		"paths": filteredPaths,
-	}
-	if len(filteredDefs) > 0 {
-		result["definitions"] = filteredDefs
-	}
-	return result
-}
-
-// collectRefs walks a JSON structure and collects all $ref definition names.
-func collectRefs(v any, refs map[string]bool) {
-	switch val := v.(type) {
-	case map[string]any:
-		if ref, ok := val["$ref"].(string); ok {
-			if strings.HasPrefix(ref, swaggerDefsPrefix) {
-				refs[ref[len(swaggerDefsPrefix):]] = true
-			}
-		}
-		for _, child := range val {
-			collectRefs(child, refs)
-		}
-	case []any:
-		for _, child := range val {
-			collectRefs(child, refs)
-		}
-	}
-}
-
-// swaggerToMarkdown renders a filtered swagger fragment into concise markdown.
-func swaggerToMarkdown(skillName, intro string, fragment map[string]any) string {
-	var b strings.Builder
-	b.WriteString("# ")
-	b.WriteString(skillName)
-	b.WriteString("\n")
-	if intro != "" {
-		b.WriteString("\n")
-		b.WriteString(intro)
-		b.WriteString("\n")
-	}
-
-	paths, _ := fragment["paths"].(map[string]any)
-	defs, _ := fragment["definitions"].(map[string]any)
-
-	// Sort paths for stable output.
-	sortedPaths := make([]string, 0, len(paths))
-	for p := range paths {
-		sortedPaths = append(sortedPaths, p)
-	}
-	sort.Strings(sortedPaths)
-
-	for _, path := range sortedPaths {
-		methods, ok := paths[path].(map[string]any)
-		if !ok {
-			continue
-		}
-		sortedMethods := sortMethods(methods)
-		for _, method := range sortedMethods {
-			op, ok := methods[method].(map[string]any)
-			if !ok {
-				continue
-			}
-			summary, _ := op["summary"].(string)
-			b.WriteString(fmt.Sprintf("\n## %s %s\n", strings.ToUpper(method), path))
-			if summary != "" {
-				b.WriteString(summary)
-				b.WriteString("\n")
-			}
-
-			// Parameters
-			params, _ := op["parameters"].([]any)
-			bodyParams, nonBodyParams := splitParams(params)
-
-			if len(nonBodyParams) > 0 {
-				b.WriteString("\n**Parameters:**\n")
-				b.WriteString("| Name | In | Type | Required | Description |\n")
-				b.WriteString("|------|----|------|----------|-------------|\n")
-				for _, p := range nonBodyParams {
-					pm, ok := p.(map[string]any)
-					if !ok {
-						continue
-					}
-					name, _ := pm["name"].(string)
-					in, _ := pm["in"].(string)
-					typ, _ := pm["type"].(string)
-					req, _ := pm["required"].(bool)
-					desc, _ := pm["description"].(string)
-					b.WriteString(fmt.Sprintf("| %s | %s | %s | %v | %s |\n", name, in, typ, req, desc))
-				}
-			}
-
-			if len(bodyParams) > 0 {
-				for _, p := range bodyParams {
-					pm, ok := p.(map[string]any)
-					if !ok {
-						continue
-					}
-					schema, _ := pm["schema"].(map[string]any)
-					refName := resolveRefName(schema)
-					if refName != "" {
-						b.WriteString(fmt.Sprintf("\n**Request body** (`%s`):\n", refName))
-						renderSchemaFields(&b, refName, defs)
-					}
-				}
-			}
-
-			// Responses
-			responses, _ := op["responses"].(map[string]any)
-			if len(responses) > 0 {
-				sortedCodes := make([]string, 0, len(responses))
-				for code := range responses {
-					sortedCodes = append(sortedCodes, code)
-				}
-				sort.Strings(sortedCodes)
-				for _, code := range sortedCodes {
-					resp, ok := responses[code].(map[string]any)
-					if !ok {
-						continue
-					}
-					desc, _ := resp["description"].(string)
-					respSchema, _ := resp["schema"].(map[string]any)
-					refName := resolveRefName(respSchema)
-					if refName != "" {
-						b.WriteString(fmt.Sprintf("\n**Response %s** (`%s`): %s\n", code, refName, desc))
-						renderSchemaFields(&b, refName, defs)
-					} else if desc != "" {
-						b.WriteString(fmt.Sprintf("\n**Response %s**: %s\n", code, desc))
-					}
-				}
-			}
-		}
-	}
-
-	return b.String()
-}
-
-// sortMethods returns HTTP methods in a conventional order.
-func sortMethods(methods map[string]any) []string {
-	order := map[string]int{"get": 0, "post": 1, "put": 2, "patch": 3, "delete": 4}
-	keys := make([]string, 0, len(methods))
-	for k := range methods {
-		keys = append(keys, k)
-	}
-	sort.Slice(keys, func(i, j int) bool {
-		oi, oki := order[keys[i]]
-		oj, okj := order[keys[j]]
-		if !oki {
-			oi = 99
-		}
-		if !okj {
-			oj = 99
-		}
-		return oi < oj
-	})
-	return keys
-}
-
-// splitParams separates body parameters from non-body parameters.
-func splitParams(params []any) (body, nonBody []any) {
-	for _, p := range params {
-		pm, ok := p.(map[string]any)
-		if !ok {
-			continue
-		}
-		if in, _ := pm["in"].(string); in == "body" {
-			body = append(body, p)
-		} else {
-			nonBody = append(nonBody, p)
-		}
-	}
-	return
-}
-
-// resolveRefName extracts the definition name from a $ref or returns "".
-func resolveRefName(schema map[string]any) string {
-	if schema == nil {
-		return ""
-	}
-	if ref, ok := schema["$ref"].(string); ok {
-		if strings.HasPrefix(ref, swaggerDefsPrefix) {
-			return ref[len(swaggerDefsPrefix):]
-		}
-	}
-	return ""
-}
-
-// renderSchemaFields writes a markdown field table for a definition.
-func renderSchemaFields(b *strings.Builder, defName string, defs map[string]any) {
-	if defs == nil {
-		return
-	}
-	def, ok := defs[defName].(map[string]any)
-	if !ok {
-		return
-	}
-	props, ok := def["properties"].(map[string]any)
-	if !ok || len(props) == 0 {
-		return
-	}
-
-	// Sort fields
-	fields := make([]string, 0, len(props))
-	for f := range props {
-		fields = append(fields, f)
-	}
-	sort.Strings(fields)
-
-	b.WriteString("| Field | Type | Description |\n")
-	b.WriteString("|-------|------|-------------|\n")
-	for _, field := range fields {
-		prop, ok := props[field].(map[string]any)
-		if !ok {
-			continue
-		}
-		typ := schemaTypeString(prop)
-		desc, _ := prop["description"].(string)
-		b.WriteString(fmt.Sprintf("| %s | %s | %s |\n", field, typ, desc))
-	}
-}
-
-// schemaTypeString returns a human-readable type string for a schema property.
-func schemaTypeString(prop map[string]any) string {
-	if ref := resolveRefName(prop); ref != "" {
-		return ref
-	}
-	typ, _ := prop["type"].(string)
-	if typ == "array" {
-		items, _ := prop["items"].(map[string]any)
-		if items != nil {
-			if ref := resolveRefName(items); ref != "" {
-				return "[]" + ref
-			}
-			it, _ := items["type"].(string)
-			if it != "" {
-				return "[]" + it
-			}
-		}
-		return "[]any"
-	}
-	if typ != "" {
-		return typ
-	}
-	return "object"
-}
-
-// APIInstructionResponse is the JSON response for a single instruction (?format=json).
-type APIInstructionResponse struct {
-	Name            string         `json:"name"`
-	Description     string         `json:"description"`
-	Tags            []string       `json:"tags"`
-	SwaggerFragment map[string]any `json:"swagger_fragment,omitempty"`
-}
-
-// ListAPIInstructionsEndpoint returns all instructions (compact list without guides).
-// @Summary List available API instruction areas
-// @Description Returns a compact list of instruction areas with descriptions and URLs for detailed guides
-// @Tags instructions
-// @Produce json
-// @Success 200 {object} map[string]any "instructions list with hint"
-// @Router /api/instructions [get]
-func ListAPIInstructionsEndpoint() echo.HandlerFunc {
-	return func(c echo.Context) error {
-		type compactInstruction struct {
-			Name        string   `json:"name"`
-			Description string   `json:"description"`
-			Tags        []string `json:"tags"`
-			URL         string   `json:"url"`
-		}
-		instructions := make([]compactInstruction, len(instructionDefs))
-		for i, s := range instructionDefs {
-			instructions[i] = compactInstruction{
-				Name:        s.Name,
-				Description: s.Description,
-				Tags:        s.Tags,
-				URL:         "/api/instructions/" + s.Name,
-			}
-		}
-		return c.JSON(http.StatusOK, map[string]any{
-			"instructions": instructions,
-			"hint":         "Fetch GET {url} for a markdown API guide. Add ?format=json for a raw OpenAPI fragment.",
-		})
-	}
-}
-
-// GetAPIInstructionEndpoint returns a single instruction by name.
-// @Summary Get an instruction's API guide or OpenAPI fragment
-// @Description Returns a markdown guide (default) or filtered OpenAPI fragment (format=json) for a named instruction
-// @Tags instructions
-// @Produce json
-// @Produce text/markdown
-// @Param name path string true "Instruction name (e.g. chat-inference, config-management)"
-// @Param format query string false "Response format: json for OpenAPI fragment, omit for markdown"
-// @Success 200 {object} APIInstructionResponse "instruction documentation"
-// @Failure 404 {object} map[string]string "instruction not found"
-// @Router /api/instructions/{name} [get]
-func GetAPIInstructionEndpoint() echo.HandlerFunc {
-	byName := make(map[string]*instructionDef, len(instructionDefs))
-	for i := range instructionDefs {
-		byName[instructionDefs[i].Name] = &instructionDefs[i]
-	}
-
-	return func(c echo.Context) error {
-		name := c.Param("name")
-		inst, ok := byName[name]
-		if !ok {
-			return c.JSON(http.StatusNotFound, map[string]any{"error": "instruction not found: " + name})
-		}
-
-		swState.init()
-		if !swState.ready {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "swagger spec not available"})
-		}
-
-		fragment := filterSwaggerByTags(swState.spec, inst.Tags)
-
-		format := c.QueryParam("format")
-		if format == "json" {
-			return c.JSON(http.StatusOK, APIInstructionResponse{
-				Name:            inst.Name,
-				Description:     inst.Description,
-				Tags:            inst.Tags,
-				SwaggerFragment: fragment,
-			})
-		}
-
-		guide := swaggerToMarkdown(inst.Name, inst.Intro, fragment)
-		return c.Blob(http.StatusOK, "text/markdown; charset=utf-8", []byte(guide))
-	}
-}
--- a/core/http/endpoints/localai/api_instructions_test.go
+++ b/core/http/endpoints/localai/api_instructions_test.go
@@ -1,222 +0,0 @@
-package localai_test
-
-import (
-	"encoding/json"
-	"io"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-
-	"github.com/labstack/echo/v4"
-	. "github.com/mudler/LocalAI/core/http/endpoints/localai"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-var _ = Describe("API Instructions Endpoints", func() {
-	var app *echo.Echo
-
-	BeforeEach(func() {
-		app = echo.New()
-		app.GET("/api/instructions", ListAPIInstructionsEndpoint())
-		app.GET("/api/instructions/:name", GetAPIInstructionEndpoint())
-	})
-
-	Context("GET /api/instructions", func() {
-		It("should return all instruction definitions", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			err := json.Unmarshal(rec.Body.Bytes(), &resp)
-			Expect(err).NotTo(HaveOccurred())
-
-			Expect(resp).To(HaveKey("hint"))
-			Expect(resp).To(HaveKey("instructions"))
-
-			instructions, ok := resp["instructions"].([]any)
-			Expect(ok).To(BeTrue())
-			Expect(instructions).To(HaveLen(9))
-
-			// Verify each instruction has required fields and correct URL format
-			for _, s := range instructions {
-				inst, ok := s.(map[string]any)
-				Expect(ok).To(BeTrue())
-				Expect(inst["name"]).NotTo(BeEmpty())
-				Expect(inst["description"]).NotTo(BeEmpty())
-				Expect(inst["tags"]).NotTo(BeNil())
-				Expect(inst["url"]).To(HavePrefix("/api/instructions/"))
-				Expect(inst["url"]).To(Equal("/api/instructions/" + inst["name"].(string)))
-			}
-		})
-
-		It("should include known instruction names", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-
-			instructions := resp["instructions"].([]any)
-			names := make([]string, len(instructions))
-			for i, s := range instructions {
-				names[i] = s.(map[string]any)["name"].(string)
-			}
-
-			Expect(names).To(ContainElements(
-				"chat-inference",
-				"config-management",
-				"model-management",
-				"monitoring",
-				"agents",
-			))
-		})
-	})
-
-	Context("GET /api/instructions/:name", func() {
-		It("should return 404 for unknown instruction", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions/nonexistent", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusNotFound))
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-			Expect(resp["error"]).To(ContainSubstring("instruction not found"))
-		})
-
-		It("should return markdown by default", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-			Expect(rec.Header().Get("Content-Type")).To(ContainSubstring("text/markdown"))
-
-			body, err := io.ReadAll(rec.Body)
-			Expect(err).NotTo(HaveOccurred())
-			md := string(body)
-
-			Expect(md).To(HavePrefix("# chat-inference"))
-			// Should contain at least one endpoint heading
-			Expect(md).To(MatchRegexp(`## (GET|POST|PUT|PATCH|DELETE) /`))
-		})
-
-		It("should include intro text for instructions that have one", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			body, _ := io.ReadAll(rec.Body)
-			// chat-inference has an intro about streaming
-			Expect(string(body)).To(ContainSubstring("stream"))
-		})
-
-		It("should return JSON fragment when format=json", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference?format=json", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-			Expect(resp["name"]).To(Equal("chat-inference"))
-			Expect(resp["tags"]).To(ContainElements("inference", "embeddings"))
-
-			fragment, ok := resp["swagger_fragment"].(map[string]any)
-			Expect(ok).To(BeTrue())
-			Expect(fragment).To(HaveKey("paths"))
-
-			paths, ok := fragment["paths"].(map[string]any)
-			Expect(ok).To(BeTrue())
-			Expect(paths).NotTo(BeEmpty())
-		})
-
-		It("should include referenced definitions in JSON fragment", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference?format=json", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-
-			fragment := resp["swagger_fragment"].(map[string]any)
-			Expect(fragment).To(HaveKey("definitions"))
-
-			defs, ok := fragment["definitions"].(map[string]any)
-			Expect(ok).To(BeTrue())
-			Expect(defs).NotTo(BeEmpty())
-		})
-
-		It("should only include paths matching the instruction tags in JSON fragment", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/instructions/config-management?format=json", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-
-			fragment := resp["swagger_fragment"].(map[string]any)
-			paths := fragment["paths"].(map[string]any)
-			Expect(paths).NotTo(BeEmpty())
-
-			// Every operation in every path should have the "config" tag
-			for _, methods := range paths {
-				methodMap := methods.(map[string]any)
-				for _, opRaw := range methodMap {
-					op := opRaw.(map[string]any)
-					tags, _ := op["tags"].([]any)
-					tagStrs := make([]string, len(tags))
-					for i, t := range tags {
-						tagStrs[i] = t.(string)
-					}
-					Expect(tagStrs).To(ContainElement("config"))
-				}
-			}
-		})
-
-		It("should produce stable output across calls", func() {
-			req1 := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
-			rec1 := httptest.NewRecorder()
-			app.ServeHTTP(rec1, req1)
-
-			req2 := httptest.NewRequest(http.MethodGet, "/api/instructions/chat-inference", nil)
-			rec2 := httptest.NewRecorder()
-			app.ServeHTTP(rec2, req2)
-
-			body1, _ := io.ReadAll(rec1.Body)
-			body2, _ := io.ReadAll(rec2.Body)
-			Expect(string(body1)).To(Equal(string(body2)))
-		})
-
-		It("should return markdown for every defined instruction", func() {
-			// First get the list
-			listReq := httptest.NewRequest(http.MethodGet, "/api/instructions", nil)
-			listRec := httptest.NewRecorder()
-			app.ServeHTTP(listRec, listReq)
-
-			var listResp map[string]any
-			Expect(json.Unmarshal(listRec.Body.Bytes(), &listResp)).To(Succeed())
-
-			instructions := listResp["instructions"].([]any)
-			for _, s := range instructions {
-				name := s.(map[string]any)["name"].(string)
-				req := httptest.NewRequest(http.MethodGet, "/api/instructions/"+name, nil)
-				rec := httptest.NewRecorder()
-				app.ServeHTTP(rec, req)
-
-				Expect(rec.Code).To(Equal(http.StatusOK),
-					"instruction %q should return 200", name)
-				body, _ := io.ReadAll(rec.Body)
-				Expect(strings.TrimSpace(string(body))).NotTo(BeEmpty(),
-					"instruction %q should return non-empty markdown", name)
-			}
-		})
-	})
-})
--- a/core/http/endpoints/localai/backend.go
+++ b/core/http/endpoints/localai/backend.go
@@ -37,7 +37,6 @@ func CreateBackendEndpointService(galleries []config.Gallery, systemState *syste

 // GetOpStatusEndpoint returns the job status
 // @Summary Returns the job status
-// @Tags backends
 // @Success 200 {object} galleryop.OpStatus "Response"
 // @Router /backends/jobs/{uuid} [get]
 func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {
@@ -52,7 +51,6 @@ func (mgs *BackendEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {

 // GetAllStatusEndpoint returns all the jobs status progress
 // @Summary Returns all the jobs status progress
-// @Tags backends
 // @Success 200 {object} map[string]galleryop.OpStatus "Response"
 // @Router /backends/jobs [get]
 func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc {
@@ -63,7 +61,6 @@ func (mgs *BackendEndpointService) GetAllStatusEndpoint() echo.HandlerFunc {

 // ApplyBackendEndpoint installs a new backend to a LocalAI instance
 // @Summary Install backends to LocalAI.
-// @Tags backends
 // @Param request body GalleryBackend true "query params"
 // @Success 200 {object} schema.BackendResponse "Response"
 // @Router /backends/apply [post]
@@ -91,7 +88,6 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() echo.HandlerFunc {

 // DeleteBackendEndpoint lets delete backends from a LocalAI instance
 // @Summary delete backends from LocalAI.
-// @Tags backends
 // @Param name	path string	true	"Backend name"
 // @Success 200 {object} schema.BackendResponse "Response"
 // @Router /backends/delete/{name} [post]
@@ -116,7 +112,6 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() echo.HandlerFunc {

 // ListBackendsEndpoint list the available backends configured in LocalAI
 // @Summary List all Backends
-// @Tags backends
 // @Success 200 {object} []gallery.GalleryBackend "Response"
 // @Router /backends [get]
 func (mgs *BackendEndpointService) ListBackendsEndpoint() echo.HandlerFunc {
@@ -131,7 +126,6 @@ func (mgs *BackendEndpointService) ListBackendsEndpoint() echo.HandlerFunc {

 // ListModelGalleriesEndpoint list the available galleries configured in LocalAI
 // @Summary List all Galleries
-// @Tags backends
 // @Success 200 {object} []config.Gallery "Response"
 // @Router /backends/galleries [get]
 // NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
@@ -148,7 +142,6 @@ func (mgs *BackendEndpointService) ListBackendGalleriesEndpoint() echo.HandlerFu

 // ListAvailableBackendsEndpoint list the available backends in the galleries configured in LocalAI
 // @Summary List all available Backends
-// @Tags backends
 // @Success 200 {object} []gallery.GalleryBackend "Response"
 // @Router /backends/available [get]
 func (mgs *BackendEndpointService) ListAvailableBackendsEndpoint(systemState *system.SystemState) echo.HandlerFunc {
--- a/core/http/endpoints/localai/backend_logs.go
+++ b/core/http/endpoints/localai/backend_logs.go
@@ -1,179 +0,0 @@
-package localai
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/url"
-	"sync"
-	"time"
-
-	"github.com/gorilla/websocket"
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/xlog"
-)
-
-var backendLogsUpgrader = websocket.Upgrader{
-	CheckOrigin: func(r *http.Request) bool {
-		origin := r.Header.Get("Origin")
-		if origin == "" {
-			return true // no origin header = same-origin or non-browser
-		}
-		u, err := url.Parse(origin)
-		if err != nil {
-			return false
-		}
-		return u.Host == r.Host
-	},
-}
-
-// backendLogsConn wraps a websocket connection with a mutex for safe concurrent writes
-type backendLogsConn struct {
-	*websocket.Conn
-	mu sync.Mutex
-}
-
-func (c *backendLogsConn) writeJSON(v any) error {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	c.Conn.SetWriteDeadline(time.Now().Add(30 * time.Second))
-	data, err := json.Marshal(v)
-	if err != nil {
-		return fmt.Errorf("marshal error: %w", err)
-	}
-	return c.Conn.WriteMessage(websocket.TextMessage, data)
-}
-
-func (c *backendLogsConn) writePing() error {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	c.Conn.SetWriteDeadline(time.Now().Add(30 * time.Second))
-	return c.Conn.WriteMessage(websocket.PingMessage, nil)
-}
-
-// ListBackendLogsEndpoint returns model IDs that have log buffers
-// @Summary List models with backend logs
-// @Description Returns a sorted list of model IDs that have captured backend process output
-// @Tags monitoring
-// @Produce json
-// @Success 200 {array} string "Model IDs with logs"
-// @Router /api/backend-logs [get]
-func ListBackendLogsEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		return c.JSON(200, ml.BackendLogs().ListModels())
-	}
-}
-
-// GetBackendLogsEndpoint returns log lines for a specific model
-// @Summary Get backend logs for a model
-// @Description Returns all captured log lines (stdout/stderr) for the specified model's backend process
-// @Tags monitoring
-// @Produce json
-// @Param modelId path string true "Model ID"
-// @Success 200 {array} model.BackendLogLine "Log lines"
-// @Router /api/backend-logs/{modelId} [get]
-func GetBackendLogsEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		modelID := c.Param("modelId")
-		return c.JSON(200, ml.BackendLogs().GetLines(modelID))
-	}
-}
-
-// ClearBackendLogsEndpoint clears log lines for a specific model
-// @Summary Clear backend logs for a model
-// @Description Removes all captured log lines for the specified model's backend process
-// @Tags monitoring
-// @Param modelId path string true "Model ID"
-// @Success 204 "Logs cleared"
-// @Router /api/backend-logs/{modelId}/clear [post]
-func ClearBackendLogsEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		ml.BackendLogs().Clear(c.Param("modelId"))
-		return c.NoContent(204)
-	}
-}
-
-// BackendLogsWebSocketEndpoint streams backend logs in real-time over WebSocket
-// @Summary Stream backend logs via WebSocket
-// @Description Opens a WebSocket connection for real-time backend log streaming. Sends an initial batch of existing lines (type "initial"), then streams new lines as they appear (type "line"). Supports ping/pong keepalive.
-// @Tags monitoring
-// @Param modelId path string true "Model ID"
-// @Router /ws/backend-logs/{modelId} [get]
-func BackendLogsWebSocketEndpoint(ml *model.ModelLoader) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		modelID := c.Param("modelId")
-
-		ws, err := backendLogsUpgrader.Upgrade(c.Response(), c.Request(), nil)
-		if err != nil {
-			return err
-		}
-		defer ws.Close()
-
-		ws.SetReadLimit(4096)
-
-		// Set up ping/pong for keepalive
-		ws.SetReadDeadline(time.Now().Add(90 * time.Second))
-		ws.SetPongHandler(func(string) error {
-			ws.SetReadDeadline(time.Now().Add(90 * time.Second))
-			return nil
-		})
-
-		conn := &backendLogsConn{Conn: ws}
-
-		// Send existing lines as initial batch
-		existingLines := ml.BackendLogs().GetLines(modelID)
-		initialMsg := map[string]any{
-			"type":  "initial",
-			"lines": existingLines,
-		}
-		if err := conn.writeJSON(initialMsg); err != nil {
-			xlog.Debug("WebSocket backend-logs initial write failed", "error", err)
-			return nil
-		}
-
-		// Subscribe to new lines
-		lineCh, unsubscribe := ml.BackendLogs().Subscribe(modelID)
-		defer unsubscribe()
-
-		// Handle close from client side
-		closeCh := make(chan struct{})
-		go func() {
-			for {
-				_, _, err := ws.ReadMessage()
-				if err != nil {
-					close(closeCh)
-					return
-				}
-			}
-		}()
-
-		// Ping ticker for keepalive
-		pingTicker := time.NewTicker(30 * time.Second)
-		defer pingTicker.Stop()
-
-		// Forward new lines to WebSocket
-		for {
-			select {
-			case line, ok := <-lineCh:
-				if !ok {
-					return nil
-				}
-				lineMsg := map[string]any{
-					"type": "line",
-					"line": line,
-				}
-				if err := conn.writeJSON(lineMsg); err != nil {
-					xlog.Debug("WebSocket backend-logs write error", "error", err)
-					return nil
-				}
-			case <-pingTicker.C:
-				if err := conn.writePing(); err != nil {
-					return nil
-				}
-			case <-closeCh:
-				return nil
-			}
-		}
-	}
-}
--- a/core/http/endpoints/localai/backend_logs_test.go
+++ b/core/http/endpoints/localai/backend_logs_test.go
@@ -1,196 +0,0 @@
-package localai_test
-
-import (
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"os"
-	"path/filepath"
-	"strings"
-	"time"
-
-	"github.com/gorilla/websocket"
-	"github.com/labstack/echo/v4"
-	. "github.com/mudler/LocalAI/core/http/endpoints/localai"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/system"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-var _ = Describe("Backend Logs Endpoints", func() {
-	var (
-		app         *echo.Echo
-		tempDir     string
-		modelLoader *model.ModelLoader
-	)
-
-	BeforeEach(func() {
-		var err error
-		tempDir, err = os.MkdirTemp("", "backend-logs-test-*")
-		Expect(err).NotTo(HaveOccurred())
-
-		modelsPath := filepath.Join(tempDir, "models")
-		Expect(os.MkdirAll(modelsPath, 0750)).To(Succeed())
-
-		systemState, err := system.GetSystemState(
-			system.WithModelPath(modelsPath),
-		)
-		Expect(err).NotTo(HaveOccurred())
-
-		modelLoader = model.NewModelLoader(systemState)
-
-		app = echo.New()
-		app.GET("/api/backend-logs", ListBackendLogsEndpoint(modelLoader))
-		app.GET("/api/backend-logs/:modelId", GetBackendLogsEndpoint(modelLoader))
-		app.POST("/api/backend-logs/:modelId/clear", ClearBackendLogsEndpoint(modelLoader))
-		app.GET("/ws/backend-logs/:modelId", BackendLogsWebSocketEndpoint(modelLoader))
-	})
-
-	AfterEach(func() {
-		os.RemoveAll(tempDir)
-	})
-
-	Context("REST endpoints", func() {
-		It("should return empty list of models with logs", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/backend-logs", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var models []string
-			Expect(json.Unmarshal(rec.Body.Bytes(), &models)).To(Succeed())
-			Expect(models).To(BeEmpty())
-		})
-
-		It("should list models that have logs", func() {
-			modelLoader.BackendLogs().AppendLine("my-model", "stdout", "hello")
-
-			req := httptest.NewRequest(http.MethodGet, "/api/backend-logs", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var models []string
-			Expect(json.Unmarshal(rec.Body.Bytes(), &models)).To(Succeed())
-			Expect(models).To(ContainElement("my-model"))
-		})
-
-		It("should return log lines for a model", func() {
-			modelLoader.BackendLogs().AppendLine("my-model", "stdout", "line one")
-			modelLoader.BackendLogs().AppendLine("my-model", "stderr", "line two")
-
-			req := httptest.NewRequest(http.MethodGet, "/api/backend-logs/my-model", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var lines []model.BackendLogLine
-			Expect(json.Unmarshal(rec.Body.Bytes(), &lines)).To(Succeed())
-			Expect(lines).To(HaveLen(2))
-			Expect(lines[0].Text).To(Equal("line one"))
-			Expect(lines[0].Stream).To(Equal("stdout"))
-			Expect(lines[1].Text).To(Equal("line two"))
-			Expect(lines[1].Stream).To(Equal("stderr"))
-		})
-
-		It("should return empty log lines for unknown model", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/backend-logs/unknown-model", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-		})
-
-		It("should clear logs for a model", func() {
-			modelLoader.BackendLogs().AppendLine("my-model", "stdout", "hello")
-
-			req := httptest.NewRequest(http.MethodPost, "/api/backend-logs/my-model/clear", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusNoContent))
-
-			// Verify logs are cleared
-			req = httptest.NewRequest(http.MethodGet, "/api/backend-logs/my-model", nil)
-			rec = httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			var lines []model.BackendLogLine
-			Expect(json.Unmarshal(rec.Body.Bytes(), &lines)).To(Succeed())
-			Expect(lines).To(BeEmpty())
-		})
-	})
-
-	Context("WebSocket endpoint", func() {
-		It("should send initial lines and stream new lines", func() {
-			// Seed some existing lines before connecting
-			modelLoader.BackendLogs().AppendLine("ws-model", "stdout", "existing line")
-
-			// Start a real HTTP server for WebSocket
-			srv := httptest.NewServer(app)
-			defer srv.Close()
-
-			// Dial the WebSocket
-			wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") + "/ws/backend-logs/ws-model"
-			dialer := websocket.Dialer{HandshakeTimeout: 2 * time.Second}
-			conn, _, err := dialer.Dial(wsURL, nil)
-			Expect(err).NotTo(HaveOccurred())
-			defer conn.Close()
-
-			// Read the initial message
-			var initialMsg map[string]any
-			err = conn.ReadJSON(&initialMsg)
-			Expect(err).NotTo(HaveOccurred())
-			Expect(initialMsg["type"]).To(Equal("initial"))
-
-			initialLines, ok := initialMsg["lines"].([]any)
-			Expect(ok).To(BeTrue())
-			Expect(initialLines).To(HaveLen(1))
-
-			firstLine := initialLines[0].(map[string]any)
-			Expect(firstLine["text"]).To(Equal("existing line"))
-
-			// Now append a new line and verify it streams through
-			modelLoader.BackendLogs().AppendLine("ws-model", "stderr", "streamed line")
-
-			var lineMsg map[string]any
-			conn.SetReadDeadline(time.Now().Add(2 * time.Second))
-			err = conn.ReadJSON(&lineMsg)
-			Expect(err).NotTo(HaveOccurred())
-			Expect(lineMsg["type"]).To(Equal("line"))
-
-			lineData, ok := lineMsg["line"].(map[string]any)
-			Expect(ok).To(BeTrue())
-			Expect(lineData["text"]).To(Equal("streamed line"))
-			Expect(lineData["stream"]).To(Equal("stderr"))
-		})
-
-		It("should handle connection close gracefully", func() {
-			srv := httptest.NewServer(app)
-			defer srv.Close()
-
-			wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") + "/ws/backend-logs/close-model"
-			dialer := websocket.Dialer{HandshakeTimeout: 2 * time.Second}
-			conn, _, err := dialer.Dial(wsURL, nil)
-			Expect(err).NotTo(HaveOccurred())
-
-			// Read initial message
-			var initialMsg map[string]any
-			err = conn.ReadJSON(&initialMsg)
-			Expect(err).NotTo(HaveOccurred())
-			Expect(initialMsg["type"]).To(Equal("initial"))
-
-			// Close the connection from client side
-			conn.Close()
-
-			// Give the server goroutine time to detect the close
-			time.Sleep(50 * time.Millisecond)
-
-			// No panic or hang — the test passing is the assertion
-		})
-	})
-})
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@@ -8,7 +8,6 @@ import (

 // BackendMonitorEndpoint returns the status of the specified backend
 // @Summary Backend monitor endpoint
-// @Tags monitoring
 // @Param request body schema.BackendMonitorRequest true "Backend statistics request"
 // @Success 200 {object} proto.StatusResponse "Response"
 // @Router /backend/monitor [get]
@@ -30,8 +29,7 @@ func BackendMonitorEndpoint(bm *monitoring.BackendMonitorService) echo.HandlerFu
 }

 // BackendShutdownEndpoint shuts down the specified backend
-// @Summary Backend shutdown endpoint
-// @Tags monitoring
+// @Summary Backend monitor endpoint
 // @Param request body schema.BackendMonitorRequest true "Backend statistics request"
 // @Router /backend/shutdown [post]
 func BackendShutdownEndpoint(bm *monitoring.BackendMonitorService) echo.HandlerFunc {
--- a/core/http/endpoints/localai/config_meta.go
+++ b/core/http/endpoints/localai/config_meta.go
@@ -1,242 +0,0 @@
-package localai
-
-import (
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"net/url"
-	"os"
-	"reflect"
-	"sort"
-	"strings"
-
-	"dario.cat/mergo"
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/config/meta"
-	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/core/services/galleryop"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/utils"
-	"github.com/mudler/xlog"
-	"gopkg.in/yaml.v3"
-)
-
-// ConfigMetadataEndpoint returns field metadata for config fields.
-// Without ?section, returns just the section index (lightweight).
-// With ?section=<id>, returns fields for that section only.
-// With ?section=all, returns all fields grouped by section.
-// @Summary List model configuration field metadata
-// @Description Returns config field metadata. Use ?section=<id> to filter by section, or omit for a section index.
-// @Tags config
-// @Produce json
-// @Param section query string false "Section ID to filter (e.g. 'general', 'llm', 'parameters') or 'all' for everything"
-// @Success 200 {object} map[string]any "Section index or filtered field metadata"
-// @Router /api/models/config-metadata [get]
-func ConfigMetadataEndpoint() echo.HandlerFunc {
-	return func(c echo.Context) error {
-		sectionParam := c.QueryParam("section")
-
-		// No section param: return lightweight section index.
-		if sectionParam == "" {
-			sections := meta.DefaultSections()
-			type sectionInfo struct {
-				ID    string `json:"id"`
-				Label string `json:"label"`
-				URL   string `json:"url"`
-			}
-			index := make([]sectionInfo, len(sections))
-			for i, s := range sections {
-				index[i] = sectionInfo{
-					ID:    s.ID,
-					Label: s.Label,
-					URL:   "/api/models/config-metadata?section=" + s.ID,
-				}
-			}
-			return c.JSON(http.StatusOK, map[string]any{
-				"hint":     "Fetch a section URL to see its fields. Use ?section=all for everything.",
-				"sections": index,
-			})
-		}
-
-		md := meta.BuildConfigMetadata(reflect.TypeOf(config.ModelConfig{}))
-
-		// section=all: return everything.
-		if sectionParam == "all" {
-			return c.JSON(http.StatusOK, md)
-		}
-
-		// Filter to requested section.
-		var filtered []meta.FieldMeta
-		for _, f := range md.Fields {
-			if f.Section == sectionParam {
-				filtered = append(filtered, f)
-			}
-		}
-		if len(filtered) == 0 {
-			return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown section: " + sectionParam})
-		}
-		return c.JSON(http.StatusOK, filtered)
-	}
-}
-
-// AutocompleteEndpoint handles dynamic autocomplete lookups for config fields.
-// Static option lists (quantizations, cache types, diffusers pipelines/schedulers)
-// are embedded directly in the field metadata Options; only truly dynamic values
-// that require runtime lookup are served here.
-// @Summary Get dynamic autocomplete values for a config field
-// @Description Returns runtime-resolved values for dynamic providers (backends, models)
-// @Tags config
-// @Produce json
-// @Param provider path string true "Provider name (backends, models, models:chat, models:tts, models:transcript, models:vad)"
-// @Success 200 {object} map[string]any "values array"
-// @Router /api/models/config-metadata/autocomplete/{provider} [get]
-func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		provider := c.Param("provider")
-		var values []string
-
-		switch {
-		case provider == meta.ProviderBackends:
-			installedBackends, err := gallery.ListSystemBackends(appConfig.SystemState)
-			if err == nil {
-				for name := range installedBackends {
-					values = append(values, name)
-				}
-			}
-			sort.Strings(values)
-
-		case provider == meta.ProviderModels:
-			modelConfigs := cl.GetAllModelsConfigs()
-			for _, cfg := range modelConfigs {
-				values = append(values, cfg.Name)
-			}
-			modelsWithoutConfig, _ := galleryop.ListModels(cl, ml, config.NoFilterFn, galleryop.LOOSE_ONLY)
-			values = append(values, modelsWithoutConfig...)
-			sort.Strings(values)
-
-		case strings.HasPrefix(provider, "models:"):
-			capability := strings.TrimPrefix(provider, "models:")
-			var filterFn config.ModelConfigFilterFn
-			switch capability {
-			case "chat":
-				filterFn = config.BuildUsecaseFilterFn(config.FLAG_CHAT)
-			case "tts":
-				filterFn = config.BuildUsecaseFilterFn(config.FLAG_TTS)
-			case "vad":
-				filterFn = config.BuildUsecaseFilterFn(config.FLAG_VAD)
-			case "transcript":
-				filterFn = config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)
-			default:
-				filterFn = config.NoFilterFn
-			}
-			filteredConfigs := cl.GetModelConfigsByFilter(filterFn)
-			for _, cfg := range filteredConfigs {
-				values = append(values, cfg.Name)
-			}
-			sort.Strings(values)
-
-		default:
-			return c.JSON(http.StatusNotFound, map[string]any{"error": "unknown provider: " + provider})
-		}
-
-		return c.JSON(http.StatusOK, map[string]any{"values": values})
-	}
-}
-
-// PatchConfigEndpoint handles PATCH requests to partially update a model config
-// using nested JSON merge.
-// @Summary Partially update a model configuration
-// @Description Deep-merges the JSON patch body into the existing model config
-// @Tags config
-// @Accept json
-// @Produce json
-// @Param name path string true "Model name"
-// @Success 200 {object} map[string]any "success message"
-// @Router /api/models/config-json/{name} [patch]
-func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		modelName := c.Param("name")
-		if decoded, err := url.PathUnescape(modelName); err == nil {
-			modelName = decoded
-		}
-		if modelName == "" {
-			return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"})
-		}
-
-		modelConfig, exists := cl.GetModelConfig(modelName)
-		if !exists {
-			return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"})
-		}
-
-		patchBody, err := io.ReadAll(c.Request().Body)
-		if err != nil || len(patchBody) == 0 {
-			return c.JSON(http.StatusBadRequest, map[string]any{"error": "request body is empty or unreadable"})
-		}
-
-		var patchMap map[string]any
-		if err := json.Unmarshal(patchBody, &patchMap); err != nil {
-			return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid JSON: " + err.Error()})
-		}
-
-		existingJSON, err := json.Marshal(modelConfig)
-		if err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal existing config"})
-		}
-
-		var existingMap map[string]any
-		if err := json.Unmarshal(existingJSON, &existingMap); err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to parse existing config"})
-		}
-
-		if err := mergo.Merge(&existingMap, patchMap, mergo.WithOverride); err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to merge configs: " + err.Error()})
-		}
-
-		mergedJSON, err := json.Marshal(existingMap)
-		if err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal merged config"})
-		}
-
-		var updatedConfig config.ModelConfig
-		if err := json.Unmarshal(mergedJSON, &updatedConfig); err != nil {
-			return c.JSON(http.StatusBadRequest, map[string]any{"error": "merged config is invalid: " + err.Error()})
-		}
-
-		if valid, err := updatedConfig.Validate(); !valid {
-			errMsg := "validation failed"
-			if err != nil {
-				errMsg = err.Error()
-			}
-			return c.JSON(http.StatusBadRequest, map[string]any{"error": errMsg})
-		}
-
-		configPath := modelConfig.GetModelConfigFile()
-		if err := utils.VerifyPath(configPath, appConfig.SystemState.Model.ModelsPath); err != nil {
-			return c.JSON(http.StatusForbidden, map[string]any{"error": "config path not trusted: " + err.Error()})
-		}
-
-		yamlData, err := yaml.Marshal(updatedConfig)
-		if err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to marshal YAML"})
-		}
-
-		if err := os.WriteFile(configPath, yamlData, 0644); err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to write config file"})
-		}
-
-		if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": "failed to reload configs: " + err.Error()})
-		}
-
-		if err := cl.Preload(appConfig.SystemState.Model.ModelsPath); err != nil {
-			xlog.Warn("Failed to preload after PATCH", "error", err)
-		}
-
-		return c.JSON(http.StatusOK, map[string]any{
-			"success": true,
-			"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
-		})
-	}
-}
--- a/core/http/endpoints/localai/config_meta_test.go
+++ b/core/http/endpoints/localai/config_meta_test.go
@@ -1,243 +0,0 @@
-package localai_test
-
-import (
-	"bytes"
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"os"
-	"path/filepath"
-
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/config"
-	. "github.com/mudler/LocalAI/core/http/endpoints/localai"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/system"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-var _ = Describe("Config Metadata Endpoints", func() {
-	var (
-		app          *echo.Echo
-		tempDir      string
-		configLoader *config.ModelConfigLoader
-		modelLoader  *model.ModelLoader
-		appConfig    *config.ApplicationConfig
-	)
-
-	BeforeEach(func() {
-		var err error
-		tempDir, err = os.MkdirTemp("", "config-meta-test-*")
-		Expect(err).NotTo(HaveOccurred())
-
-		systemState, err := system.GetSystemState(
-			system.WithModelPath(tempDir),
-		)
-		Expect(err).NotTo(HaveOccurred())
-
-		appConfig = config.NewApplicationConfig(
-			config.WithSystemState(systemState),
-		)
-		configLoader = config.NewModelConfigLoader(tempDir)
-		modelLoader = model.NewModelLoader(systemState)
-
-		app = echo.New()
-		app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
-		app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
-		app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, appConfig))
-	})
-
-	AfterEach(func() {
-		os.RemoveAll(tempDir)
-	})
-
-	Context("GET /api/models/config-metadata", func() {
-		It("should return section index when no section param", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-			Expect(resp).To(HaveKey("hint"))
-			Expect(resp).To(HaveKey("sections"))
-
-			sections, ok := resp["sections"].([]any)
-			Expect(ok).To(BeTrue())
-			Expect(sections).NotTo(BeEmpty())
-
-			// Verify known section IDs are present
-			ids := make([]string, len(sections))
-			for i, s := range sections {
-				sec := s.(map[string]any)
-				Expect(sec).To(HaveKey("id"))
-				Expect(sec).To(HaveKey("label"))
-				Expect(sec).To(HaveKey("url"))
-				ids[i] = sec["id"].(string)
-			}
-			Expect(ids).To(ContainElements("general", "parameters"))
-		})
-
-		It("should return all fields when section=all", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata?section=all", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-			Expect(resp).To(HaveKey("fields"))
-
-			fields, ok := resp["fields"].([]any)
-			Expect(ok).To(BeTrue())
-			Expect(len(fields)).To(BeNumerically(">=", 80))
-		})
-
-		It("should filter by section", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata?section=general", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var fields []map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &fields)).To(Succeed())
-			Expect(fields).NotTo(BeEmpty())
-
-			for _, f := range fields {
-				Expect(f["section"]).To(Equal("general"))
-			}
-		})
-
-		It("should return 404 for unknown section", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata?section=nonexistent", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusNotFound))
-		})
-	})
-
-	Context("GET /api/models/config-metadata/autocomplete/:provider", func() {
-		It("should return values for backends provider", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata/autocomplete/backends", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-			Expect(resp).To(HaveKey("values"))
-		})
-
-		It("should return model names for models provider", func() {
-			// Seed a model config
-			seedConfig := `name: test-model
-backend: llama-cpp
-`
-			Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
-			Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
-
-			req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata/autocomplete/models", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-
-			values, ok := resp["values"].([]any)
-			Expect(ok).To(BeTrue())
-			Expect(values).To(ContainElement("test-model"))
-		})
-
-		It("should return 404 for unknown provider", func() {
-			req := httptest.NewRequest(http.MethodGet, "/api/models/config-metadata/autocomplete/unknown", nil)
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusNotFound))
-		})
-	})
-
-	Context("PATCH /api/models/config-json/:name", func() {
-		It("should return 404 for nonexistent model", func() {
-			body := bytes.NewBufferString(`{"backend": "bar"}`)
-			req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/nonexistent", body)
-			req.Header.Set("Content-Type", "application/json")
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusNotFound))
-		})
-
-		It("should return 400 for empty body", func() {
-			// Seed a model config
-			seedConfig := `name: test-model
-backend: llama-cpp
-`
-			Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
-			Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
-
-			req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/test-model", nil)
-			req.Header.Set("Content-Type", "application/json")
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusBadRequest))
-		})
-
-		It("should return 400 for invalid JSON", func() {
-			seedConfig := `name: test-model
-backend: llama-cpp
-`
-			Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
-			Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
-
-			body := bytes.NewBufferString(`not json`)
-			req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/test-model", body)
-			req.Header.Set("Content-Type", "application/json")
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusBadRequest))
-		})
-
-		It("should merge a field update and persist to disk", func() {
-			seedConfig := `name: test-model
-backend: llama-cpp
-`
-			configPath := filepath.Join(tempDir, "test-model.yaml")
-			Expect(os.WriteFile(configPath, []byte(seedConfig), 0644)).To(Succeed())
-			Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
-
-			body := bytes.NewBufferString(`{"backend": "vllm"}`)
-			req := httptest.NewRequest(http.MethodPatch, "/api/models/config-json/test-model", body)
-			req.Header.Set("Content-Type", "application/json")
-			rec := httptest.NewRecorder()
-			app.ServeHTTP(rec, req)
-
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-			Expect(resp["success"]).To(BeTrue())
-
-			// Verify the reloaded config has the updated value
-			updatedConfig, exists := configLoader.GetModelConfig("test-model")
-			Expect(exists).To(BeTrue())
-			Expect(updatedConfig.Backend).To(Equal("vllm"))
-
-			// Verify the file on disk was updated
-			data, err := os.ReadFile(configPath)
-			Expect(err).NotTo(HaveOccurred())
-			Expect(string(data)).To(ContainSubstring("vllm"))
-		})
-	})
-})
--- a/core/http/endpoints/localai/detection.go
+++ b/core/http/endpoints/localai/detection.go
@@ -13,7 +13,6 @@ import (

 // DetectionEndpoint is the LocalAI Detection endpoint https://localai.io/docs/api-reference/detection
 // @Summary Detects objects in the input image.
-// @Tags detection
 // @Param request body schema.DetectionRequest true "query params"
 // @Success 200 {object} schema.DetectionResponse "Response"
 // @Router /v1/detection [post]
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -40,7 +40,6 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, backendGaller

 // GetOpStatusEndpoint returns the job status
 // @Summary Returns the job status
-// @Tags models
 // @Success 200 {object} galleryop.OpStatus "Response"
 // @Router /models/jobs/{uuid} [get]
 func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {
@@ -55,7 +54,6 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() echo.HandlerFunc {

 // GetAllStatusEndpoint returns all the jobs status progress
 // @Summary Returns all the jobs status progress
-// @Tags models
 // @Success 200 {object} map[string]galleryop.OpStatus "Response"
 // @Router /models/jobs [get]
 func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc {
@@ -66,7 +64,6 @@ func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() echo.HandlerFunc

 // ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
 // @Summary Install models to LocalAI.
-// @Tags models
 // @Param request body GalleryModel true "query params"
 // @Success 200 {object} schema.GalleryResponse "Response"
 // @Router /models/apply [post]
@@ -96,7 +93,6 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() echo.Handler

 // DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
 // @Summary delete models to LocalAI.
-// @Tags models
 // @Param name	path string	true	"Model name"
 // @Success 200 {object} schema.GalleryResponse "Response"
 // @Router /models/delete/{name} [post]
@@ -122,8 +118,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() echo.Handle

 // ListModelFromGalleryEndpoint list the available models for installation from the active galleries
 // @Summary List installable models.
-// @Tags models
-// @Success 200 {object} []gallery.Metadata "Response"
+// @Success 200 {object} []gallery.GalleryModel "Response"
 // @Router /models/available [get]
 func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState *system.SystemState) echo.HandlerFunc {
 	return func(c echo.Context) error {
@@ -154,7 +149,6 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint(systemState

 // ListModelGalleriesEndpoint list the available galleries configured in LocalAI
 // @Summary List all Galleries
-// @Tags models
 // @Success 200 {object} []config.Gallery "Response"
 // @Router /models/galleries [get]
 // NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
--- a/core/http/endpoints/localai/get_token_metrics.go
+++ b/core/http/endpoints/localai/get_token_metrics.go
@@ -16,7 +16,6 @@ import (
 // TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
 //
 //	@Summary	Get TokenMetrics for Active Slot.
-//	@Tags tokenize
 //	@Accept json
 //	@Produce audio/x-wav
 //	@Success	200		{string}	binary				"generated audio/wav file"
--- a/core/http/endpoints/localai/mcp.go
+++ b/core/http/endpoints/localai/mcp.go
@@ -53,7 +53,6 @@ type MCPErrorEvent struct {
 // which handles MCP tool injection and server-side execution.
 // Both streaming and non-streaming modes use standard OpenAI response format.
 // @Summary MCP chat completions with automatic tool execution
-// @Tags mcp
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/mcp/chat/completions [post]
--- a/core/http/endpoints/localai/metrics.go
+++ b/core/http/endpoints/localai/metrics.go
@@ -10,9 +10,7 @@ import (

 // LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI
 // @Summary Prometheus metrics endpoint
-// @Tags monitoring
-// @Produce text/plain
-// @Success 200 {string} string "Prometheus metrics"
+// @Param request body config.Gallery true "Gallery details"
 // @Router /metrics [get]
 func LocalAIMetricsEndpoint() echo.HandlerFunc {
 	return echo.WrapHandler(promhttp.Handler())
--- a/core/http/endpoints/localai/p2p.go
+++ b/core/http/endpoints/localai/p2p.go
@@ -9,7 +9,6 @@ import (

 // ShowP2PNodes returns the P2P Nodes
 // @Summary Returns available P2P nodes
-// @Tags p2p
 // @Success 200 {object} []schema.P2PNodesResponse "Response"
 // @Router /api/p2p [get]
 func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc {
@@ -25,7 +24,6 @@ func ShowP2PNodes(appConfig *config.ApplicationConfig) echo.HandlerFunc {

 // ShowP2PToken returns the P2P token
 // @Summary Show the P2P token
-// @Tags p2p
 // @Success 200 {string} string	 "Response"
 // @Router /api/p2p/token [get]
 func ShowP2PToken(appConfig *config.ApplicationConfig) echo.HandlerFunc {
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@@ -9,7 +9,6 @@ import (

 // SystemInformations returns the system informations
 // @Summary Show the LocalAI instance information
-// @Tags monitoring
 // @Success 200 {object} schema.SystemInformationResponse "Response"
 // @Router /system [get]
 func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
--- a/core/http/endpoints/localai/tokenize.go
+++ b/core/http/endpoints/localai/tokenize.go
@@ -11,7 +11,6 @@ import (

 // TokenizeEndpoint exposes a REST API to tokenize the content
 // @Summary Tokenize the input.
-// @Tags tokenize
 // @Param request body schema.TokenizeRequest true "Request"
 // @Success 200 {object} schema.TokenizeResponse "Response"
 // @Router /v1/tokenize [post]
--- a/core/http/endpoints/localai/traces.go
+++ b/core/http/endpoints/localai/traces.go
@@ -1,59 +0,0 @@
-package localai
-
-import (
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/http/middleware"
-	"github.com/mudler/LocalAI/core/trace"
-)
-
-// GetAPITracesEndpoint returns all API request/response traces
-// @Summary List API request/response traces
-// @Description Returns captured API exchange traces (request/response pairs) in reverse chronological order
-// @Tags monitoring
-// @Produce json
-// @Success 200 {object} map[string]any "Traced API exchanges"
-// @Router /api/traces [get]
-func GetAPITracesEndpoint() echo.HandlerFunc {
-	return func(c echo.Context) error {
-		return c.JSON(200, middleware.GetTraces())
-	}
-}
-
-// ClearAPITracesEndpoint clears all API traces
-// @Summary Clear API traces
-// @Description Removes all captured API request/response traces from the buffer
-// @Tags monitoring
-// @Success 204 "Traces cleared"
-// @Router /api/traces/clear [post]
-func ClearAPITracesEndpoint() echo.HandlerFunc {
-	return func(c echo.Context) error {
-		middleware.ClearTraces()
-		return c.NoContent(204)
-	}
-}
-
-// GetBackendTracesEndpoint returns all backend operation traces
-// @Summary List backend operation traces
-// @Description Returns captured backend traces (LLM calls, embeddings, TTS, etc.) in reverse chronological order
-// @Tags monitoring
-// @Produce json
-// @Success 200 {object} map[string]any "Backend operation traces"
-// @Router /api/backend-traces [get]
-func GetBackendTracesEndpoint() echo.HandlerFunc {
-	return func(c echo.Context) error {
-		return c.JSON(200, trace.GetBackendTraces())
-	}
-}
-
-// ClearBackendTracesEndpoint clears all backend traces
-// @Summary Clear backend traces
-// @Description Removes all captured backend operation traces from the buffer
-// @Tags monitoring
-// @Success 204 "Traces cleared"
-// @Router /api/backend-traces/clear [post]
-func ClearBackendTracesEndpoint() echo.HandlerFunc {
-	return func(c echo.Context) error {
-		trace.ClearBackendTraces()
-		return c.NoContent(204)
-	}
-}
--- a/core/http/endpoints/localai/traces_test.go
+++ b/core/http/endpoints/localai/traces_test.go
@@ -1,55 +0,0 @@
-package localai_test
-
-import (
-	"net/http"
-	"net/http/httptest"
-
-	"github.com/labstack/echo/v4"
-	. "github.com/mudler/LocalAI/core/http/endpoints/localai"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-var _ = Describe("Traces Endpoints", func() {
-	var app *echo.Echo
-
-	BeforeEach(func() {
-		app = echo.New()
-		app.GET("/api/traces", GetAPITracesEndpoint())
-		app.POST("/api/traces/clear", ClearAPITracesEndpoint())
-		app.GET("/api/backend-traces", GetBackendTracesEndpoint())
-		app.POST("/api/backend-traces/clear", ClearBackendTracesEndpoint())
-	})
-
-	It("should return API traces", func() {
-		req := httptest.NewRequest(http.MethodGet, "/api/traces", nil)
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusOK))
-	})
-
-	It("should clear API traces", func() {
-		req := httptest.NewRequest(http.MethodPost, "/api/traces/clear", nil)
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusNoContent))
-	})
-
-	It("should return backend traces", func() {
-		req := httptest.NewRequest(http.MethodGet, "/api/backend-traces", nil)
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusOK))
-	})
-
-	It("should clear backend traces", func() {
-		req := httptest.NewRequest(http.MethodPost, "/api/backend-traces/clear", nil)
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusNoContent))
-	})
-})
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -17,7 +17,6 @@ import (
 // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
 //
 //		@Summary	Generates audio from the input text.
-//		@Tags		audio
 //	 	@Accept json
 //	 	@Produce audio/x-wav
 //		@Param		request	body		schema.TTSRequest	true	"query params"
--- a/core/http/endpoints/localai/vad.go
+++ b/core/http/endpoints/localai/vad.go
@@ -12,7 +12,6 @@ import (

 // VADEndpoint is Voice-Activation-Detection endpoint
 // @Summary	Detect voice fragments in an audio stream
-// @Tags audio
 // @Accept json
 // @Param		request	body		schema.VADRequest	true	"query params"
 // @Success 200 {object} proto.VADResponse "Response"
--- a/core/http/endpoints/localai/video.go
+++ b/core/http/endpoints/localai/video.go
@@ -62,7 +62,6 @@ func downloadFile(url string) (string, error) {
 */
 // VideoEndpoint
 // @Summary Creates a video given a prompt.
-// @Tags video
 // @Param request body schema.VideoRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /video [post]
--- a/core/http/endpoints/localai/vram.go
+++ b/core/http/endpoints/localai/vram.go
@@ -1,145 +0,0 @@
-package localai
-
-import (
-	"context"
-	"fmt"
-	"net/http"
-	"path/filepath"
-	"strings"
-	"time"
-
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/pkg/vram"
-)
-
-type vramEstimateRequest struct {
-	Model       string `json:"model"`                      // model name (must be installed)
-	ContextSize uint32 `json:"context_size,omitempty"`     // context length to estimate for (default 8192)
-	GPULayers   int    `json:"gpu_layers,omitempty"`       // number of layers to offload to GPU (0 = all)
-	KVQuantBits int    `json:"kv_quant_bits,omitempty"`    // KV cache quantization bits (0 = fp16)
-}
-
-type vramEstimateResponse struct {
-	vram.EstimateResult
-	ContextNote     string `json:"context_note,omitempty"`      // note when context_size was defaulted
-	ModelMaxContext uint64 `json:"model_max_context,omitempty"` // model's trained maximum context length
-}
-
-// resolveModelURI converts a relative model path to a file:// URI so the
-// size resolver can stat it on disk. URIs that already have a scheme are
-// returned unchanged.
-func resolveModelURI(uri, modelsPath string) string {
-	if strings.Contains(uri, "://") {
-		return uri
-	}
-	return "file://" + filepath.Join(modelsPath, uri)
-}
-
-// addWeightFile appends a resolved weight file to files and tracks the first GGUF.
-func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *string, seen map[string]bool) {
-	if !vram.IsWeightFile(uri) {
-		return
-	}
-	resolved := resolveModelURI(uri, modelsPath)
-	if seen[resolved] {
-		return
-	}
-	seen[resolved] = true
-	*files = append(*files, vram.FileInput{URI: resolved, Size: 0})
-	if *firstGGUF == "" && vram.IsGGUF(uri) {
-		*firstGGUF = resolved
-	}
-}
-
-// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an
-// installed model configuration. For uninstalled models (gallery URLs), use
-// the gallery-level estimates in /api/models instead.
-// @Summary Estimate VRAM usage for a model
-// @Description Estimates VRAM based on model weight files, context size, and GPU layers
-// @Tags config
-// @Accept json
-// @Produce json
-// @Param request body vramEstimateRequest true "VRAM estimation parameters"
-// @Success 200 {object} vramEstimateResponse "VRAM estimate"
-// @Router /api/models/vram-estimate [post]
-func VRAMEstimateEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		var req vramEstimateRequest
-		if err := c.Bind(&req); err != nil {
-			return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid request body"})
-		}
-
-		if req.Model == "" {
-			return c.JSON(http.StatusBadRequest, map[string]any{"error": "model name is required"})
-		}
-
-		modelConfig, exists := cl.GetModelConfig(req.Model)
-		if !exists {
-			return c.JSON(http.StatusNotFound, map[string]any{"error": "model configuration not found"})
-		}
-
-		modelsPath := appConfig.SystemState.Model.ModelsPath
-
-		var files []vram.FileInput
-		var firstGGUF string
-		seen := make(map[string]bool)
-
-		for _, f := range modelConfig.DownloadFiles {
-			addWeightFile(string(f.URI), modelsPath, &files, &firstGGUF, seen)
-		}
-		if modelConfig.Model != "" {
-			addWeightFile(modelConfig.Model, modelsPath, &files, &firstGGUF, seen)
-		}
-		if modelConfig.MMProj != "" {
-			addWeightFile(modelConfig.MMProj, modelsPath, &files, &firstGGUF, seen)
-		}
-
-		if len(files) == 0 {
-			return c.JSON(http.StatusOK, map[string]any{
-				"message": "no weight files found for estimation",
-			})
-		}
-
-		contextDefaulted := false
-		opts := vram.EstimateOptions{
-			ContextLength: req.ContextSize,
-			GPULayers:     req.GPULayers,
-			KVQuantBits:   req.KVQuantBits,
-		}
-		if opts.ContextLength == 0 {
-			if modelConfig.ContextSize != nil {
-				opts.ContextLength = uint32(*modelConfig.ContextSize)
-			} else {
-				opts.ContextLength = 8192
-				contextDefaulted = true
-			}
-		}
-
-		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-		defer cancel()
-
-		result, err := vram.Estimate(ctx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
-		if err != nil {
-			return c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()})
-		}
-
-		resp := vramEstimateResponse{EstimateResult: result}
-
-		// When context was defaulted to 8192, read the GGUF metadata to report
-		// the model's trained maximum context length so callers know the estimate
-		// may be conservative.
-		if contextDefaulted && firstGGUF != "" {
-			ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(ctx, firstGGUF)
-			if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 {
-				resp.ModelMaxContext = ggufMeta.MaximumContextLength
-				resp.ContextNote = fmt.Sprintf(
-					"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
-					ggufMeta.MaximumContextLength,
-				)
-			}
-		}
-
-		return c.JSON(http.StatusOK, resp)
-	}
-}
--- a/core/http/endpoints/localai/vram_test.go
+++ b/core/http/endpoints/localai/vram_test.go
@@ -1,133 +0,0 @@
-package localai_test
-
-import (
-	"bytes"
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"os"
-	"path/filepath"
-
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/config"
-	. "github.com/mudler/LocalAI/core/http/endpoints/localai"
-	"github.com/mudler/LocalAI/pkg/system"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-var _ = Describe("VRAM Estimate Endpoint", func() {
-	var (
-		app          *echo.Echo
-		tempDir      string
-		configLoader *config.ModelConfigLoader
-		appConfig    *config.ApplicationConfig
-	)
-
-	BeforeEach(func() {
-		var err error
-		tempDir, err = os.MkdirTemp("", "vram-test-*")
-		Expect(err).NotTo(HaveOccurred())
-
-		systemState, err := system.GetSystemState(
-			system.WithModelPath(tempDir),
-		)
-		Expect(err).NotTo(HaveOccurred())
-
-		appConfig = config.NewApplicationConfig(
-			config.WithSystemState(systemState),
-		)
-		configLoader = config.NewModelConfigLoader(tempDir)
-
-		app = echo.New()
-		app.POST("/api/models/vram-estimate", VRAMEstimateEndpoint(configLoader, appConfig))
-	})
-
-	AfterEach(func() {
-		os.RemoveAll(tempDir)
-	})
-
-	It("should return 400 for invalid request body", func() {
-		body := bytes.NewBufferString(`not json`)
-		req := httptest.NewRequest(http.MethodPost, "/api/models/vram-estimate", body)
-		req.Header.Set("Content-Type", "application/json")
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusBadRequest))
-	})
-
-	It("should return 400 when model name is missing", func() {
-		body := bytes.NewBufferString(`{"context_size": 4096}`)
-		req := httptest.NewRequest(http.MethodPost, "/api/models/vram-estimate", body)
-		req.Header.Set("Content-Type", "application/json")
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusBadRequest))
-
-		var resp map[string]any
-		Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-		Expect(resp["error"]).To(ContainSubstring("model name is required"))
-	})
-
-	It("should return 404 when model config does not exist", func() {
-		body := bytes.NewBufferString(`{"model": "nonexistent"}`)
-		req := httptest.NewRequest(http.MethodPost, "/api/models/vram-estimate", body)
-		req.Header.Set("Content-Type", "application/json")
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusNotFound))
-	})
-
-	It("should return no-weight-files message when model has no weight files", func() {
-		seedConfig := "name: test-model\nbackend: llama-cpp\n"
-		Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
-		Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
-
-		body := bytes.NewBufferString(`{"model": "test-model"}`)
-		req := httptest.NewRequest(http.MethodPost, "/api/models/vram-estimate", body)
-		req.Header.Set("Content-Type", "application/json")
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusOK))
-
-		var resp map[string]any
-		Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-		Expect(resp["message"]).To(ContainSubstring("no weight files"))
-	})
-
-	It("should return an estimate for a model with a weight file on disk", func() {
-		// Create a dummy GGUF file (not valid GGUF, but the size resolver
-		// will stat it and Estimate falls back to size-only estimation).
-		dummyData := make([]byte, 1024*1024) // 1 MiB
-		Expect(os.WriteFile(filepath.Join(tempDir, "model.gguf"), dummyData, 0644)).To(Succeed())
-
-		seedConfig := "name: test-model\nbackend: llama-cpp\nparameters:\n  model: model.gguf\n"
-		Expect(os.WriteFile(filepath.Join(tempDir, "test-model.yaml"), []byte(seedConfig), 0644)).To(Succeed())
-		Expect(configLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
-
-		body := bytes.NewBufferString(`{"model": "test-model", "context_size": 4096}`)
-		req := httptest.NewRequest(http.MethodPost, "/api/models/vram-estimate", body)
-		req.Header.Set("Content-Type", "application/json")
-		rec := httptest.NewRecorder()
-		app.ServeHTTP(rec, req)
-
-		Expect(rec.Code).To(Equal(http.StatusOK))
-
-		var resp map[string]any
-		Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed())
-		// The response should have non-zero size and vram estimates.
-		// JSON numbers unmarshal as float64.
-		sizeBytes, ok := resp["sizeBytes"].(float64)
-		Expect(ok).To(BeTrue(), "sizeBytes should be a number, got: %v (response: %s)", resp["sizeBytes"], rec.Body.String())
-		Expect(sizeBytes).To(BeNumerically(">", 0))
-		vramBytes, ok := resp["vramBytes"].(float64)
-		Expect(ok).To(BeTrue(), "vramBytes should be a number")
-		Expect(vramBytes).To(BeNumerically(">", 0))
-		Expect(resp["sizeDisplay"]).NotTo(BeEmpty())
-		Expect(resp["vramDisplay"]).NotTo(BeEmpty())
-	})
-})
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -55,7 +55,6 @@ func mergeToolCallDeltas(existing []schema.ToolCall, deltas []schema.ToolCall) [

 // ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
 // @Summary Generate a chat completions for a given prompt and model.
-// @Tags inference
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/chat/completions [post]
@@ -82,23 +81,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		extractor := reason.NewReasoningExtractor(thinkingStartToken, config.ReasoningConfig)

 		_, _, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
-			var reasoningDelta, contentDelta string
-
-			// Always keep the Go-side extractor in sync with raw tokens so it
-			// can serve as fallback for backends without an autoparser (e.g. vLLM).
-			goReasoning, goContent := extractor.ProcessToken(s)
-
-			// When C++ autoparser chat deltas are available, prefer them — they
-			// handle model-specific formats (Gemma 4, etc.) without Go-side tags.
-			// Otherwise fall back to Go-side extraction.
-			if tokenUsage.HasChatDeltaContent() {
-				rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
-				contentDelta = cd
-				reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
-			} else {
-				reasoningDelta = goReasoning
-				contentDelta = goContent
-			}
+			reasoningDelta, contentDelta := extractor.ProcessToken(s)

 			usage := schema.OpenAIUsage{
 				PromptTokens:     tokenUsage.Prompt,
@@ -150,19 +133,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator

 		_, tokenUsage, chatDeltas, err := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
 			result += s
-
-			var reasoningDelta, contentDelta string
-
-			goReasoning, goContent := extractor.ProcessToken(s)
-
-			if usage.HasChatDeltaContent() {
-				rawReasoning, cd := usage.ChatDeltaReasoningAndContent()
-				contentDelta = cd
-				reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
-			} else {
-				reasoningDelta = goReasoning
-				contentDelta = goContent
-			}
+			reasoningDelta, contentDelta := extractor.ProcessToken(s)

 			// Emit reasoning deltas in their own SSE chunks before any tool-call chunks
 			// (OpenAI spec: reasoning and tool_calls never share a delta)
@@ -992,24 +963,6 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 					return err
 				}

-				// For non-tool requests: prefer C++ autoparser chat deltas over
-				// Go-side tag extraction (which can mangle output when thinkingStartToken
-				// differs from the model's actual reasoning tags, e.g. Gemma 4).
-				if !shouldUseFn && len(chatDeltas) > 0 {
-					deltaContent := functions.ContentFromChatDeltas(chatDeltas)
-					deltaReasoning := functions.ReasoningFromChatDeltas(chatDeltas)
-					if deltaContent != "" || deltaReasoning != "" {
-						xlog.Debug("[ChatDeltas] non-SSE no-tools: overriding result with C++ autoparser deltas",
-							"content_len", len(deltaContent), "reasoning_len", len(deltaReasoning))
-						stopReason := FinishReasonStop
-						message := &schema.Message{Role: "assistant", Content: &deltaContent}
-						if deltaReasoning != "" {
-							message.Reasoning = &deltaReasoning
-						}
-						result = []schema.Choice{{FinishReason: &stopReason, Index: 0, Message: message}}
-					}
-				}
-
 				// Tool parsing is deferred here (only when shouldUseFn) so chat deltas are available
 				if shouldUseFn {
 					var funcResults []functions.FuncCallResults
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -21,7 +21,6 @@ import (

 // CompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
 // @Summary Generate completions for a given prompt and model.
-// @Tags inference
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/completions [post]
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -20,7 +20,6 @@ import (

 // EditEndpoint is the OpenAI edit API endpoint
 // @Summary OpenAI edit endpoint
-// @Tags inference
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/edits [post]
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -42,7 +42,6 @@ func embeddingItem(embeddings []float32, index int, encodingFormat string) schem

 // EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
 // @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
-// @Tags embeddings
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/embeddings [post]
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -68,7 +68,6 @@ func downloadFile(url string) (string, error) {
 */
 // ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
 // @Summary Creates an image given a prompt.
-// @Tags images
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/images/generations [post]
--- a/core/http/endpoints/openai/inference_test.go
+++ b/core/http/endpoints/openai/inference_test.go
@@ -398,124 +398,5 @@ var _ = Describe("ComputeChoices", func() {
 			Expect(choices).To(HaveLen(1))
 			Expect(streamedTokens).To(Equal([]string{"Hello", " world"}))
 		})
-
-		It("should pass chat deltas through TokenUsage during streaming", func() {
-			var receivedDeltas [][]*pb.ChatDelta
-			backend.ModelInferenceFunc = func(
-				ctx context.Context, s string, messages schema.Messages,
-				images, videos, audios []string,
-				loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader,
-				o *config.ApplicationConfig,
-				tokenCallback func(string, backend.TokenUsage) bool,
-				tools, toolChoice string,
-				logprobs, topLogprobs *int,
-				logitBias map[string]float64,
-				metadata map[string]string,
-			) (func() (backend.LLMResponse, error), error) {
-				predFunc := func() (backend.LLMResponse, error) {
-					if tokenCallback != nil {
-						// Simulate C++ autoparser sending reasoning in chat deltas
-						tokenCallback("<|channel>thought\nthinking\n<channel|>", backend.TokenUsage{
-							Prompt: 5,
-							ChatDeltas: []*pb.ChatDelta{
-								{ReasoningContent: "thinking"},
-							},
-						})
-						tokenCallback("Hello!", backend.TokenUsage{
-							Prompt: 5, Completion: 3,
-							ChatDeltas: []*pb.ChatDelta{
-								{Content: "Hello!"},
-							},
-						})
-					}
-					return backend.LLMResponse{
-						Response: "<|channel>thought\nthinking\n<channel|>Hello!",
-						Usage:    backend.TokenUsage{Prompt: 5, Completion: 3},
-						ChatDeltas: []*pb.ChatDelta{
-							{ReasoningContent: "thinking"},
-							{Content: "Hello!"},
-						},
-					}, nil
-				}
-				return predFunc, nil
-			}
-
-			choices, _, deltas, err := ComputeChoices(
-				makeReq(), "test", cfg, nil, appCfg, nil,
-				func(s string, c *[]schema.Choice) {
-					*c = append(*c, schema.Choice{Text: s})
-				},
-				func(s string, usage backend.TokenUsage) bool {
-					// Capture chat deltas received per-chunk
-					if len(usage.ChatDeltas) > 0 {
-						receivedDeltas = append(receivedDeltas, usage.ChatDeltas)
-					}
-					return true
-				},
-			)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(choices).To(HaveLen(1))
-
-			// Verify per-chunk deltas were received during streaming
-			Expect(receivedDeltas).To(HaveLen(2))
-			Expect(receivedDeltas[0][0].ReasoningContent).To(Equal("thinking"))
-			Expect(receivedDeltas[1][0].Content).To(Equal("Hello!"))
-
-			// Verify final accumulated deltas are also returned
-			Expect(deltas).To(HaveLen(2))
-			Expect(deltas[0].ReasoningContent).To(Equal("thinking"))
-			Expect(deltas[1].Content).To(Equal("Hello!"))
-		})
-
-		It("should prefer chat deltas over raw text when HasChatDeltaContent is true", func() {
-			// Verify that the callback can distinguish between
-			// chunks with and without chat deltas
-			var withDeltas, withoutDeltas int
-			backend.ModelInferenceFunc = func(
-				ctx context.Context, s string, messages schema.Messages,
-				images, videos, audios []string,
-				loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader,
-				o *config.ApplicationConfig,
-				tokenCallback func(string, backend.TokenUsage) bool,
-				tools, toolChoice string,
-				logprobs, topLogprobs *int,
-				logitBias map[string]float64,
-				metadata map[string]string,
-			) (func() (backend.LLMResponse, error), error) {
-				predFunc := func() (backend.LLMResponse, error) {
-					if tokenCallback != nil {
-						// Chunk with chat deltas (C++ autoparser active)
-						tokenCallback("raw-text", backend.TokenUsage{
-							ChatDeltas: []*pb.ChatDelta{{Content: "parsed-content"}},
-						})
-						// Chunk without chat deltas (fallback)
-						tokenCallback("fallback-text", backend.TokenUsage{})
-					}
-					return backend.LLMResponse{Response: "raw-textfallback-text"}, nil
-				}
-				return predFunc, nil
-			}
-
-			_, _, _, err := ComputeChoices(
-				makeReq(), "test", cfg, nil, appCfg, nil,
-				func(s string, c *[]schema.Choice) {
-					*c = append(*c, schema.Choice{Text: s})
-				},
-				func(s string, usage backend.TokenUsage) bool {
-					if usage.HasChatDeltaContent() {
-						withDeltas++
-						r, c := usage.ChatDeltaReasoningAndContent()
-						Expect(c).To(Equal("parsed-content"))
-						Expect(r).To(BeEmpty())
-					} else {
-						withoutDeltas++
-					}
-					return true
-				},
-			)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(withDeltas).To(Equal(1))
-			Expect(withoutDeltas).To(Equal(1))
-		})
 	})
 })
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@@ -12,7 +12,6 @@ import (

 // ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
 // @Summary List and describe the various models available in the API.
-// @Tags models
 // @Success 200 {object} schema.ModelsDataResponse "Response"
 // @Router /v1/models [get]
 func ListModelsEndpoint(bcl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, db ...*gorm.DB) echo.HandlerFunc {
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -20,7 +20,6 @@ import (

 // TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create
 // @Summary Transcribes audio into the input language.
-// @Tags audio
 // @accept multipart/form-data
 // @Param model formData string true "model"
 // @Param file formData file true "file"
--- a/core/http/endpoints/openresponses/responses.go
+++ b/core/http/endpoints/openresponses/responses.go
@@ -25,7 +25,6 @@ import (
 // ResponsesEndpoint is the Open Responses API endpoint
 // https://www.openresponses.org/specification
 // @Summary Create a response using the Open Responses API
-// @Tags inference
 // @Param request body schema.OpenResponsesRequest true "Request body"
 // @Success 200 {object} schema.ORResponseResource "Response"
 // @Router /v1/responses [post]
@@ -1820,17 +1819,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6

 				// If no tool calls detected yet, handle reasoning and text
 				if !inToolCallMode {
-					var reasoningDelta, contentDelta string
-					goReasoning, goContent := extractor.ProcessToken(token)
-
-					if tokenUsage.HasChatDeltaContent() {
-						rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
-						contentDelta = cd
-						reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
-					} else {
-						reasoningDelta = goReasoning
-						contentDelta = goContent
-					}
+					reasoningDelta, contentDelta := extractor.ProcessToken(token)

 					// Handle reasoning item
 					if extractor.Reasoning() != "" {
@@ -2349,18 +2338,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 	// Stream text deltas with reasoning extraction
 	tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool {
 		accumulatedText += token
-
-		var reasoningDelta, contentDelta string
-		goReasoning, goContent := extractor.ProcessToken(token)
-
-		if tokenUsage.HasChatDeltaContent() {
-			rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
-			contentDelta = cd
-			reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
-		} else {
-			reasoningDelta = goReasoning
-			contentDelta = goContent
-		}
+		reasoningDelta, contentDelta := extractor.ProcessToken(token)

 		// Handle reasoning item
 		if extractor.Reasoning() != "" {
@@ -2953,7 +2931,6 @@ func convertORToolsToOpenAIFormat(orTools []schema.ORFunctionTool) []functions.T
 // GetResponseEndpoint returns a handler for GET /responses/:id
 // This endpoint is used for polling background responses or resuming streaming
 // @Summary Get a response by ID
-// @Tags inference
 // @Description Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.
 // @Param id path string true "Response ID"
 // @Param stream query string false "Set to 'true' to resume streaming"
@@ -3095,7 +3072,6 @@ func handleStreamResume(c echo.Context, store *ResponseStore, responseID string,
 // CancelResponseEndpoint returns a handler for POST /responses/:id/cancel
 // This endpoint cancels a background response if it's still in progress
 // @Summary Cancel a response
-// @Tags inference
 // @Description Cancel a background response if it's still in progress
 // @Param id path string true "Response ID"
 // @Success 200 {object} schema.ORResponseResource "Response"
--- a/core/http/react-ui/src/components/RequireAuth.jsx
+++ b/core/http/react-ui/src/components/RequireAuth.jsx
@@ -2,8 +2,8 @@ import { Navigate } from 'react-router-dom'
 import { useAuth } from '../context/AuthContext'

 export default function RequireAuth({ children }) {
-  const { authEnabled, staticApiKeyRequired, user, loading } = useAuth()
+  const { authEnabled, user, loading } = useAuth()
  if (loading) return null
-  if ((authEnabled || staticApiKeyRequired) && !user) return <Navigate to="/login" replace />
+  if (authEnabled && !user) return <Navigate to="/login" replace />
  return children
 }
--- a/core/http/react-ui/src/context/AuthContext.jsx
+++ b/core/http/react-ui/src/context/AuthContext.jsx
@@ -7,7 +7,6 @@ export function AuthProvider({ children }) {
  const [state, setState] = useState({
    loading: true,
    authEnabled: false,
-    staticApiKeyRequired: false,
    user: null,
    permissions: {},
  })
@@ -21,13 +20,12 @@ export function AuthProvider({ children }) {
        setState({
          loading: false,
          authEnabled: data.authEnabled || false,
-          staticApiKeyRequired: data.staticApiKeyRequired || false,
          user,
          permissions,
        })
      })
      .catch(() => {
-        setState({ loading: false, authEnabled: false, staticApiKeyRequired: false, user: null, permissions: {} })
+        setState({ loading: false, authEnabled: false, user: null, permissions: {} })
      })
  }

@@ -47,20 +45,17 @@ export function AuthProvider({ children }) {

  const refresh = () => fetchStatus()

-  const noAuthRequired = !state.authEnabled && !state.staticApiKeyRequired
-
  const hasFeature = (name) => {
-    if (state.user?.role === 'admin' || noAuthRequired) return true
+    if (state.user?.role === 'admin' || !state.authEnabled) return true
    return !!state.permissions[name]
  }

  const value = {
    loading: state.loading,
    authEnabled: state.authEnabled,
-    staticApiKeyRequired: state.staticApiKeyRequired,
    user: state.user,
    permissions: state.permissions,
-    isAdmin: state.user?.role === 'admin' || noAuthRequired,
+    isAdmin: state.user?.role === 'admin' || !state.authEnabled,
    hasFeature,
    logout,
    refresh,
--- a/core/http/react-ui/src/hooks/useChat.js
+++ b/core/http/react-ui/src/hooks/useChat.js
@@ -2,9 +2,9 @@ import { useState, useCallback, useRef, useEffect } from 'react'
 import { API_CONFIG } from '../utils/config'
 import { apiUrl } from '../utils/basePath'

-const thinkingTagRegex = /<thinking>([\s\S]*?)<\/thinking>|<think>([\s\S]*?)<\/think>|<\|channel>thought([\s\S]*?)<channel\|>/g
-const openThinkTagRegex = /<thinking>|<think>|<\|channel>thought/
-const closeThinkTagRegex = /<\/thinking>|<\/think>|<channel\|>/
+const thinkingTagRegex = /<thinking>([\s\S]*?)<\/thinking>|<think>([\s\S]*?)<\/think>/g
+const openThinkTagRegex = /<thinking>|<think>/
+const closeThinkTagRegex = /<\/thinking>|<\/think>/

 async function extractHttpError(response) {
  let errorMsg = `HTTP ${response.status}`
@@ -23,7 +23,7 @@ function extractThinking(text) {
  thinkingTagRegex.lastIndex = 0
  while ((match = thinkingTagRegex.exec(text)) !== null) {
    regularContent += text.slice(lastIdx, match.index)
-    thinkingContent += match[1] || match[2] || match[3] || ''
+    thinkingContent += match[1] || match[2] || ''
    lastIdx = match.index + match[0].length
  }
  regularContent += text.slice(lastIdx)
@@ -578,9 +578,9 @@ export function useChat(initialModel = '') {
                    }

                    if (insideThinkTag) {
-                      const lastOpen = Math.max(rawContent.lastIndexOf('<thinking>'), rawContent.lastIndexOf('<think>'), rawContent.lastIndexOf('<|channel>thought'))
+                      const lastOpen = Math.max(rawContent.lastIndexOf('<thinking>'), rawContent.lastIndexOf('<think>'))
                      if (lastOpen >= 0) {
-                        const partial = rawContent.slice(lastOpen).replace(/<thinking>|<think>|<\|channel>thought/, '')
+                        const partial = rawContent.slice(lastOpen).replace(/<thinking>|<think>/, '')
                        setStreamingReasoning(partial)
                        const beforeThink = rawContent.slice(0, lastOpen)
                        const { regularContent: contentBeforeThink } = extractThinking(beforeThink)
--- a/core/http/react-ui/src/pages/Login.jsx
+++ b/core/http/react-ui/src/pages/Login.jsx
@@ -8,7 +8,7 @@ export default function Login() {
  const navigate = useNavigate()
  const { code: urlInviteCode } = useParams()
  const [searchParams] = useSearchParams()
-  const { authEnabled, staticApiKeyRequired, user, loading: authLoading, refresh } = useAuth()
+  const { authEnabled, user, loading: authLoading, refresh } = useAuth()
  const [providers, setProviders] = useState([])
  const [hasUsers, setHasUsers] = useState(true)
  const [registrationMode, setRegistrationMode] = useState('open')
@@ -66,7 +66,7 @@ export default function Login() {

  // Redirect if auth is disabled or user is already logged in
  useEffect(() => {
-    if (!authLoading && ((!authEnabled && !staticApiKeyRequired) || user)) {
+    if (!authLoading && (!authEnabled || user)) {
      navigate('/app', { replace: true })
    }
  }, [authLoading, authEnabled, user, navigate])
@@ -176,40 +176,6 @@ export default function Login() {

  if (authLoading || statusLoading) return null

-  // Legacy API key-only mode: show a simplified login with just the token input
-  if (staticApiKeyRequired && !authEnabled) {
-    return (
-      <div className="login-page">
-        <div className="card login-card">
-          <div className="login-header">
-            <img src={apiUrl('/static/logo.png')} alt="LocalAI" className="login-logo" />
-            <p className="login-subtitle">Enter your API key to continue</p>
-          </div>
-
-          {error && (
-            <div className="login-alert login-alert-error">{error}</div>
-          )}
-
-          <form onSubmit={handleTokenLogin}>
-            <div className="form-group">
-              <input
-                className="input"
-                type="password"
-                value={token}
-                onChange={(e) => { setToken(e.target.value); setError('') }}
-                placeholder="Enter API key..."
-                autoFocus
-              />
-            </div>
-            <button type="submit" className="btn btn-primary login-btn-full" disabled={submitting}>
-              {submitting ? 'Signing in...' : 'Sign In'}
-            </button>
-          </form>
-        </div>
-      </div>
-    )
-  }
-
  const hasGitHub = providers.includes('github')
  const hasOIDC = providers.includes('oidc')
  const hasLocal = providers.includes('local')
--- a/core/http/routes/auth.go
+++ b/core/http/routes/auth.go
@@ -157,11 +157,10 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
 		}

 		resp := map[string]any{
-			"authEnabled":            authEnabled,
-			"staticApiKeyRequired":   !authEnabled && len(appConfig.ApiKeys) > 0,
-			"providers":              providers,
-			"hasUsers":               hasUsers,
-			"registrationMode":       registrationMode,
+			"authEnabled":      authEnabled,
+			"providers":        providers,
+			"hasUsers":         hasUsers,
+			"registrationMode": registrationMode,
 		}

 		// Include current user if authenticated
--- a/core/http/routes/auth_test.go
+++ b/core/http/routes/auth_test.go
@@ -45,10 +45,9 @@ func newTestAuthApp(db *gorm.DB, appConfig *config.ApplicationConfig) *echo.Echo
 		}

 		resp := map[string]any{
-			"authEnabled":          authEnabled,
-			"staticApiKeyRequired": !authEnabled && len(appConfig.ApiKeys) > 0,
-			"providers":            providers,
-			"hasUsers":             hasUsers,
+			"authEnabled": authEnabled,
+			"providers":   providers,
+			"hasUsers":    hasUsers,
 		}

 		user := auth.GetUser(c)
@@ -408,29 +407,6 @@ var _ = Describe("Auth Routes", Label("auth"), func() {
 			json.Unmarshal(rec.Body.Bytes(), &resp)
 			Expect(resp["hasUsers"]).To(BeFalse())
 		})
-
-		It("returns staticApiKeyRequired=true when no DB but API keys configured", func() {
-			cfg := config.NewApplicationConfig()
-			config.WithApiKeys([]string{"test-key-123"})(cfg)
-			app := newTestAuthApp(nil, cfg)
-			rec := doAuthRequest(app, "GET", "/api/auth/status", nil)
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			json.Unmarshal(rec.Body.Bytes(), &resp)
-			Expect(resp["authEnabled"]).To(BeFalse())
-			Expect(resp["staticApiKeyRequired"]).To(BeTrue())
-		})
-
-		It("returns staticApiKeyRequired=false when no DB and no API keys", func() {
-			app := newTestAuthApp(nil, config.NewApplicationConfig())
-			rec := doAuthRequest(app, "GET", "/api/auth/status", nil)
-			Expect(rec.Code).To(Equal(http.StatusOK))
-
-			var resp map[string]any
-			json.Unmarshal(rec.Body.Bytes(), &resp)
-			Expect(resp["staticApiKeyRequired"]).To(BeFalse())
-		})
 	})

 	Context("POST /api/auth/logout", func() {
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -29,9 +29,7 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 	mcpJobsMw echo.MiddlewareFunc,
 	mcpMw echo.MiddlewareFunc) {

-	router.GET("/swagger/*", echoswagger.EchoWrapHandler(func(c *echoswagger.Config) {
-		c.URLs = []string{"doc.json"}
-	}))
+	router.GET("/swagger/*", echoswagger.WrapHandler) // default

 	// LocalAI API endpoints
 	if !appConfig.DisableGalleryEndpoint {
@@ -126,19 +124,6 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 	router.GET("/v1/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService), adminMiddleware)
 	router.POST("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService), adminMiddleware)

-	// Traces and backend logs (monitoring)
-	router.GET("/api/traces", localai.GetAPITracesEndpoint(), adminMiddleware)
-	router.POST("/api/traces/clear", localai.ClearAPITracesEndpoint(), adminMiddleware)
-	router.GET("/api/backend-traces", localai.GetBackendTracesEndpoint(), adminMiddleware)
-	router.POST("/api/backend-traces/clear", localai.ClearBackendTracesEndpoint(), adminMiddleware)
-	// Backend logs — standalone only (distributed mode uses node-proxied routes)
-	if !appConfig.Distributed.Enabled {
-		router.GET("/api/backend-logs", localai.ListBackendLogsEndpoint(ml), adminMiddleware)
-		router.GET("/api/backend-logs/:modelId", localai.GetBackendLogsEndpoint(ml), adminMiddleware)
-		router.POST("/api/backend-logs/:modelId/clear", localai.ClearBackendLogsEndpoint(ml), adminMiddleware)
-		router.GET("/ws/backend-logs/:modelId", localai.BackendLogsWebSocketEndpoint(ml), adminMiddleware)
-	}
-
 	// p2p
 	router.GET("/api/p2p", localai.ShowP2PNodes(appConfig), adminMiddleware)
 	router.GET("/api/p2p/token", localai.ShowP2PToken(appConfig), adminMiddleware)
@@ -149,127 +134,6 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		}{Version: internal.PrintableVersion()})
 	})

-	// Agent discovery endpoint
-	router.GET("/.well-known/localai.json", func(c echo.Context) error {
-		monitoringRoutes := map[string]string{
-			"metrics":              "/metrics",
-			"backend_monitor":      "/backend/monitor",
-			"backend_shutdown":     "/backend/shutdown",
-			"system":               "/system",
-			"version":              "/version",
-			"traces":               "/api/traces",
-			"traces_clear":         "/api/traces/clear",
-			"backend_traces":       "/api/backend-traces",
-			"backend_traces_clear": "/api/backend-traces/clear",
-		}
-		if !appConfig.Distributed.Enabled {
-			monitoringRoutes["backend_logs"] = "/api/backend-logs"
-			monitoringRoutes["backend_logs_model"] = "/api/backend-logs/:modelId"
-			monitoringRoutes["backend_logs_clear"] = "/api/backend-logs/:modelId/clear"
-			monitoringRoutes["backend_logs_ws"] = "/ws/backend-logs/:modelId"
-		} else {
-			monitoringRoutes["node_backend_logs"] = "/api/nodes/:id/backend-logs"
-			monitoringRoutes["node_backend_logs_model"] = "/api/nodes/:id/backend-logs/:modelId"
-			monitoringRoutes["node_backend_logs_ws"] = "/ws/nodes/:id/backend-logs/:modelId"
-		}
-		return c.JSON(200, map[string]any{
-			"version": internal.PrintableVersion(),
-			// Flat endpoint list for backwards compatibility
-			"endpoints": map[string]any{
-				"models":           "/v1/models",
-				"chat_completions": "/v1/chat/completions",
-				"completions":      "/v1/completions",
-				"embeddings":       "/v1/embeddings",
-				"config_metadata":  "/api/models/config-metadata",
-				"config_json":      "/api/models/config-json/:name",
-				"config_patch":     "/api/models/config-json/:name",
-				"autocomplete":     "/api/models/config-metadata/autocomplete/:provider",
-				"vram_estimate":    "/api/models/vram-estimate",
-				"tts":              "/tts",
-				"transcription":    "/v1/audio/transcriptions",
-				"image_generation": "/v1/images/generations",
-				"swagger":          "/swagger/index.html",
-				"instructions":     "/api/instructions",
-			},
-			// Categorized endpoint groups for structured discovery
-			"endpoint_groups": map[string]any{
-				"openai_compatible": map[string]string{
-					"models":           "/v1/models",
-					"chat_completions": "/v1/chat/completions",
-					"completions":      "/v1/completions",
-					"embeddings":       "/v1/embeddings",
-					"transcription":    "/v1/audio/transcriptions",
-					"image_generation": "/v1/images/generations",
-				},
-				"config_management": map[string]string{
-					"config_metadata": "/api/models/config-metadata",
-					"config_json":     "/api/models/config-json/:name",
-					"config_patch":    "/api/models/config-json/:name",
-					"autocomplete":    "/api/models/config-metadata/autocomplete/:provider",
-					"vram_estimate":   "/api/models/vram-estimate",
-				},
-				"model_management": map[string]string{
-					"list_gallery": "/models/available",
-					"install":      "/models/apply",
-					"delete":       "/models/delete/:name",
-					"edit":         "/models/edit/:name",
-					"import":       "/models/import",
-					"reload":       "/models/reload",
-				},
-				"ai_functions": map[string]string{
-					"tts":       "/tts",
-					"vad":       "/vad",
-					"video":     "/video",
-					"detection": "/v1/detection",
-					"tokenize":  "/v1/tokenize",
-				},
-				"monitoring": monitoringRoutes,
-				"mcp": map[string]string{
-					"chat_completions": "/v1/mcp/chat/completions",
-					"servers":          "/v1/mcp/servers/:model",
-					"prompts":          "/v1/mcp/prompts/:model",
-					"resources":        "/v1/mcp/resources/:model",
-				},
-				"p2p": map[string]string{
-					"nodes": "/api/p2p",
-					"token": "/api/p2p/token",
-				},
-				"agents": map[string]string{
-					"tasks":   "/api/agent/tasks",
-					"jobs":    "/api/agent/jobs",
-					"execute": "/api/agent/jobs/execute",
-				},
-				"settings": map[string]string{
-					"get":    "/api/settings",
-					"update": "/api/settings",
-				},
-				"stores": map[string]string{
-					"set":    "/stores/set",
-					"get":    "/stores/get",
-					"find":   "/stores/find",
-					"delete": "/stores/delete",
-				},
-				"docs": map[string]string{
-					"swagger": "/swagger/index.html",
-					"instructions": "/api/instructions",
-				},
-			},
-			"capabilities": map[string]bool{
-				"config_metadata": true,
-				"config_patch":    true,
-				"vram_estimate":   true,
-				"mcp":             !appConfig.DisableMCP,
-				"agents":          appConfig.AgentPool.Enabled,
-				"p2p":             appConfig.P2PToken != "",
-				"tracing":         true,
-			},
-		})
-	})
-
-	// API instructions for agent discovery (no auth — agents should discover these without credentials)
-	router.GET("/api/instructions", localai.ListAPIInstructionsEndpoint())
-	router.GET("/api/instructions/:name", localai.GetAPIInstructionEndpoint())
-
 	router.GET("/api/features", func(c echo.Context) error {
 		return c.JSON(200, map[string]bool{
 			"agents":       appConfig.AgentPool.Enabled,
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -2,15 +2,41 @@ package routes

 import (
 	"cmp"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
 	"slices"
+	"sync"
+	"time"

+	"github.com/gorilla/websocket"
 	"github.com/labstack/echo/v4"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/services/galleryop"
+	"github.com/mudler/LocalAI/core/trace"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/xlog"
 )

+var backendLogsUpgrader = websocket.Upgrader{
+	CheckOrigin: func(r *http.Request) bool {
+		origin := r.Header.Get("Origin")
+		if origin == "" {
+			return true // no origin header = same-origin or non-browser
+		}
+		u, err := url.Parse(origin)
+		if err != nil {
+			return false
+		}
+		return u.Host == r.Host
+	},
+}
+
 func RegisterUIRoutes(app *echo.Echo,
 	cl *config.ModelConfigLoader,
+	ml *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
 	galleryService *galleryop.GalleryService,
 	adminMiddleware echo.MiddlewareFunc) {
@@ -52,4 +78,142 @@ func RegisterUIRoutes(app *echo.Echo,

 		return c.JSON(200, models)
 	})
+
+	app.GET("/api/traces", func(c echo.Context) error {
+		return c.JSON(200, middleware.GetTraces())
+	}, adminMiddleware)
+
+	app.POST("/api/traces/clear", func(c echo.Context) error {
+		middleware.ClearTraces()
+		return c.NoContent(204)
+	}, adminMiddleware)
+
+	app.GET("/api/backend-traces", func(c echo.Context) error {
+		return c.JSON(200, trace.GetBackendTraces())
+	}, adminMiddleware)
+
+	app.POST("/api/backend-traces/clear", func(c echo.Context) error {
+		trace.ClearBackendTraces()
+		return c.NoContent(204)
+	}, adminMiddleware)
+
+	// Backend logs endpoints — only in standalone mode.
+	// In distributed mode, backend processes run on workers and logs are
+	// streamed via /api/nodes/:id/backend-logs and /ws/nodes/:id/backend-logs/:modelId.
+	if !appConfig.Distributed.Enabled {
+		app.GET("/api/backend-logs", func(c echo.Context) error {
+			return c.JSON(200, ml.BackendLogs().ListModels())
+		}, adminMiddleware)
+
+		app.GET("/api/backend-logs/:modelId", func(c echo.Context) error {
+			modelID := c.Param("modelId")
+			return c.JSON(200, ml.BackendLogs().GetLines(modelID))
+		}, adminMiddleware)
+
+		app.POST("/api/backend-logs/:modelId/clear", func(c echo.Context) error {
+			ml.BackendLogs().Clear(c.Param("modelId"))
+			return c.NoContent(204)
+		}, adminMiddleware)
+
+		// Backend logs WebSocket endpoint for real-time streaming
+		app.GET("/ws/backend-logs/:modelId", func(c echo.Context) error {
+			modelID := c.Param("modelId")
+
+			ws, err := backendLogsUpgrader.Upgrade(c.Response(), c.Request(), nil)
+			if err != nil {
+				return err
+			}
+			defer ws.Close()
+
+			ws.SetReadLimit(4096)
+
+			// Set up ping/pong for keepalive
+			ws.SetReadDeadline(time.Now().Add(90 * time.Second))
+			ws.SetPongHandler(func(string) error {
+				ws.SetReadDeadline(time.Now().Add(90 * time.Second))
+				return nil
+			})
+
+			conn := &backendLogsConn{Conn: ws}
+
+			// Send existing lines as initial batch
+			existingLines := ml.BackendLogs().GetLines(modelID)
+			initialMsg := map[string]any{
+				"type":  "initial",
+				"lines": existingLines,
+			}
+			if err := conn.writeJSON(initialMsg); err != nil {
+				xlog.Debug("WebSocket backend-logs initial write failed", "error", err)
+				return nil
+			}
+
+			// Subscribe to new lines
+			lineCh, unsubscribe := ml.BackendLogs().Subscribe(modelID)
+			defer unsubscribe()
+
+			// Handle close from client side
+			closeCh := make(chan struct{})
+			go func() {
+				for {
+					_, _, err := ws.ReadMessage()
+					if err != nil {
+						close(closeCh)
+						return
+					}
+				}
+			}()
+
+			// Ping ticker for keepalive
+			pingTicker := time.NewTicker(30 * time.Second)
+			defer pingTicker.Stop()
+
+			// Forward new lines to WebSocket
+			for {
+				select {
+				case line, ok := <-lineCh:
+					if !ok {
+						return nil
+					}
+					lineMsg := map[string]any{
+						"type": "line",
+						"line": line,
+					}
+					if err := conn.writeJSON(lineMsg); err != nil {
+						xlog.Debug("WebSocket backend-logs write error", "error", err)
+						return nil
+					}
+				case <-pingTicker.C:
+					if err := conn.writePing(); err != nil {
+						return nil
+					}
+				case <-closeCh:
+					return nil
+				}
+			}
+		}, adminMiddleware)
+	}
+}
+
+// backendLogsConn wraps a websocket connection with a mutex for safe concurrent writes
+type backendLogsConn struct {
+	*websocket.Conn
+	mu sync.Mutex
+}
+
+func (c *backendLogsConn) writeJSON(v any) error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.Conn.SetWriteDeadline(time.Now().Add(30 * time.Second))
+	data, err := json.Marshal(v)
+	if err != nil {
+		return fmt.Errorf("marshal error: %w", err)
+	}
+	return c.Conn.WriteMessage(websocket.TextMessage, data)
+}
+
+func (c *backendLogsConn) writePing() error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.Conn.SetWriteDeadline(time.Now().Add(30 * time.Second))
+	return c.Conn.WriteMessage(websocket.PingMessage, nil)
 }
--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -690,18 +690,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
 		return c.JSON(http.StatusOK, modelConfig)
 	}, adminMiddleware)

-	// Config metadata API - returns field metadata for all ~170 config fields
-	app.GET("/api/models/config-metadata", localai.ConfigMetadataEndpoint(), adminMiddleware)
-
-	// Autocomplete providers for config fields (dynamic values only)
-	app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
-
-	// PATCH config endpoint - partial update using nested JSON merge
-	app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware)
-
-	// VRAM estimation endpoint
-	app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
-
 	// Get installed model YAML config for the React model editor
 	app.GET("/api/models/edit/:name", func(c echo.Context) error {
 		modelName := c.Param("name")
@@ -1325,4 +1313,3 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
 		})
 	}, adminMiddleware)
 }
-
--- a/core/schema/agent_jobs.go
+++ b/core/schema/agent_jobs.go
@@ -10,18 +10,21 @@ type Task struct {
 	Name           string            `json:"name"`        // User-friendly name
 	Description    string            `json:"description"` // Optional description
 	Model          string            `json:"model"`       // Model name (must have MCP config)
-	Prompt         string            `json:"prompt"`      // Template prompt (supports Go template .param syntax)
+	Prompt         string            `json:"prompt"`      // Template prompt (supports {{.param}} syntax)
 	CreatedAt      time.Time         `json:"created_at"`
 	UpdatedAt      time.Time         `json:"updated_at"`
 	Enabled        bool              `json:"enabled"`                   // Can be disabled without deletion
 	Cron           string            `json:"cron,omitempty"`            // Optional cron expression
 	CronParameters map[string]string `json:"cron_parameters,omitempty"` // Parameters to use when executing cron jobs

-	// Webhook configuration (for notifications).
-	// Supports multiple webhook endpoints.
+	// Webhook configuration (for notifications)
+	// Support multiple webhook endpoints
 	// Webhooks can handle both success and failure cases using template variables:
-	// .Job (Job object), .Task (Task object), .Result (if successful),
-	// .Error (if failed), .Status (job status string).
+	// - {{.Job}} - Job object with all fields
+	// - {{.Task}} - Task object
+	// - {{.Result}} - Job result (if successful)
+	// - {{.Error}} - Error message (if failed, empty string if successful)
+	// - {{.Status}} - Job status string
 	Webhooks []WebhookConfig `json:"webhooks,omitempty"` // Webhook configs for job completion notifications

 	// Multimedia sources (for cron jobs)
@@ -36,8 +39,13 @@ type WebhookConfig struct {
 	Method          string            `json:"method"`                     // HTTP method (POST, PUT, PATCH) - default: POST
 	Headers         map[string]string `json:"headers,omitempty"`          // Custom headers (e.g., Authorization)
 	PayloadTemplate string            `json:"payload_template,omitempty"` // Optional template for payload
-	// If PayloadTemplate is empty, uses default JSON structure.
-	// Available template variables: .Job, .Task, .Result, .Error, .Status.
+	// If PayloadTemplate is empty, uses default JSON structure
+	// Available template variables:
+	// - {{.Job}} - Job object with all fields
+	// - {{.Task}} - Task object
+	// - {{.Result}} - Job result (if successful)
+	// - {{.Error}} - Error message (if failed, empty string if successful)
+	// - {{.Status}} - Job status string
 }

 // MultimediaSourceConfig represents configuration for fetching multimedia content
@@ -118,9 +126,9 @@ type JobExecutionRequest struct {

 // JobExecutionResponse represents the response after creating a job
 type JobExecutionResponse struct {
-	JobID  string `json:"job_id"` // unique job identifier
-	Status string `json:"status"` // initial status (pending)
-	URL    string `json:"url"`    // URL to poll for job status
+	JobID  string `json:"job_id"`
+	Status string `json:"status"`
+	URL    string `json:"url"` // URL to check job status
 }

 // TasksFile represents the structure of agent_tasks.json
--- a/core/schema/anthropic.go
+++ b/core/schema/anthropic.go
@@ -78,7 +78,7 @@ type AnthropicMessage struct {
 // AnthropicContentBlock represents a content block in an Anthropic message
 type AnthropicContentBlock struct {
 	Type      string                `json:"type"`
-	Text      string                `json:"text"`
+	Text      string                `json:"text,omitempty"`
 	Source    *AnthropicImageSource `json:"source,omitempty"`
 	ID        string                `json:"id,omitempty"`
 	Name      string                `json:"name,omitempty"`
@@ -116,7 +116,7 @@ type AnthropicUsage struct {
 // AnthropicStreamEvent represents a streaming event from the Anthropic API
 type AnthropicStreamEvent struct {
 	Type         string                  `json:"type"`
-	Index        *int                    `json:"index,omitempty"`
+	Index        int                     `json:"index,omitempty"`
 	ContentBlock *AnthropicContentBlock  `json:"content_block,omitempty"`
 	Delta        *AnthropicStreamDelta   `json:"delta,omitempty"`
 	Message      *AnthropicStreamMessage `json:"message,omitempty"`
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -33,31 +33,31 @@ type GalleryResponse struct {

 type VideoRequest struct {
 	BasicModelRequest
-	Prompt         string  `json:"prompt" yaml:"prompt"`                                     // text description of the video to generate
-	NegativePrompt string  `json:"negative_prompt" yaml:"negative_prompt"`                   // things to avoid in the output
-	StartImage     string  `json:"start_image" yaml:"start_image"`                           // URL or base64 of the first frame
-	EndImage       string  `json:"end_image" yaml:"end_image"`                               // URL or base64 of the last frame
-	Width          int32   `json:"width" yaml:"width"`                                       // output width in pixels
-	Height         int32   `json:"height" yaml:"height"`                                     // output height in pixels
-	NumFrames      int32   `json:"num_frames" yaml:"num_frames"`                             // total number of frames to generate
-	FPS            int32   `json:"fps" yaml:"fps"`                                           // frames per second
-	Seconds        string  `json:"seconds,omitempty" yaml:"seconds,omitempty"`               // duration in seconds (alternative to num_frames)
-	Size           string  `json:"size,omitempty" yaml:"size,omitempty"`                     // WxH shorthand (e.g. "512x512")
-	InputReference string  `json:"input_reference,omitempty" yaml:"input_reference,omitempty"` // reference image or video URL
-	Seed           int32   `json:"seed" yaml:"seed"`                                         // random seed for reproducibility
-	CFGScale       float32 `json:"cfg_scale" yaml:"cfg_scale"`                               // classifier-free guidance scale
-	Step           int32   `json:"step" yaml:"step"`                                         // number of diffusion steps
-	ResponseFormat string  `json:"response_format" yaml:"response_format"`                   // output format (url or b64_json)
+	Prompt         string  `json:"prompt" yaml:"prompt"`
+	NegativePrompt string  `json:"negative_prompt" yaml:"negative_prompt"`
+	StartImage     string  `json:"start_image" yaml:"start_image"`
+	EndImage       string  `json:"end_image" yaml:"end_image"`
+	Width          int32   `json:"width" yaml:"width"`
+	Height         int32   `json:"height" yaml:"height"`
+	NumFrames      int32   `json:"num_frames" yaml:"num_frames"`
+	FPS            int32   `json:"fps" yaml:"fps"`
+	Seconds        string  `json:"seconds,omitempty" yaml:"seconds,omitempty"`
+	Size           string  `json:"size,omitempty" yaml:"size,omitempty"`
+	InputReference string  `json:"input_reference,omitempty" yaml:"input_reference,omitempty"`
+	Seed           int32   `json:"seed" yaml:"seed"`
+	CFGScale       float32 `json:"cfg_scale" yaml:"cfg_scale"`
+	Step           int32   `json:"step" yaml:"step"`
+	ResponseFormat string  `json:"response_format" yaml:"response_format"`
 }

 // @Description TTS request body
 type TTSRequest struct {
 	BasicModelRequest
-	Input    string `json:"input" yaml:"input"` // text input
-	Voice    string `json:"voice" yaml:"voice"` // voice audio file or speaker id
-	Backend  string `json:"backend" yaml:"backend"` // backend engine override
-	Language string `json:"language,omitempty" yaml:"language,omitempty"`               // (optional) language to use with TTS model
-	Format   string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format
+	Input      string `json:"input" yaml:"input"` // text input
+	Voice      string `json:"voice" yaml:"voice"` // voice audio file or speaker id
+	Backend    string `json:"backend" yaml:"backend"`
+	Language   string `json:"language,omitempty" yaml:"language,omitempty"`               // (optional) language to use with TTS model
+	Format     string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format
 	Stream     bool   `json:"stream,omitempty" yaml:"stream,omitempty"`                   // (optional) enable streaming TTS
 	SampleRate int    `json:"sample_rate,omitempty" yaml:"sample_rate,omitempty"`         // (optional) desired output sample rate
 }
@@ -65,7 +65,7 @@ type TTSRequest struct {
 // @Description VAD request body
 type VADRequest struct {
 	BasicModelRequest
-	Audio []float32 `json:"audio" yaml:"audio"` // raw audio samples as float32 PCM
+	Audio []float32 `json:"audio" yaml:"audio"` // model name or full path
 }

 type VADSegment struct {
@@ -146,13 +146,13 @@ type SysInfoModel struct {
 }

 type SystemInformationResponse struct {
-	Backends []string       `json:"backends"`      // available backend engines
-	Models   []SysInfoModel `json:"loaded_models"` // currently loaded models
+	Backends []string       `json:"backends"`
+	Models   []SysInfoModel `json:"loaded_models"`
 }

 type DetectionRequest struct {
 	BasicModelRequest
-	Image string `json:"image"` // URL or base64-encoded image to analyze
+	Image string `json:"image"`
 }

 type DetectionResponse struct {
--- a/core/schema/tokenize.go
+++ b/core/schema/tokenize.go
@@ -2,9 +2,9 @@ package schema

 type TokenizeRequest struct {
 	BasicModelRequest
-	Content string `json:"content"` // text to tokenize
+	Content string `json:"content"`
 }

 type TokenizeResponse struct {
-	Tokens []int32 `json:"tokens"` // token IDs
+	Tokens []int32 `json:"tokens"`
 }
--- a/core/services/nodes/health_mock_test.go
+++ b/core/services/nodes/health_mock_test.go
@@ -231,9 +231,6 @@ func (c *fakeBackendClient) QuantizationProgress(_ context.Context, _ *pb.Quanti
 func (c *fakeBackendClient) StopQuantization(_ context.Context, _ *pb.QuantizationStopRequest, _ ...ggrpc.CallOption) (*pb.Result, error) {
 	return nil, nil
 }
-func (c *fakeBackendClient) Free(_ context.Context) error {
-	return nil
-}

 // --- fakeBackendClientFactory ---

--- a/core/services/nodes/inflight_test.go
+++ b/core/services/nodes/inflight_test.go
@@ -175,10 +175,6 @@ func (f *fakeGRPCBackend) StopQuantization(_ context.Context, _ *pb.Quantization
 	return &pb.Result{}, nil
 }

-func (f *fakeGRPCBackend) Free(_ context.Context) error {
-	return nil
-}
-
 // --- Tests ---

 var _ = Describe("InFlightTrackingClient", func() {
--- a/core/services/nodes/managers_distributed.go
+++ b/core/services/nodes/managers_distributed.go
@@ -11,7 +11,6 @@ import (
 	"github.com/mudler/LocalAI/core/services/galleryop"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/xlog"
-	"github.com/nats-io/nats.go"
 )

 // DistributedModelManager wraps a local ModelManager and adds NATS fan-out
@@ -85,13 +84,6 @@ func (d *DistributedBackendManager) DeleteBackend(name string) error {
 			continue
 		}
 		if _, delErr := d.adapter.DeleteBackend(node.ID, name); delErr != nil {
-			if errors.Is(delErr, nats.ErrNoResponders) {
-				// Node's NATS subscription is gone — likely restarted with a new ID.
-				// Mark it unhealthy so future fan-outs skip it.
-				xlog.Warn("No NATS responders for node, marking unhealthy", "node", node.Name, "nodeID", node.ID)
-				d.registry.MarkUnhealthy(context.Background(), node.ID)
-				continue
-			}
 			xlog.Warn("Failed to propagate backend deletion to worker", "node", node.Name, "backend", name, "error", delErr)
 			errs = append(errs, fmt.Errorf("node %s: %w", node.Name, delErr))
 		}
@@ -113,11 +105,6 @@ func (d *DistributedBackendManager) ListBackends() (gallery.SystemBackends, erro
 		}
 		reply, err := d.adapter.ListBackends(node.ID)
 		if err != nil {
-			if errors.Is(err, nats.ErrNoResponders) {
-				xlog.Warn("No NATS responders for node, marking unhealthy", "node", node.Name, "nodeID", node.ID)
-				d.registry.MarkUnhealthy(context.Background(), node.ID)
-				continue
-			}
 			xlog.Warn("Failed to list backends on worker", "node", node.Name, "error", err)
 			continue
 		}
@@ -158,11 +145,6 @@ func (d *DistributedBackendManager) InstallBackend(ctx context.Context, op *gall
 		}
 		reply, err := d.adapter.InstallBackend(node.ID, backendName, "", string(galleriesJSON))
 		if err != nil {
-			if errors.Is(err, nats.ErrNoResponders) {
-				xlog.Warn("No NATS responders for node, marking unhealthy", "node", node.Name, "nodeID", node.ID)
-				d.registry.MarkUnhealthy(context.Background(), node.ID)
-				continue
-			}
 			xlog.Warn("Failed to install backend on worker", "node", node.Name, "backend", backendName, "error", err)
 			continue
 		}
--- a/docs/content/features/GPU-acceleration.md
+++ b/docs/content/features/GPU-acceleration.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "GPU Acceleration"
+title = "⚡ GPU acceleration"
 weight = 9
 url = "/features/gpu-acceleration/"
 +++
--- a/docs/content/features/_index.en.md
+++ b/docs/content/features/_index.en.md
@@ -27,7 +27,8 @@ LocalAI provides a comprehensive set of features for running AI models locally.
 - **[Realtime API](openai-realtime/)** - Low-latency multi-modal conversations (voice+text) over WebSocket
 - **[Constrained Grammars](constrained_grammars/)** - Control model output format with BNF grammars
 - **[GPU Acceleration](GPU-acceleration/)** - Optimize performance with GPU support
- **[Distribution](distribution/)** - Scale inference across multiple nodes (P2P federation or production distributed mode)
+- **[Distributed Inference](distributed_inferencing/)** - Scale inference across multiple nodes
+- **[Distributed Mode](distributed-mode/)** - Horizontal scaling with PostgreSQL, NATS, and remote backend nodes
 - **[P2P API](p2p/)** - Monitor and manage P2P worker and federated nodes
 - **[Model Context Protocol (MCP)](mcp/)** - Enable agentic capabilities with MCP integration
 - **[Agents](agents/)** - Autonomous AI agents with tools, knowledge base, and skills
--- a/docs/content/features/agents.md
+++ b/docs/content/features/agents.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "Agents"
+title = "🤖 Agents"
 weight = 21
 url = '/features/agents'
 +++
--- a/docs/content/features/api-discovery.md
+++ b/docs/content/features/api-discovery.md
@@ -1,223 +0,0 @@
-+++
-title = "API Discovery & Instructions"
-weight = 27
-toc = true
-description = "Programmatic API discovery for agents, tools, and automation"
-tags = ["API", "Agents", "Instructions", "Configuration", "Advanced"]
-categories = ["Features"]
-+++
-
-LocalAI exposes a set of discovery endpoints that let external agents, coding assistants, and automation tools programmatically learn what the instance can do and how to control it — without reading documentation ahead of time.
-
-## Quick start
-
-```bash
-# 1. Discover what's available
-curl http://localhost:8080/.well-known/localai.json
-
-# 2. Browse instruction areas
-curl http://localhost:8080/api/instructions
-
-# 3. Get an API guide for a specific instruction
-curl http://localhost:8080/api/instructions/config-management
-```
-
-## Well-Known Discovery Endpoint
-
-`GET /.well-known/localai.json`
-
-Returns the instance version, all available endpoint URLs (flat and categorized), and runtime capabilities.
-
-**Example response (abbreviated):**
-
-```json
-{
-  "version": "v2.28.0",
-  "endpoints": {
-    "chat_completions": "/v1/chat/completions",
-    "models": "/v1/models",
-    "config_metadata": "/api/models/config-metadata",
-    "instructions": "/api/instructions",
-    "swagger": "/swagger/index.html"
-  },
-  "endpoint_groups": {
-    "openai_compatible": { "chat_completions": "/v1/chat/completions", "..." : "..." },
-    "config_management": { "config_metadata": "/api/models/config-metadata", "..." : "..." },
-    "model_management": { "..." : "..." },
-    "monitoring": { "..." : "..." }
-  },
-  "capabilities": {
-    "config_metadata": true,
-    "config_patch": true,
-    "vram_estimate": true,
-    "mcp": true,
-    "agents": false,
-    "p2p": false
-  }
-}
-```
-
-The `capabilities` object reflects the current runtime configuration — for example, `mcp` is only `true` if MCP is enabled, and `agents` is `true` only if the agent pool is running.
-
-## Instructions API
-
-Instructions are curated groups of related API endpoints. Each instruction maps to one or more Swagger tags and provides a focused, LLM-readable guide.
-
-### List all instructions
-
-`GET /api/instructions`
-
-```bash
-curl http://localhost:8080/api/instructions
-```
-
-Returns a compact list of instruction areas:
-
-```json
-{
-  "instructions": [
-    {
-      "name": "chat-inference",
-      "description": "OpenAI-compatible chat completions, text completions, and embeddings",
-      "tags": ["inference", "embeddings"],
-      "url": "/api/instructions/chat-inference"
-    },
-    {
-      "name": "config-management",
-      "description": "Discover, read, and modify model configuration fields with VRAM estimation",
-      "tags": ["config"],
-      "url": "/api/instructions/config-management"
-    }
-  ],
-  "hint": "Fetch GET {url} for a markdown API guide. Add ?format=json for a raw OpenAPI fragment."
-}
-```
-
-**Available instructions:**
-
-| Instruction | Description |
-|-------------|-------------|
-| `chat-inference` | Chat completions, text completions, embeddings (OpenAI-compatible) |
-| `audio` | Text-to-speech, transcription, voice activity detection, sound generation |
-| `images` | Image generation and inpainting |
-| `model-management` | Browse gallery, install, delete, manage models and backends |
-| `config-management` | Discover, read, and modify model config fields with VRAM estimation |
-| `monitoring` | System metrics, backend status, system information |
-| `mcp` | Model Context Protocol — tool-augmented chat with MCP servers |
-| `agents` | Agent task and job management |
-| `video` | Video generation from text prompts |
-
-### Get an instruction guide
-
-`GET /api/instructions/:name`
-
-By default, returns a **markdown guide** suitable for LLMs and humans:
-
-```bash
-curl http://localhost:8080/api/instructions/config-management
-```
-
-Add `?format=json` to get a raw **OpenAPI fragment** (filtered Swagger spec with only the relevant paths and definitions):
-
-```bash
-curl http://localhost:8080/api/instructions/config-management?format=json
-```
-
-## Configuration Management APIs
-
-These endpoints let agents discover model configuration fields, read current settings, modify them, and estimate VRAM usage.
-
-### Config metadata
-
-`GET /api/models/config-metadata`
-
-Returns structured metadata for all model configuration fields, organized by section. Each field includes its YAML path, Go type, UI type, label, description, default value, validation constraints, and available options.
-
-```bash
-# All fields
-curl http://localhost:8080/api/models/config-metadata
-
-# Filter by section
-curl http://localhost:8080/api/models/config-metadata?section=parameters
-```
-
-### Autocomplete values
-
-`GET /api/models/config-metadata/autocomplete/:provider`
-
-Returns runtime values for dynamic fields. Providers include `backends`, `models`, `models:chat`, `models:tts`, `models:transcript`, `models:vad`.
-
-```bash
-# List available backends
-curl http://localhost:8080/api/models/config-metadata/autocomplete/backends
-
-# List chat-capable models
-curl http://localhost:8080/api/models/config-metadata/autocomplete/models:chat
-```
-
-### Read model config
-
-`GET /api/models/config-json/:name`
-
-Returns the full model configuration as JSON:
-
-```bash
-curl http://localhost:8080/api/models/config-json/my-model
-```
-
-### Update model config
-
-`PATCH /api/models/config-json/:name`
-
-Deep-merges a JSON patch into the existing model configuration. Only include the fields you want to change:
-
-```bash
-curl -X PATCH http://localhost:8080/api/models/config-json/my-model \
-  -H "Content-Type: application/json" \
-  -d '{"context_size": 16384, "gpu_layers": 40}'
-```
-
-The endpoint validates the merged config and writes it to disk as YAML.
-
-{{% notice context="warning" %}}
-Config management endpoints require **admin authentication** when API keys are configured. The discovery and instructions endpoints are unauthenticated.
-{{% /notice %}}
-
-### VRAM estimation
-
-`POST /api/models/vram-estimate`
-
-Estimates VRAM usage for an installed model based on its weight files, context size, and GPU layer offloading:
-
-```bash
-curl -X POST http://localhost:8080/api/models/vram-estimate \
-  -H "Content-Type: application/json" \
-  -d '{"model": "my-model", "context_size": 8192}'
-```
-
-```json
-{
-  "sizeBytes": 4368438272,
-  "sizeDisplay": "4.4 GB",
-  "vramBytes": 6123456789,
-  "vramDisplay": "6.1 GB",
-  "context_note": "Estimate used default context_size=8192. The model's trained maximum context is 131072; VRAM usage will be higher at larger context sizes.",
-  "model_max_context": 131072
-}
-```
-
-Optional parameters: `gpu_layers` (number of layers to offload, 0 = all), `kv_quant_bits` (KV cache quantization, 0 = fp16).
-
-## Integration guide
-
-A recommended workflow for agent/tool builders:
-
-1. **Discover**: Fetch `/.well-known/localai.json` to learn available endpoints and capabilities
-2. **Browse instructions**: Fetch `/api/instructions` for an overview of instruction areas
-3. **Deep dive**: Fetch `/api/instructions/:name` for a markdown API guide on a specific area
-4. **Explore config**: Use `/api/models/config-metadata` to understand configuration fields
-5. **Interact**: Use the standard OpenAI-compatible endpoints for inference, and the config management endpoints for runtime tuning
-
-## Swagger UI
-
-The full interactive API documentation is available at `/swagger/index.html`. All annotated endpoints can be explored and tested directly from the browser.
--- a/docs/content/features/audio-to-text.md
+++ b/docs/content/features/audio-to-text.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "Audio to Text"
+title = "🔈 Audio to text"
 weight = 16
 url = "/features/audio-to-text/"
 +++
--- a/docs/content/features/authentication.md
+++ b/docs/content/features/authentication.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "Authentication & Authorization"
+title = "🔐 Authentication & Authorization"
 weight = 26
 url = '/features/authentication'
 +++
--- a/docs/content/features/backends.md
+++ b/docs/content/features/backends.md
@@ -1,5 +1,5 @@
 ---
-title: "Backends"
+title: "⚙️ Backends"
 description: "Learn how to use, manage, and develop backends in LocalAI"
 weight: 4
 url: "/backends/"
--- a/docs/content/features/constrained_grammars.md
+++ b/docs/content/features/constrained_grammars.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "Constrained Grammars"
+title = "✍️ Constrained Grammars"
 weight = 15
 url = "/features/constrained_grammars/"
 +++
--- a/docs/content/features/distributed-mode.md
+++ b/docs/content/features/distributed-mode.md
@@ -5,7 +5,7 @@ weight = 14
 url = "/features/distributed-mode/"
 +++

-Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach]({{% relref "features/distributed_inferencing" %}}), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.
+Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach](/features/distribute/), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.

 {{% notice note %}}
 Distributed mode requires authentication enabled with a **PostgreSQL** database — SQLite is not supported. This is because the node registry, job store, and other distributed state are stored in PostgreSQL tables.
--- a/docs/content/features/distributed_inferencing.md
+++ b/docs/content/features/distributed_inferencing.md
@@ -1,12 +1,12 @@
 +++
 disableToc = false
-title = "P2P / Federated Inference"
+title = "🆕🖧 Distributed Inference"
 weight = 15
 url = "/features/distribute/"
 +++

 {{% notice tip %}}
-Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode]({{% relref "features/distributed-mode" %}}).
+Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode](/features/distributed-mode/).
 {{% /notice %}}

 This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p by using a shared token which makes sure the communication is secure and private between the nodes of the network.
--- a/docs/content/features/distribution.md
+++ b/docs/content/features/distribution.md
@@ -1,34 +0,0 @@
-+++
-disableToc = false
-title = "Distribution"
-weight = 13
-url = "/features/distribution/"
-+++
-
-LocalAI supports distributing inference workloads across multiple machines. There are two approaches, each suited to different use cases:
-
-## Distributed Mode (PostgreSQL + NATS)
-
-Production-grade horizontal scaling with centralized management. Frontends are stateless LocalAI instances behind a load balancer; workers self-register and receive backends dynamically via NATS. State lives in PostgreSQL.
-
-**Best for:** production deployments, Kubernetes, managed infrastructure.
-
-[Read more]({{% relref "features/distributed-mode" %}})
-
-## P2P / Federated Inference
-
-Peer-to-peer networking via libp2p. Share a token to form a cluster with automatic discovery — no central server required. Supports federated load balancing and worker-mode weight sharding.
-
-**Best for:** ad-hoc clusters, community sharing, quick experimentation.
-
-[Read more]({{% relref "features/distributed_inferencing" %}})
-
-## Quick Comparison
-
-| | P2P / Federation | Distributed Mode |
-|---|---|---|
-| **Discovery** | Automatic via libp2p token | Self-registration to frontend URL |
-| **State storage** | In-memory / ledger | PostgreSQL |
-| **Coordination** | Gossip protocol | NATS messaging |
-| **Node management** | Automatic | REST API + WebUI |
-| **Setup complexity** | Minimal (share a token) | Requires PostgreSQL + NATS |
--- a/docs/content/features/embeddings.md
+++ b/docs/content/features/embeddings.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "Embeddings"
+title = "🧠 Embeddings"
 weight = 13
 url = "/features/embeddings/"
 +++
--- a/docs/content/features/gpt-vision.md
+++ b/docs/content/features/gpt-vision.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "GPT Vision"
+title = "🥽 GPT Vision"
 weight = 14
 url = "/features/gpt-vision/"
 +++
--- a/docs/content/features/image-generation.md
+++ b/docs/content/features/image-generation.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "Image Generation"
+title = "🎨 Image generation"
 weight = 12
 url = "/features/image-generation/"
 +++
--- a/docs/content/features/mcp.md
+++ b/docs/content/features/mcp.md
@@ -1,5 +1,5 @@
 +++
-title = "Model Context Protocol (MCP)"
+title = "🔗 Model Context Protocol (MCP)"
 weight = 20
 toc = true
 description = "Agentic capabilities with Model Context Protocol integration"
--- a/docs/content/features/model-gallery.md
+++ b/docs/content/features/model-gallery.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "Model Gallery"
+title = "🖼️ Model gallery"
 weight = 18
 url = '/models'
 +++
--- a/docs/content/features/object-detection.md
+++ b/docs/content/features/object-detection.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "Object Detection"
+title = "🔍 Object detection"
 weight = 13
 url = "/features/object-detection/"
 +++
--- a/docs/content/features/openai-functions.md
+++ b/docs/content/features/openai-functions.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "OpenAI Functions and Tools"
+title = "🔥 OpenAI functions and tools"
 weight = 17
 url = "/features/openai-functions/"
 +++
--- a/docs/content/features/reranker.md
+++ b/docs/content/features/reranker.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "Reranker"
+title = "📈 Reranker"
 weight = 11
 url = "/features/reranker/"
 +++
--- a/docs/content/features/runtime-settings.md
+++ b/docs/content/features/runtime-settings.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "Runtime Settings"
+title = "⚙️ Runtime Settings"
 weight = 25
 url = '/features/runtime-settings'
 +++
--- a/docs/content/features/stores.md
+++ b/docs/content/features/stores.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "Stores"
+title = "💾 Stores"
 weight = 18
 url = '/stores'
 +++
--- a/docs/content/features/text-generation.md
+++ b/docs/content/features/text-generation.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "Text Generation (GPT)"
+title = "📖 Text generation (GPT)"
 weight = 10
 url = "/features/text-generation/"
 +++
--- a/docs/content/features/text-to-audio.md
+++ b/docs/content/features/text-to-audio.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "Text to Audio (TTS)"
+title = "🗣 Text to audio (TTS)"
 weight = 11
 url = "/features/text-to-audio/"
 +++
--- a/Show More
+++ b/Show More