chore(deps): bump mxschmitt/action-tmate from 3.23 to 3.24

Bumps [mxschmitt/action-tmate](https://github.com/mxschmitt/action-tmate) from 3.23 to 3.24.
- [Release notes](https://github.com/mxschmitt/action-tmate/releases)
- [Changelog](https://github.com/mxschmitt/action-tmate/blob/master/RELEASE.md)
- [Commits](https://github.com/mxschmitt/action-tmate/compare/v3.23...v3.24)

---
updated-dependencies:
- dependency-name: mxschmitt/action-tmate
  dependency-version: '3.24'
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-06-24 16:49:06 -04:00 · 2026-06-22 19:42:05 +00:00
59 changed files with 778 additions and 907 deletions
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -4974,9 +4974,6 @@ includeDarwin:
  - backend: "kitten-tts"
    tag-suffix: "-metal-darwin-arm64-kitten-tts"
    build-type: "mps"
-  - backend: "trl"
-    tag-suffix: "-metal-darwin-arm64-trl"
-    build-type: "mps"
  - backend: "piper"
    tag-suffix: "-metal-darwin-arm64-piper"
    build-type: "metal"
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -71,7 +71,7 @@ jobs:
          if-no-files-found: ignore
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.24
        with:
          detached: true
          connect-timeout-seconds: 180
@@ -116,7 +116,7 @@ jobs:
          PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.24
        with:
          detached: true
          connect-timeout-seconds: 180
--- a/.github/workflows/tests-aio.yml
+++ b/.github/workflows/tests-aio.yml
@@ -79,7 +79,7 @@ jobs:
            PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-e2e e2e-aio
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.24
        with:
          detached: true
          connect-timeout-seconds: 180
--- a/.github/workflows/tests-e2e.yml
+++ b/.github/workflows/tests-e2e.yml
@@ -57,7 +57,7 @@ jobs:
          PATH="$PATH:$HOME/go/bin" make build-mock-backend test-e2e
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.24
        with:
          detached: true
          connect-timeout-seconds: 180
--- a/.github/workflows/tests-pii-ner-e2e.yml
+++ b/.github/workflows/tests-pii-ner-e2e.yml
@@ -90,7 +90,7 @@ jobs:
        run: PATH="$PATH:$HOME/go/bin" make test-extra-backend-privacy-filter
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.24
        with:
          detached: true
          connect-timeout-seconds: 180
--- a/.github/workflows/tests-ui-e2e.yml
+++ b/.github/workflows/tests-ui-e2e.yml
@@ -75,7 +75,7 @@ jobs:
          retention-days: 7
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.23
+        uses: mxschmitt/action-tmate@v3.24
        with:
          detached: true
          connect-timeout-seconds: 180
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@

-IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
+IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
 LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
+LLAMA_VERSION?=7c082bc417bbe53210a83df4ba5b49e18ce6193c
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
--- a/backend/go/crispasr/Makefile
+++ b/backend/go/crispasr/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # CrispASR version (release tag)
 CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
-CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
+CRISPASR_VERSION?=7a8cb80907341c0204bd0488c1244764f4163883
 SO_TARGET?=libgocrispasr.so

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/go/parakeet-cpp/Makefile
+++ b/backend/go/parakeet-cpp/Makefile
@@ -1,6 +1,6 @@
 # parakeet-cpp backend Makefile.
 #
-# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
+# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
 # (.github/bump_deps.sh) can find and update it - matches the
 # whisper.cpp / ds4 / vibevoice-cpp convention.
 #
@@ -15,7 +15,7 @@
 # That's what the L0 smoke test uses. The default target below does the
 # proper clone-at-pin + cmake build so CI doesn't need a side-checkout.

-PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
+PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
 PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp

 GOCMD?=go
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
+STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf

 CMAKE_ARGS+=-DGGML_MAX_NAME=128

--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
+WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
 SO_TARGET?=libgowhisper.so

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -5282,7 +5282,6 @@
    nvidia: "cuda12-trl"
    nvidia-cuda-12: "cuda12-trl"
    nvidia-cuda-13: "cuda13-trl"
-    metal: "metal-trl"
 ## TRL backend images
 - !!merge <<: *trl
  name: "cpu-trl"
@@ -5314,16 +5313,6 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
  mirrors:
    - localai/localai-backends:master-gpu-nvidia-cuda-13-trl
- !!merge <<: *trl
-  name: "metal-trl"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-trl
- !!merge <<: *trl
-  name: "metal-trl-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-trl
 ## llama.cpp quantization backend
 - &llama-cpp-quantization
  name: "llama-cpp-quantization"
--- a/backend/python/trl/install.sh
+++ b/backend/python/trl/install.sh
@@ -8,13 +8,7 @@ else
    source $backend_dir/../common/libbackend.sh
 fi

-EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
-# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
-# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
-# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
-if [ "x${USE_PIP:-}" != "xtrue" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
-fi
+EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 installRequirements

 # Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
--- a/backend/python/trl/requirements-mps.txt
+++ b/backend/python/trl/requirements-mps.txt
@@ -1,12 +0,0 @@
-torch==2.10.0
-trl
-peft
-datasets>=3.0.0
-transformers>=4.56.2
-accelerate>=1.4.0
-huggingface-hub>=1.3.0
-sentencepiece
-# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
-# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
-# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
-# on macOS arm64.
--- a/core/application/config_file_watcher.go
+++ b/core/application/config_file_watcher.go
@@ -215,7 +215,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
 		envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
 		envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
 		envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
-		envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
 		envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
 		envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
 		envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
@@ -336,15 +335,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
 			if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
 				appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
 			}
-			if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
-				// Request-side default redaction reads this live via
-				// ResolvePIIPolicy, so a file edit takes effect on the next chat
-				// request. The MITM listener resolves its per-host detector map
-				// once at start, so a raw file edit reaches cloud-proxy traffic
-				// only after a restart or a POST /api/settings (which rebuilds
-				// the listener) — the admin UI uses the latter.
-				appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
-			}
 			if settings.AutoUpgradeBackends != nil {
 				appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
 			}
--- a/core/application/runtime_settings_branding_test.go
+++ b/core/application/runtime_settings_branding_test.go
@@ -109,52 +109,6 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
 		})
 	})

-	// Instance-wide default PII detectors. The file is the only source (no
-	// env var), and the loader runs immediately before startMITMIfConfigured,
-	// so a regression here means the cloud-proxy MITM listener resolves an
-	// empty detector set at boot and forwards intercepted traffic unredacted —
-	// even though pii_default_detectors is on disk and the MITM model has PII
-	// enabled. It also breaks request-side default redaction the same way.
-	Describe("PII default detectors", func() {
-		It("loads pii_default_detectors from the file", func() {
-			cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
-			loadRuntimeSettingsFromFile(cfg)
-			Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
-		})
-
-		It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
-			cfg := &config.ApplicationConfig{
-				DynamicConfigsDir:   seedSettings(`{"pii_default_detectors": ["from-file"]}`),
-				PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
-			}
-			loadRuntimeSettingsFromFile(cfg)
-			Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
-		})
-	})
-
-	// The live file watcher applies pii_default_detectors on a runtime change
-	// the same way it handles galleries/threads/etc.: env-set values (current
-	// == startup snapshot) are left alone, otherwise the file value is applied
-	// to the live config so request-side default redaction picks it up without
-	// a restart.
-	Describe("file watcher: pii_default_detectors", func() {
-		It("applies a changed file value to the live config", func() {
-			startup := config.ApplicationConfig{} // no env baseline
-			live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
-			handler := readRuntimeSettingsJson(startup)
-			Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
-			Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
-		})
-
-		It("leaves an env-controlled value untouched", func() {
-			startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
-			live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
-			handler := readRuntimeSettingsJson(startup)
-			Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
-			Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
-		})
-	})
-
 	// The Agent Pool block has a mix of zero and non-zero defaults
 	// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
 	// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -750,20 +750,6 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
 		options.MITMListen = *settings.MITMListen
 	}

-	// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
-	// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
-	// — apply it only when the env/CLI left the value empty, mirroring the
-	// "env > file" precedence used for the other fields. This must land before
-	// startMITMIfConfigured (called right after this loader): the cloud-proxy
-	// listener resolves each intercept host's detectors once at start via
-	// ResolvePIIPolicy, and a MITM model that names no detectors of its own
-	// falls back to these defaults. Without it the listener (and request-side
-	// default redaction) starts with an empty detector set and forwards
-	// traffic unredacted even though pii_default_detectors is on disk.
-	if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
-		options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
-	}
-
 	// Backend upgrade flags
 	if settings.AutoUpgradeBackends != nil {
 		if !options.AutoUpgradeBackends {
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -181,8 +181,6 @@ type RunCMD struct {
 	// Cloud-proxy MITM listener (off by default).
 	MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
 	MITMCADir  string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
-
-	PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
 }

 func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -245,7 +243,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithAPIAddress(r.Address),
 		config.WithMITMListen(r.MITMListen),
 		config.WithMITMCADir(r.MITMCADir),
-		config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
 		config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
 		config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
 			tunnelEnvVar := strings.Join(tunnels, ",")
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -712,18 +712,6 @@ func WithMITMCADir(dir string) AppOption {
 	}
 }

-// WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
-// model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
-// models) that names no pii.detectors of its own. CLI/env:
-// LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
-// runtime_settings.json / the Middleware UI; a non-empty value takes
-// precedence over the file (env > file).
-func WithPIIDefaultDetectors(detectors []string) AppOption {
-	return func(o *ApplicationConfig) {
-		o.PIIDefaultDetectors = detectors
-	}
-}
-
 func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
 	return func(o *ApplicationConfig) {
 		o.DynamicConfigsDir = dynamicConfigsDir
--- a/core/config/runtime_settings_persist.go
+++ b/core/config/runtime_settings_persist.go
@@ -5,7 +5,6 @@ import (
 	"errors"
 	"os"
 	"path/filepath"
-	"reflect"
 )

 // runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
@@ -34,35 +33,6 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
 	return settings, nil
 }

-// MergeNonNil overlays every set (non-nil) field of overlay onto the
-// receiver, leaving the receiver's value untouched wherever overlay left a
-// field unset. Every RuntimeSettings field is a pointer precisely so "set"
-// can be told apart from "absent" (see the type doc), which makes this a
-// faithful partial update: a caller that submits only the field it owns
-// changes exactly that field and never clobbers unrelated settings.
-//
-// This is the read-modify-write contract the persistence helpers exist for.
-// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
-// on top, and writes the result — so a focused admin page that POSTs only its
-// own field (the Middleware page sends only mitm_listen; the detector table
-// only pii_default_detectors) no longer nulls every other setting.
-//
-// Reflection keeps the merge total over the struct: a field added to
-// RuntimeSettings later is merged automatically, so the persistence path can
-// never silently drop a new setting the way a hand-maintained field list
-// would. Non-pointer fields (none today) are skipped — they cannot express
-// "absent", so the receiver wins.
-func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
-	dst := reflect.ValueOf(s).Elem()
-	src := reflect.ValueOf(overlay)
-	for i := 0; i < src.NumField(); i++ {
-		f := src.Field(i)
-		if f.Kind() == reflect.Pointer && !f.IsNil() {
-			dst.Field(i).Set(f)
-		}
-	}
-}
-
 // WritePersistedSettings serialises the given RuntimeSettings to
 // runtime_settings.json with restricted permissions (it may carry API
 // keys and P2P tokens).
--- a/core/config/runtime_settings_persist_test.go
+++ b/core/config/runtime_settings_persist_test.go
@@ -12,7 +12,6 @@ import (
 )

 func strPtr(s string) *string { return &s }
-func boolPtr(b bool) *bool     { return &b }

 var _ = Describe("RuntimeSettings persistence helpers", func() {
 	var (
@@ -52,47 +51,6 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
 		})
 	})

-	// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
-	// relies on: a focused admin page POSTs only the field it owns, and the
-	// handler reads the on-disk settings and overlays the request on top.
-	// Without it, the body would be written verbatim and every field the
-	// caller omitted would be nulled (the reported regression: changing
-	// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
-	Describe("MergeNonNil partial update", func() {
-		It("overlays set fields and preserves unset ones", func() {
-			base := config.RuntimeSettings{
-				MITMListen:          strPtr(":9000"),
-				Galleries:           &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
-				WatchdogIdleEnabled: boolPtr(true),
-				ApiKeys:             &[]string{"persisted-key"},
-				PIIDefaultDetectors: &[]string{"det-a"},
-			}
-
-			// Simulate the Middleware proxy tab: only mitm_listen is sent.
-			overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
-			base.MergeNonNil(overlay)
-
-			Expect(base.MITMListen).ToNot(BeNil())
-			Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
-			// Everything the overlay left unset must survive untouched.
-			Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
-			Expect(*base.Galleries).To(HaveLen(1))
-			Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
-			Expect(*base.WatchdogIdleEnabled).To(BeTrue())
-			Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
-			Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
-			Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
-			Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
-		})
-
-		It("lets an explicit empty slice clear a field", func() {
-			base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
-			base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
-			Expect(base.PIIDefaultDetectors).ToNot(BeNil())
-			Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
-		})
-	})
-
 	// MITM round trip pins the contract that loadRuntimeSettingsFromFile
 	// MITM listener address must survive a write/read round trip so the
 	// next process restart can bring the listener back up. (Intercept
--- a/core/http/endpoints/localai/agent_collections.go
+++ b/core/http/endpoints/localai/agent_collections.go
@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")
 		file, err := c.FormFile("file")
 		if err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
+		entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
 		if err != nil {
 			entry = entryParam
 		}
-		content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
+		content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
 		if err := c.Bind(&payload); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
 		}
-		results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
+		results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
+		if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 			}
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
 		if err := c.Bind(&payload); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
 		}
-		remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
+		remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
 		if payload.UpdateInterval < 1 {
 			payload.UpdateInterval = 60
 		}
-		if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
+		if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 			}
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
 		if err := c.Bind(&payload); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
 		}
-		if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
+		if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
 			return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
 		}
 		return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
 		if err != nil {
 			entry = entryParam
 		}
-		fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
+		fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
+		sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
 		if err != nil {
 			if strings.Contains(err.Error(), "not found") {
 				return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
--- a/core/http/endpoints/localai/agent_collections_param_test.go
+++ b/core/http/endpoints/localai/agent_collections_param_test.go
@@ -1,49 +0,0 @@
-package localai
-
-import (
-	"net/http"
-	"net/http/httptest"
-
-	"github.com/labstack/echo/v4"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-// Regression for #10443: agent/collection names carry a "legacy-api-key:"
-// prefix, so the ':' is percent-encoded as %3A in the request path. Echo routes
-// such paths via URL.RawPath and stores the path-param value still escaped, so
-// handlers must URL-decode it before looking the collection up in the store -
-// otherwise the lookup sees "legacy-api-key%3ALiteraryResearch" and 404s.
-var _ = Describe("decodedParam", func() {
-	var e *echo.Echo
-
-	BeforeEach(func() {
-		e = echo.New()
-	})
-
-	// route runs a request through Echo's real router so the path param is
-	// populated exactly as it would be in production, then returns the decoded
-	// value the handler would observe.
-	route := func(rawPath string) string {
-		var got string
-		e.GET("/api/agents/collections/:name/upload", func(c echo.Context) error {
-			got = decodedParam(c, "name")
-			return c.NoContent(http.StatusOK)
-		})
-		req := httptest.NewRequest(http.MethodGet, rawPath, nil)
-		rec := httptest.NewRecorder()
-		e.ServeHTTP(rec, req)
-		Expect(rec.Code).To(Equal(http.StatusOK))
-		return got
-	}
-
-	It("decodes a percent-encoded colon in the collection name", func() {
-		got := route("/api/agents/collections/legacy-api-key%3ALiteraryResearch/upload")
-		Expect(got).To(Equal("legacy-api-key:LiteraryResearch"))
-	})
-
-	It("leaves an unencoded name untouched", func() {
-		got := route("/api/agents/collections/PlainCollection/upload")
-		Expect(got).To(Equal("PlainCollection"))
-	})
-})
--- a/core/http/endpoints/localai/agents.go
+++ b/core/http/endpoints/localai/agents.go
@@ -6,7 +6,6 @@ import (
 	"io"
 	"maps"
 	"net/http"
-	"net/url"
 	"os"
 	"path/filepath"
 	"slices"
@@ -34,22 +33,6 @@ func getUserID(c echo.Context) string {
 	return user.ID
 }

-// decodedParam returns the named path parameter, URL-decoding it.
-//
-// Echo routes a request via URL.RawPath whenever the path contains
-// percent-encoded characters (e.g. %3A for ':'), and in that case stores the
-// matched path-param value raw/escaped. Agent and collection names carry a
-// "legacy-api-key:" prefix, so the ':' arrives as %3A and the raw param no
-// longer matches the stored name. Callers must unescape before lookups.
-// Falls back to the raw value if it isn't valid percent-encoding.
-func decodedParam(c echo.Context, name string) string {
-	raw := c.Param(name)
-	if decoded, err := url.PathUnescape(raw); err == nil {
-		return decoded
-	}
-	return raw
-}
-
 // isAdminUser returns true if the authenticated user has admin role.
 func isAdminUser(c echo.Context) bool {
 	user := auth.GetUser(c)
@@ -144,7 +127,7 @@ func GetAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")

 		statuses := svc.ListAgentsForUser(userID)
 		active, exists := statuses[name]
@@ -159,7 +142,7 @@ func UpdateAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")
 		var cfg state.AgentConfig
 		if err := c.Bind(&cfg); err != nil {
 			return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
@@ -178,7 +161,7 @@ func DeleteAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")
 		if err := svc.DeleteAgentForUser(userID, name); err != nil {
 			return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
 		}
@@ -190,7 +173,7 @@ func GetAgentConfigEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")
 		cfg := svc.GetAgentConfigForUser(userID, name)
 		if cfg == nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
@@ -203,7 +186,7 @@ func PauseAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		if err := svc.PauseAgentForUser(userID, decodedParam(c, "name")); err != nil {
+		if err := svc.PauseAgentForUser(userID, c.Param("name")); err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 		}
 		return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -214,7 +197,7 @@ func ResumeAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		if err := svc.ResumeAgentForUser(userID, decodedParam(c, "name")); err != nil {
+		if err := svc.ResumeAgentForUser(userID, c.Param("name")); err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 		}
 		return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -225,7 +208,7 @@ func GetAgentStatusEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")

 		history := svc.GetAgentStatusForUser(userID, name)
 		if history == nil {
@@ -258,7 +241,7 @@ func GetAgentObservablesEndpoint(app *application.Application) echo.HandlerFunc
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")

 		history, err := svc.GetAgentObservablesForUser(userID, name)
 		if err != nil {
@@ -278,7 +261,7 @@ func ClearAgentObservablesEndpoint(app *application.Application) echo.HandlerFun
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")
 		if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
 		}
@@ -290,7 +273,7 @@ func ChatWithAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")
 		var payload struct {
 			Message string `json:"message"`
 		}
@@ -319,7 +302,7 @@ func AgentSSEEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")

 		// Try local SSE manager first
 		manager := svc.GetSSEManagerForUser(userID, name)
@@ -351,7 +334,7 @@ func ExportAgentEndpoint(app *application.Application) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		svc := app.AgentPoolService()
 		userID := effectiveUserID(c)
-		name := decodedParam(c, "name")
+		name := c.Param("name")
 		data, err := svc.ExportAgentForUser(userID, name)
 		if err != nil {
 			return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
--- a/core/http/endpoints/localai/settings.go
+++ b/core/http/endpoints/localai/settings.go
@@ -4,6 +4,8 @@ import (
 	"encoding/json"
 	"io"
 	"net/http"
+	"os"
+	"path/filepath"
 	"time"

 	"github.com/labstack/echo/v4"
@@ -108,18 +110,6 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 			})
 		}

-		// Read whatever is already persisted: it is both the source of truth
-		// for branding asset filenames (below) and the base we merge this
-		// request onto before writing. A read failure must not let a Save
-		// silently discard the existing settings — surface it instead.
-		persisted, err := appConfig.ReadPersistedSettings()
-		if err != nil {
-			return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
-				Success: false,
-				Error:   "Failed to read existing settings: " + err.Error(),
-			})
-		}
-
 		// Branding asset filenames are owned exclusively by
 		// /api/branding/asset/{kind} (upload/delete). The Settings page also
 		// round-trips them via GET /api/settings, but its local state is stale
@@ -128,9 +118,11 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 		// at page open. Replace whatever the body sent for these three fields
 		// with the values currently on disk so /api/settings can never
 		// regress them.
-		settings.LogoFile = persisted.LogoFile
-		settings.LogoHorizontalFile = persisted.LogoHorizontalFile
-		settings.FaviconFile = persisted.FaviconFile
+		if existing, err := appConfig.ReadPersistedSettings(); err == nil {
+			settings.LogoFile = existing.LogoFile
+			settings.LogoHorizontalFile = existing.LogoHorizontalFile
+			settings.FaviconFile = existing.FaviconFile
+		}

 		// The UI reads ApiKeys from GET /api/settings, which already returns the
 		// merged env+runtime list. When the user clicks Save, the same merged
@@ -153,17 +145,16 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 			settings.ApiKeys = &runtimeOnly
 		}

-		// Persist as a partial update: overlay only the fields this request set
-		// onto the settings already on disk. Focused admin pages POST just the
-		// keys they own (the Middleware proxy tab sends only mitm_listen; the
-		// detector table only pii_default_detectors), so writing the request
-		// body verbatim would null every unrelated setting (the no-omitempty
-		// api_keys / pii_default_detectors fields even round-trip as JSON
-		// null). The full Settings page still round-trips every field, so its
-		// Save is unchanged.
-		toPersist := persisted
-		toPersist.MergeNonNil(settings)
-		if err := appConfig.WritePersistedSettings(toPersist); err != nil {
+		settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
+		settingsJSON, err := json.MarshalIndent(settings, "", "  ")
+		if err != nil {
+			return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
+				Success: false,
+				Error:   "Failed to marshal settings: " + err.Error(),
+			})
+		}
+
+		if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
 			return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
 				Success: false,
 				Error:   "Failed to write settings file: " + err.Error(),
@@ -271,14 +262,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
 			}
 		}

-		// Rebuild the MITM listener when its address OR the instance-wide
-		// default detectors change. The per-host detector map is resolved once
-		// at listener start (startMITMLocked → ResolvePIIPolicy), so a
-		// default-detector change is otherwise invisible to cloud-proxy traffic
-		// until the next restart — an admin toggling a default detector would
-		// see no redaction. RestartMITM is a no-op when the listener is
-		// disabled (empty address).
-		if settings.MITMListen != nil || settings.PIIDefaultDetectors != nil {
+		if settings.MITMListen != nil {
 			if err := app.RestartMITM(); err != nil {
 				xlog.Error("Failed to restart MITM proxy", "error", err)
 				return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
--- a/core/http/endpoints/localai/settings_test.go
+++ b/core/http/endpoints/localai/settings_test.go
@@ -52,10 +52,6 @@ var _ = Describe("Settings endpoints", func() {
 		// Settings are persisted here; set after construction since there's no
 		// dedicated AppOption for it.
 		app.ApplicationConfig().DynamicConfigsDir = tmp
-		// Contain the MITM CA inside tmp too. The partial-save spec flips
-		// mitm_listen, which starts the listener and writes a CA; without this
-		// it defaults to ./mitm-ca and litters the package source tree.
-		app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")

 		e = echo.New()
 		e.GET("/api/settings", GetSettingsEndpoint(app))
@@ -113,57 +109,6 @@ var _ = Describe("Settings endpoints", func() {
 		Expect(err).ToNot(HaveOccurred())
 	})

-	// Regression: a focused admin page (the Middleware proxy tab) POSTs only
-	// the one field it owns — mitm_listen. The old handler wrote the request
-	// body verbatim, so every other persisted setting was dropped (and
-	// api_keys / pii_default_detectors, which lack omitempty, were written as
-	// null). A partial POST must now merge onto what is already on disk.
-	It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
-		// First save establishes a fuller settings file (as the full Settings
-		// page would): galleries, an API key, and the MITM listener. The
-		// listener restart binds a real socket, so use 127.0.0.1:0 for an
-		// ephemeral free port rather than a fixed one that may be in use.
-		rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
-		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
-
-		// The Middleware proxy tab then changes only the listen address — the
-		// exact partial body that nulled everything else before the fix.
-		rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
-		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
-
-		raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
-		Expect(err).ToNot(HaveOccurred())
-		var ondisk config.RuntimeSettings
-		Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
-
-		Expect(ondisk.MITMListen).ToNot(BeNil())
-		Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
-		Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
-		Expect(*ondisk.Galleries).To(HaveLen(1))
-		Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
-		Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
-		Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
-		Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
-	})
-
-	// The MITM listener resolves its per-host PII detectors once at start
-	// (startMITMLocked → ResolvePIIPolicy), and the handler used to restart it
-	// only when mitm_listen changed. So an admin toggling a default detector
-	// (the Middleware detector table POSTs only pii_default_detectors) left
-	// cloud-proxy traffic unredacted until the next reboot. A
-	// pii_default_detectors change must now rebuild the listener.
-	It("rebuilds the MITM listener when only pii_default_detectors changes", func() {
-		rec := post(`{"mitm_listen":"127.0.0.1:0"}`)
-		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
-		srv1 := app.MITMServer()
-		Expect(srv1).ToNot(BeNil(), "listener should be running after mitm_listen is set")
-
-		rec = post(`{"pii_default_detectors":["det-a"]}`)
-		Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
-		Expect(app.MITMServer()).ToNot(BeIdenticalTo(srv1),
-			"a default-detector change must restart the listener so it picks up the new detectors")
-	})
-
 	// Residual #9125: enabling the watchdog from a cold (off) state via the
 	// React master toggle must start the live watchdog immediately, without a
 	// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
--- a/core/http/react-ui/e2e/role-mode-adaptive.spec.js
+++ b/core/http/react-ui/e2e/role-mode-adaptive.spec.js
@@ -0,0 +1,100 @@
+import { test, expect } from './coverage-fixtures.js'
+
+// These specs stub /api/features and /api/p2p/token per cell. The test server
+// disables auth (isAdmin=true) and reports its own features, so we intercept
+// before navigation to simulate each deployment-mode cell as an admin.
+
+function stubFeatures(page, features) { // answer /api/features with the given flags
+  const respond = route => route.fulfill({ contentType: 'application/json', body: JSON.stringify(features) })
+  return page.route('**/api/features', respond)
+}
+
+function stubNoP2P(page) {
+  // An empty body from the P2P token endpoint yields p2pEnabled=false.
+  const respondEmpty = route => route.fulfill({ contentType: 'text/plain', body: '' })
+  return page.route('**/api/p2p/token', respondEmpty)
+}
+
+test.describe('Adaptive landing (HomeRoute)', () => {
+  test('admin + distributed redirects /app to Nodes', async ({ page }) => {
+    await stubFeatures(page, { distributed: true })
+    await stubNoP2P(page)
+    await page.goto('/app')
+    await expect(page).toHaveURL(/\/app\/nodes$/)
+    await expect(page.locator('.page-title').first()).toBeVisible({ timeout: 15_000 })
+  })
+
+  test('admin + single-node stays on Home', async ({ page }) => {
+    await stubFeatures(page, { distributed: false })
+    await stubNoP2P(page)
+    await page.goto('/app')
+    await expect(page).toHaveURL(/\/app$/)
+    await expect(page.locator('.home-greeting')).toBeVisible({ timeout: 15_000 })
+  })
+})
+
+test.describe('Adaptive sidebar', () => {
+  test('distributed pins the Cluster group with Nodes at the top', async ({ page }) => {
+    await stubFeatures(page, { distributed: true })
+    await stubNoP2P(page)
+    await page.goto('/app/chat') // any in-app page so the sidebar is mounted
+    const pinned = page.locator('.sidebar-nav .sidebar-section-items').first()
+    await expect(pinned.getByText('Nodes', { exact: false })).toBeVisible({ timeout: 15_000 })
+  })
+
+  test('single-node does not pin a Cluster group', async ({ page }) => {
+    await stubFeatures(page, { distributed: false })
+    await stubNoP2P(page)
+    await page.goto('/app/chat')
+    // Nodes is reachable only via the Operate rail, not pinned at the top.
+    await expect(page.locator('.sidebar-nav')).toBeVisible({ timeout: 15_000 })
+    await expect(page.locator('.sidebar-nav .sidebar-section-items').first()
+      .getByText('Nodes', { exact: false })).toHaveCount(0)
+  })
+})
+
+test.describe('Top navbar', () => {
+  test('admin sees the mode pill and settings cog', async ({ page }) => {
+    await stubFeatures(page, { distributed: true })
+    await stubNoP2P(page)
+    await page.goto('/app/chat')
+    await expect(page.locator('.top-navbar__mode')).toBeVisible({ timeout: 15_000 })
+    await expect(page.locator('.top-navbar__icon[aria-label]')).not.toHaveCount(0)
+  })
+
+  test('admin-via-chat jump shows when localai_assistant is enabled', async ({ page }) => {
+    await stubFeatures(page, { distributed: false, localai_assistant: true })
+    await stubNoP2P(page)
+    await page.goto('/app/chat')
+    await expect(page.locator('.top-navbar__assistant')).toBeVisible({ timeout: 15_000 })
+  })
+
+  test('admin-via-chat jump hidden when localai_assistant is off', async ({ page }) => {
+    await Promise.all([stubFeatures(page, { distributed: false, localai_assistant: false }), stubNoP2P(page)])
+    await page.goto('/app/chat')
+    await expect(page.locator('.top-navbar')).toBeVisible({ timeout: 15_000 }) // mount first: toHaveCount(0) alone also passes before the app renders
+    await expect(page.locator('.top-navbar__assistant')).toHaveCount(0)
+  })
+})
+
+test.describe('Token usage meter', () => {
+  test('renders when admin usage has data', async ({ page }) => {
+    await stubFeatures(page, { distributed: false })
+    await stubNoP2P(page)
+    await page.route('**/api/auth/admin/usage**', route =>
+      route.fulfill({ contentType: 'application/json',
+        body: JSON.stringify({ buckets: [{ total_tokens: 1234 }] }) }))
+    await page.goto('/app/chat')
+    await expect(page.locator('.top-navbar__meter')).toBeVisible({ timeout: 15_000 })
+  })
+
+  test('hidden when admin usage is empty (graceful degrade)', async ({ page }) => {
+    await stubFeatures(page, { distributed: false })
+    await stubNoP2P(page)
+    await page.route('**/api/auth/admin/usage**', route =>
+      route.fulfill({ contentType: 'application/json', body: JSON.stringify({ buckets: [] }) }))
+    await page.goto('/app/chat')
+    await expect(page.locator('.top-navbar')).toBeVisible({ timeout: 15_000 })
+    await expect(page.locator('.top-navbar__meter')).toHaveCount(0)
+  })
+})
--- a/core/http/react-ui/public/locales/en/nav.json
+++ b/core/http/react-ui/public/locales/en/nav.json
@@ -12,6 +12,16 @@
  "accountSettings": "Account settings",
  "account": "Account",
  "accountFor": "Account: {{name}}",
+  "topbar": {
+    "label": "Top bar",
+    "modeDistributed": "Distributed",
+    "modeSwarm": "Swarm",
+    "modeSingle": "Single-node",
+    "pickModel": "Models",
+    "adminViaChat": "Admin via chat",
+    "tokensToday": "Tokens today",
+    "usageDetail": "View usage detail"
+  },
  "sections": {
    "create": "Create",
    "recognition": "Recognition",
--- a/core/http/react-ui/public/locales/id/admin.json
+++ b/core/http/react-ui/public/locales/id/admin.json
@@ -45,7 +45,7 @@
  },
  "scheduling": {
    "title": "Penjadwalan",
-    "subtitle": "Aturan penempatan model dan replika di seluruh kluster"
+    "subtitle": "Aturan penempatan model dan replika di seluruh klaster"
  },
  "p2p": {
    "title": "Komputasi AI Terdistribusi",
@@ -86,4 +86,4 @@
    "title": "Penjelajah",
    "subtitle": "Jelajahi file dan konfigurasi"
  }
-}
+}
--- a/core/http/react-ui/public/locales/id/chat.json
+++ b/core/http/react-ui/public/locales/id/chat.json
@@ -72,7 +72,7 @@
  "actions": {
    "copy": "Salin",
    "regenerate": "Hasilkan ulang",
-    "jumpToLatest": "Lompat ke terbaru"
+    "jumpToLatest": "Jump to latest"
  },
  "streaming": {
    "transferring": "Mentransfer model...",
@@ -115,4 +115,4 @@
    "clearAll": "Hapus semua",
    "deleteAllTitle": "Hapus semua percakapan"
  }
-}
+}
--- a/core/http/react-ui/public/locales/id/common.json
+++ b/core/http/react-ui/public/locales/id/common.json
@@ -1,8 +1,8 @@
 {
  "unsaved": {
-    "title": "Buang perubahan yang belum disimpan?",
-    "message": "Anda memiliki perubahan yang belum disimpan. Perubahan tersebut akan hilang jika Anda meninggalkan halaman ini.",
-    "leave": "Tinggalkan Halaman"
+    "title": "Discard unsaved changes?",
+    "message": "You have unsaved changes that will be lost if you leave this page.",
+    "leave": "Leave"
  },
  "actions": {
    "save": "Simpan",
--- a/core/http/react-ui/public/locales/id/home.json
+++ b/core/http/react-ui/public/locales/id/home.json
@@ -7,15 +7,15 @@
  "resourceGpu": "GPU",
  "resourceRam": "RAM",
  "greeting": {
-    "morning": "Selamat pagi",
-    "afternoon": "Selamat siang",
-    "evening": "Selamat malam",
-    "night": "Selamat lembur"
+    "morning": "Good morning",
+    "afternoon": "Good afternoon",
+    "evening": "Good evening",
+    "night": "Working late"
  },
  "statusLine": {
-    "modelsLoaded_one": "{{count}} model dimuat",
-    "modelsLoaded_other": "{{count}} model dimuat",
-    "noModelsLoaded": "Tidak ada model yang dimuat",
+    "modelsLoaded_one": "{{count}} model loaded",
+    "modelsLoaded_other": "{{count}} models loaded",
+    "noModelsLoaded": "No models loaded",
    "nodes_one": "{{count}} node",
    "nodes_other": "{{count}} nodes"
  },
@@ -79,14 +79,14 @@
  },
  "connect": {
    "title": "Satu endpoint, semua API",
-    "subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Selain itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
+    "subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Di atas itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
    "nativeTitle": "API native",
    "compatTitle": "Kompatibilitas drop-in",
    "apiReference": "Referensi API lengkap",
    "copy": "Salin",
    "copied": "Disalin",
-    "browse": "Jelajahi API",
-    "hide": "Sembunyikan endpoint",
-    "dismiss": "Abaikan"
+    "browse": "Browse the API",
+    "hide": "Hide endpoints",
+    "dismiss": "Dismiss"
  }
 }
--- a/core/http/react-ui/public/locales/id/media.json
+++ b/core/http/react-ui/public/locales/id/media.json
@@ -5,7 +5,7 @@
      "video": "Video",
      "tts": "TTS",
      "sound": "Suara",
-      "transform": "Transformasi"
+      "transform": "Transform"
    }
  },
  "image": {
@@ -30,7 +30,7 @@
      "refImagesAdded_other": "{{count}} gambar ditambahkan"
    },
    "actions": {
-      "view": "Lihat",
+      "view": "View",
      "generate": "Hasilkan",
      "generating": "Menghasilkan..."
    },
@@ -153,4 +153,4 @@
    "clearConfirm": "Hapus",
    "cleared": "Riwayat dihapus"
  }
-}
+}
--- a/core/http/react-ui/public/locales/id/nav.json
+++ b/core/http/react-ui/public/locales/id/nav.json
@@ -19,11 +19,11 @@
    "operate": "Operasikan"
  },
  "operate": {
-    "inference": "Inferensi",
-    "cluster": "Kluster",
-    "observability": "Observabilitas",
-    "access": "Akses",
-    "system": "Sistem"
+    "inference": "Inference",
+    "cluster": "Cluster",
+    "observability": "Observability",
+    "access": "Access",
+    "system": "System"
  },
  "items": {
    "home": "Beranda",
@@ -64,7 +64,7 @@
    "copyright": "© 2023-{{year}} {{author}}"
  },
  "console": {
-    "automation": "Automasi",
+    "automation": "Otomasi",
    "training": "Pelatihan"
  }
 }
--- a/core/http/react-ui/src/App.css
+++ b/core/http/react-ui/src/App.css
@@ -184,6 +184,50 @@
  font-size: 1.5rem;
 }

+/* Desktop top bar: deployment + admin affordances on wide screens. Hidden on
+   mobile, where .mobile-header carries the equivalent actions. */
+.top-navbar {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: var(--spacing-md);
+  padding: var(--spacing-sm) var(--spacing-lg);
+  border-bottom: 1px solid var(--color-border-default);
+  background: var(--color-bg-secondary);
+}
+.top-navbar__right { display: flex; align-items: center; gap: var(--spacing-sm); }
+.top-navbar__mode {
+  font-size: 0.75rem;
+  padding: 2px 10px;
+  border-radius: 999px;
+  border: 1px solid var(--color-border-default);
+  color: var(--color-text-secondary);
+}
+.top-navbar__mode.is-active { color: var(--color-success); border-color: var(--color-success); }
+.top-navbar__btn {
+  display: inline-flex; align-items: center; gap: 6px;
+  font-size: 0.8125rem; padding: 5px 10px; border-radius: 8px;
+  border: 1px solid var(--color-border-default); background: var(--color-bg-tertiary);
+  color: var(--color-text-primary); cursor: pointer;
+}
+.top-navbar__icon {
+  width: 32px; height: 32px; display: inline-flex; align-items: center;
+  justify-content: center; border-radius: 8px; border: 1px solid var(--color-border-default);
+  background: var(--color-bg-tertiary); color: var(--color-text-secondary); cursor: pointer;
+}
+.top-navbar__avatar img { width: 100%; height: 100%; border-radius: 50%; object-fit: cover; }
+.top-navbar__meter {
+  display: inline-flex; flex-direction: column; gap: 3px; align-items: flex-start;
+  padding: 4px 10px; border-radius: 8px; border: 1px solid var(--color-border-default);
+  background: var(--color-bg-tertiary); cursor: pointer; min-width: 150px;
+}
+.top-navbar__meter-label { font-size: 0.6875rem; color: var(--color-text-secondary); }
+.top-navbar__meter-bar { width: 100%; height: 5px; border-radius: 3px; background: var(--color-bg-secondary); overflow: hidden; }
+.top-navbar__meter-bar i { display: block; height: 100%; background: var(--color-primary); }
+@media (max-width: 639px) {
+  .top-navbar { display: none; }
+}
+
 /* Sidebar */
 .sidebar {
  position: fixed;
--- a/core/http/react-ui/src/App.jsx
+++ b/core/http/react-ui/src/App.jsx
@@ -3,6 +3,7 @@ import { Outlet, useLocation, useNavigate } from 'react-router-dom'
 import { useTranslation } from 'react-i18next'
 import Sidebar from './components/Sidebar'
 import OperationsBar from './components/OperationsBar'
+import TopNavbar from './components/TopNavbar'
 import { ToastContainer, useToast } from './components/Toast'
 import { systemApi } from './utils/api'
 import { useTheme } from './contexts/ThemeContext'
@@ -98,6 +99,7 @@ export default function App() {
      <Sidebar isOpen={sidebarOpen} onClose={() => setSidebarOpen(false)} />
      <main className="main-content" {...(sidebarOpen ? { 'aria-hidden': 'true', inert: '' } : {})}>
        <OperationsBar />
+        <TopNavbar />
        {/* Mobile header — primary actions reachable without opening the
            drawer. Hamburger is the only way to expand the nav on phones;
            theme toggle and account avatar are mirrored from the sidebar
--- a/core/http/react-ui/src/components/HomeRoute.jsx
+++ b/core/http/react-ui/src/components/HomeRoute.jsx
@@ -0,0 +1,28 @@
+import { lazy, Suspense } from 'react'
+import { Navigate } from 'react-router-dom'
+import { useAuth } from '../context/AuthContext'
+import { useDeployment } from '../contexts/DeploymentContext'
+import { resolveHome } from '../utils/resolveHome'
+import RouteFallback from './RouteFallback'
+
+const Home = lazy(() => import('../pages/Home'))
+
+// Element for the /app index route. Holds on the fallback until both the auth
+// and deployment contexts finish loading, so the wrong landing never flashes.
+// A non-empty resolveHome() result becomes a <Navigate replace> (the target keeps
+// its own route guard, active-nav state, and URL); otherwise the lazy Home renders.
+export default function HomeRoute() {
+  const auth = useAuth()
+  const deployment = useDeployment()
+
+  const stillLoading = auth.loading || deployment.loading
+  if (stillLoading) return <RouteFallback />
+
+  const { isAdmin } = auth
+  const { distributed, p2pEnabled } = deployment
+  const redirectTo = resolveHome({ isAdmin, distributed, p2pEnabled })
+
+  return redirectTo
+    ? <Navigate to={redirectTo} replace />
+    : <Suspense fallback={<RouteFallback />}><Home /></Suspense>
+}
--- a/core/http/react-ui/src/components/Sidebar.jsx
+++ b/core/http/react-ui/src/components/Sidebar.jsx
@@ -5,9 +5,11 @@ import ThemeToggle from './ThemeToggle'
 import LanguageSwitcher from './LanguageSwitcher'
 import { useAuth } from '../context/AuthContext'
 import { useBranding } from '../contexts/BrandingContext'
+import { useDeployment } from '../contexts/DeploymentContext'
 import { apiUrl } from '../utils/basePath'
 import { preloadRoute } from '../router'
 import { consoles, firstVisiblePath, consolePaths } from './console/consoleConfig'
+import { clusterPinItems, shouldCollapseCreate } from '../utils/sidebarPolicy'

 const COLLAPSED_KEY = 'localai_sidebar_collapsed'
 const SECTIONS_KEY = 'localai_sidebar_sections'
@@ -58,11 +60,13 @@ function NavItem({ item, onClose, collapsed }) {
  )
 }

-function loadSectionState() {
-  // Tiers render expanded by default (the redesign favours showing the few
-  // intent groups up front); users can still collapse any tier and the choice
-  // is persisted. Stored values override the defaults so a saved collapse wins.
+function loadSectionState(collapseCreate = false) {
+  // Tiers render expanded by default; users can collapse any tier and the
+  // choice persists (stored values override defaults). In cluster cells we
+  // start Create collapsed so the pinned cluster group leads - but only when
+  // the user has not already expressed a preference.
  const defaults = Object.fromEntries(sections.map(s => [s.id, true]))
+  if (collapseCreate) defaults.create = false
  try {
    const stored = localStorage.getItem(SECTIONS_KEY)
    return stored ? { ...defaults, ...JSON.parse(stored) } : defaults
@@ -77,20 +81,34 @@ function saveSectionState(state) {

 export default function Sidebar({ isOpen, onClose }) {
  const { t } = useTranslation('nav')
-  const [features, setFeatures] = useState({})
+  const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
+  // Deployment shape (server features + p2p) drives the adaptive sidebar; the
+  // shared context replaces the sidebar's own /api/features fetch so the
+  // landing resolver, navbar, and this policy agree on one snapshot.
+  const deployment = useDeployment()
+  const features = deployment.features
+  // Shared shape for the console gating helpers (consoleConfig.js); in scope for
+  // both the pinned cluster group and the console-tier rendering below.
+  const auth = { isAdmin, authEnabled, hasFeature, features }
+  const collapseCreate = shouldCollapseCreate(auth, deployment)
  const [collapsed, setCollapsed] = useState(() => {
    try { return localStorage.getItem(COLLAPSED_KEY) === 'true' } catch (_) { return false }
  })
  const [openSections, setOpenSections] = useState(loadSectionState)
-  const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
  const branding = useBranding()
  const navigate = useNavigate()
  const location = useLocation()
  const closeBtnRef = useRef(null)

+  // Apply the cluster-cell Create-collapse default once, only when the user has
+  // no stored section preference (so we never override an explicit choice).
  useEffect(() => {
-    fetch(apiUrl('/api/features')).then(r => r.json()).then(setFeatures).catch(() => {})
-  }, [])
+    if (deployment.loading) return
+    let hasStored = false
+    try { hasStored = !!localStorage.getItem(SECTIONS_KEY) } catch { hasStored = false }
+    if (hasStored || !collapseCreate) return
+    setOpenSections(prev => (prev.create === false ? prev : { ...prev, create: false }))
+  }, [deployment.loading, collapseCreate])

  // Stay in sync with external collapse dispatches (e.g. the chat
  // page's focus mode). The collapse-toggle button still owns the
@@ -157,8 +175,6 @@ export default function Sidebar({ isOpen, onClose }) {
  }

  const visibleTopItems = topItems.filter(filterItem)
-  // Shared shape for the console gating helpers (consoleConfig.js).
-  const auth = { isAdmin, authEnabled, hasFeature, features }

  // Inline sections (Create) carry no gating; a plain filterItem pass suffices.
  const getVisibleSectionItems = (section) => section.items.filter(filterItem)
@@ -199,6 +215,28 @@ export default function Sidebar({ isOpen, onClose }) {
            ))}
          </div>

+          {/* Pinned Cluster quick-access (admin + distributed/p2p). Same gate
+              as the Operate rail; surfaced at the top for cluster operators. */}
+          {(() => {
+            const pinned = clusterPinItems(auth, deployment)
+            if (pinned.length === 0) return null
+            return (
+              <div className="sidebar-section">
+                <div className="sidebar-section-title">{t('operate.cluster')}</div>
+                <div className="sidebar-section-items">
+                  {pinned.map(item => (
+                    <NavItem
+                      key={item.path}
+                      item={{ path: item.path, icon: item.icon, labelKey: item.labelKey }}
+                      onClose={onClose}
+                      collapsed={collapsed}
+                    />
+                  ))}
+                </div>
+              </div>
+            )
+          })()}
+
          {/* Collapsible sections */}
          {sections.map(section => {
            const visibleItems = getVisibleSectionItems(section)
--- a/core/http/react-ui/src/components/TopNavbar.jsx
+++ b/core/http/react-ui/src/components/TopNavbar.jsx
@@ -0,0 +1,107 @@
+import { useNavigate } from 'react-router-dom'
+import { useTranslation } from 'react-i18next'
+import { useAuth } from '../context/AuthContext'
+import { useDeployment } from '../contexts/DeploymentContext'
+import { useTheme } from '../contexts/ThemeContext'
+import { launchAssistantChat } from '../utils/launchAssistantChat'
+import TokenUsageMeter from './navbar/TokenUsageMeter'
+
+// Desktop top bar, complementing the mobile-only header in App.jsx. The
+// .top-navbar CSS hides it on small screens; on wide screens it surfaces
+// deployment/admin affordances, since the sidebar footer is far from the
+// content there.
+//
+// Rendered per viewer:
+//   - admins: deployment-mode badge, optional assistant jump, usage meter,
+//     settings shortcut
+//   - non-admins: a "pick model" shortcut to the chat page
+//   - everyone: theme toggle, plus an account button when auth is enabled
+//     and a user is present
+export default function TopNavbar() {
+  const { t } = useTranslation('nav')
+  const navigate = useNavigate()
+  const { isAdmin, authEnabled, user } = useAuth()
+  const { features, distributed, p2pEnabled } = useDeployment()
+  const { theme, toggleTheme } = useTheme()
+
+  // Badge text precedence: distributed, then P2P swarm, then single.
+  let modeLabel
+  if (distributed) {
+    modeLabel = t('topbar.modeDistributed')
+  } else if (p2pEnabled) {
+    modeLabel = t('topbar.modeSwarm')
+  } else {
+    modeLabel = t('topbar.modeSingle')
+  }
+  const modeClassName = `top-navbar__mode ${distributed || p2pEnabled ? 'is-active' : ''}`
+
+  const isDark = theme === 'dark'
+  const themeLabel = isDark ? t('switchToLightMode') : t('switchToDarkMode')
+  const themeIconClassName = `fas ${isDark ? 'fa-sun' : 'fa-moon'}`
+
+  // The assistant jump needs admin rights and the localai_assistant feature.
+  const showAssistantJump = isAdmin && !!features.localai_assistant
+  const showAvatar = authEnabled && user
+
+  const openChat = () => navigate('/app/chat')
+  const openAssistant = () => launchAssistantChat(navigate)
+  const openSettings = () => navigate('/app/settings')
+  const openAccount = () => navigate('/app/account')
+
+  return (
+    <div className="top-navbar" role="navigation" aria-label={t('topbar.label')}>
+      <div className="top-navbar__left">
+        {isAdmin && (
+          <span className={modeClassName}>
+            <i className="fas fa-circle-nodes" aria-hidden="true" /> {modeLabel}
+          </span>
+        )}
+      </div>
+      <div className="top-navbar__right">
+        {!isAdmin && (
+          <button type="button" className="top-navbar__btn" onClick={openChat} title={t('topbar.pickModel')}>
+            <i className="fas fa-cube" aria-hidden="true" /> {t('topbar.pickModel')}
+          </button>
+        )}
+        {showAssistantJump && (
+          <button
+            type="button"
+            className="top-navbar__btn top-navbar__assistant"
+            onClick={openAssistant}
+            title={t('topbar.adminViaChat')}
+          >
+            <i className="fas fa-user-shield" aria-hidden="true" /> {t('topbar.adminViaChat')}
+          </button>
+        )}
+        {isAdmin && <TokenUsageMeter />}
+        {isAdmin && (
+          <button
+            type="button"
+            className="top-navbar__icon"
+            onClick={openSettings}
+            aria-label={t('items.settings')}
+            title={t('items.settings')}
+          >
+            <i className="fas fa-cog" aria-hidden="true" />
+          </button>
+        )}
+        <button type="button" className="top-navbar__icon" onClick={toggleTheme} aria-label={themeLabel} title={themeLabel}>
+          <i className={themeIconClassName} aria-hidden="true" />
+        </button>
+        {showAvatar && (
+          <button
+            type="button"
+            className="top-navbar__icon top-navbar__avatar"
+            onClick={openAccount}
+            aria-label={user.name || user.email}
+            title={user.name || user.email}
+          >
+            {user.avatarUrl
+              ? <img src={user.avatarUrl} alt="" />
+              : <i className="fas fa-user-circle" aria-hidden="true" />}
+          </button>
+        )}
+      </div>
+    </div>
+  )
+}
--- a/core/http/react-ui/src/components/navbar/TokenUsageMeter.jsx
+++ b/core/http/react-ui/src/components/navbar/TokenUsageMeter.jsx
@@ -0,0 +1,78 @@
+import { useState, useEffect } from 'react'
+import { useNavigate } from 'react-router-dom'
+import { useTranslation } from 'react-i18next'
+import { usageApi } from '../../utils/api'
+
+// Compact admin-only usage glance: today's total tokens, optionally against a
+// quota cap, linking to the full /app/usage page. The data fetch is
+// self-contained so a usage-API failure cannot break the navbar: the meter
+// renders nothing when the request fails or reports no usage buckets.
+
+// Coerces a value to a finite number; anything else (missing field,
+// non-numeric string, NaN) counts as 0.
+function toCount(value) {
+  const n = Number(value)
+  return Number.isFinite(n) ? n : 0
+}
+
+// Sums total_tokens over the usage buckets of a response shaped as
+// { buckets: [...] }, { usage: [...] } or a bare array. Returns null when
+// there is no non-empty bucket list, which the component treats as "hide".
+// Counts are coerced so a string-encoded value is added rather than
+// concatenated, and a malformed bucket contributes 0 instead of throwing.
+function sumTotalTokens(res) {
+  const buckets = res?.buckets || res?.usage || (Array.isArray(res) ? res : [])
+  if (!Array.isArray(buckets) || buckets.length === 0) return null
+  return buckets.reduce((sum, bucket) => sum + toCount(bucket?.total_tokens), 0)
+}
+
+// Reads the token cap from a quota response (token_limit, else tokens.limit).
+// Returns a positive finite number, or null when no usable cap is present, so
+// the percentage below can never become NaN or negative.
+function readTokenCap(quota) {
+  const limit = Number(quota?.token_limit || quota?.tokens?.limit)
+  return Number.isFinite(limit) && limit > 0 ? limit : null
+}
+
+export default function TokenUsageMeter() {
+  const { t } = useTranslation('nav')
+  const navigate = useNavigate()
+  const [tokens, setTokens] = useState(null)
+  const [cap, setCap] = useState(null)
+
+  useEffect(() => {
+    // Drop responses that arrive after unmount.
+    let cancelled = false
+    usageApi.getAdminUsage('day')
+      .then(res => { if (!cancelled) setTokens(sumTotalTokens(res)) })
+      .catch(() => { if (!cancelled) setTokens(null) })
+    usageApi.getMyQuotas()
+      .then(q => { if (!cancelled) setCap(readTokenCap(q)) })
+      .catch(() => { if (!cancelled) setCap(null) })
+    return () => { cancelled = true }
+  }, [])
+
+  // No usage data (pending or failed fetch, or no buckets): render nothing.
+  if (tokens === null) return null
+
+  const formatter = new Intl.NumberFormat()
+  // Clamped at 100 so usage above the cap cannot overflow the bar.
+  const pct = cap ? Math.min(100, Math.round((tokens / cap) * 100)) : null
+
+  return (
+    <button
+      type="button"
+      className="top-navbar__meter"
+      onClick={() => navigate('/app/usage')}
+      title={t('topbar.usageDetail')}
+    >
+      <span className="top-navbar__meter-label">
+        {t('topbar.tokensToday')}: {formatter.format(tokens)}
+        {cap ? ` / ${formatter.format(cap)}` : ''}
+      </span>
+      {pct !== null && (
+        <span className="top-navbar__meter-bar"><i style={{ width: `${pct}%` }} /></span>
+      )}
+    </button>
+  )
+}
--- a/core/http/react-ui/src/contexts/DeploymentContext.jsx
+++ b/core/http/react-ui/src/contexts/DeploymentContext.jsx
@@ -0,0 +1,66 @@
+import { createContext, useContext, useState, useEffect } from 'react'
+import { apiUrl } from '../utils/basePath'
+import { p2pApi } from '../utils/api'
+
+const DeploymentContext = createContext(null)
+
+// One shared fetch of the deployment-shape signals the adaptive UI keys off:
+// server features (/api/features) and whether a P2P network token exists.
+// Components used to fetch /api/features independently (Sidebar, Home); this
+// centralises it so the landing resolver, sidebar policy, and navbar agree on
+// one snapshot and /api/features is requested once.
+//
+// Both lookups are best-effort: a failure degrades to "no features" / "P2P
+// not enabled" rather than surfacing an error. `loading` stays true until
+// both have settled.
+export function DeploymentProvider({ children }) {
+  const [features, setFeatures] = useState({})
+  const [p2pEnabled, setP2pEnabled] = useState(false)
+  const [loading, setLoading] = useState(true)
+
+  useEffect(() => {
+    let cancelled = false
+    // fetch() only rejects on network failure; an HTTP error status still
+    // resolves, so check r.ok to keep an error body from being stored as the
+    // feature map.
+    const featuresP = fetch(apiUrl('/api/features'))
+      .then(r => (r.ok ? r.json() : {}))
+      .catch(() => ({}))
+    // P2P has no /api/features flag: it is "enabled" when a network token
+    // exists (mirrors pages/P2P.jsx). A 404/disabled endpoint throws and we
+    // treat that as not-enabled.
+    const p2pP = p2pApi.getToken()
+      .then(tok => (typeof tok === 'string' ? tok : (tok?.token || '')).trim())
+      .catch(() => '')
+    Promise.all([featuresP, p2pP]).then(([f, tok]) => {
+      if (cancelled) return
+      // Guarantee consumers an object even if the payload was null or a
+      // primitive.
+      setFeatures(f && typeof f === 'object' ? f : {})
+      setP2pEnabled(!!tok)
+      setLoading(false)
+    })
+    return () => { cancelled = true }
+  }, [])
+
+  const value = {
+    features,
+    distributed: !!features.distributed,
+    p2pEnabled,
+    loading,
+  }
+
+  return (
+    <DeploymentContext.Provider value={value}>
+      {children}
+    </DeploymentContext.Provider>
+  )
+}
+
+// Returns the deployment snapshot ({ features, distributed, p2pEnabled,
+// loading }). Throws when called outside a DeploymentProvider.
+export function useDeployment() {
+  const ctx = useContext(DeploymentContext)
+  if (!ctx) throw new Error('useDeployment must be used within DeploymentProvider')
+  return ctx
+}
--- a/core/http/react-ui/src/main.jsx
+++ b/core/http/react-ui/src/main.jsx
@@ -4,6 +4,7 @@ import { RouterProvider } from 'react-router-dom'
 import { ThemeProvider } from './contexts/ThemeContext'
 import { BrandingProvider } from './contexts/BrandingContext'
 import { AuthProvider } from './context/AuthContext'
+import { DeploymentProvider } from './contexts/DeploymentContext'
 import { OperationsProvider } from './contexts/OperationsContext'
 import { router } from './router'
 import './i18n'
@@ -32,9 +33,11 @@ createRoot(document.getElementById('root')).render(
      <ThemeProvider>
        <BrandingProvider>
          <AuthProvider>
-            <OperationsProvider>
-              <RouterProvider router={router} />
-            </OperationsProvider>
+            <DeploymentProvider>
+              <OperationsProvider>
+                <RouterProvider router={router} />
+              </OperationsProvider>
+            </DeploymentProvider>
          </AuthProvider>
        </BrandingProvider>
      </ThemeProvider>
--- a/core/http/react-ui/src/pages/Chat.jsx
+++ b/core/http/react-ui/src/pages/Chat.jsx
@@ -541,58 +541,73 @@ export default function Chat() {
    updateChatSettings(activeChat.id, { clientMCPServers: next })
  }, [activeChat, updateChatSettings])

-  // Load initial message from home page
+  // Load initial message / assistant launch from the Home page or the navbar
+  // quick-jump. Factored into a callback so both the mount-time reader and the
+  // navbar re-trigger event below consume the same payload through one path.
  const homeDataProcessed = useRef(false)
-  useEffect(() => {
-    if (homeDataProcessed.current) return
+  const consumeHomeChatData = useCallback(() => {
    const stored = localStorage.getItem('localai_index_chat_data')
-    if (stored) {
-      homeDataProcessed.current = true
-      try {
-        const data = JSON.parse(stored)
-        localStorage.removeItem('localai_index_chat_data')
+    if (!stored) return
+    try {
+      const data = JSON.parse(stored)
+      localStorage.removeItem('localai_index_chat_data')

-        // Two entry shapes from Home:
-        //   - "compose-and-send": data.message present → open new chat,
-        //     prefill the composer, click submit.
-        //   - "open-assistant": no message, just data.localaiAssistant → open
-        //     a fresh chat already in admin mode so the wizard can fire.
-        const hasMessage = !!data.message
-        const wantsAssistant = !!data.localaiAssistant
+      // Two entry shapes from Home:
+      //   - "compose-and-send": data.message present → open new chat,
+      //     prefill the composer, click submit.
+      //   - "open-assistant": no message, just data.localaiAssistant → open
+      //     a fresh chat already in admin mode so the wizard can fire.
+      const hasMessage = !!data.message
+      const wantsAssistant = !!data.localaiAssistant

-        if (hasMessage || wantsAssistant) {
-          let targetChat = activeChat
-          if (data.newChat) {
-            targetChat = addChat(data.model || '', '', data.mcpMode || false)
-          } else {
-            if (data.model && activeChat) {
-              updateChatSettings(activeChat.id, { model: data.model })
-            }
-            if (data.mcpMode && activeChat) {
-              updateChatSettings(activeChat.id, { mcpMode: true })
-            }
+      if (hasMessage || wantsAssistant) {
+        let targetChat = activeChat
+        if (data.newChat) {
+          targetChat = addChat(data.model || '', '', data.mcpMode || false)
+        } else {
+          if (data.model && activeChat) {
+            updateChatSettings(activeChat.id, { model: data.model })
          }
-          if (data.mcpServers?.length > 0 && targetChat) {
-            updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
-          }
-          if (data.clientMCPServers?.length > 0 && targetChat) {
-            updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
-          }
-          if (wantsAssistant && targetChat) {
-            updateChatSettings(targetChat.id, { localaiAssistant: true })
-          }
-          if (hasMessage) {
-            setInput(data.message)
-            if (data.files) setFiles(data.files)
-            setTimeout(() => {
-              const submitBtn = document.getElementById('chat-submit-btn')
-              submitBtn?.click()
-            }, 100)
+          if (data.mcpMode && activeChat) {
+            updateChatSettings(activeChat.id, { mcpMode: true })
          }
        }
-      } catch (_e) { /* ignore */ }
-    }
-  }, [])
+        if (data.mcpServers?.length > 0 && targetChat) {
+          updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
+        }
+        if (data.clientMCPServers?.length > 0 && targetChat) {
+          updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
+        }
+        if (wantsAssistant && targetChat) {
+          updateChatSettings(targetChat.id, { localaiAssistant: true })
+        }
+        if (hasMessage) {
+          setInput(data.message)
+          if (data.files) setFiles(data.files)
+          setTimeout(() => {
+            const submitBtn = document.getElementById('chat-submit-btn')
+            submitBtn?.click()
+          }, 100)
+        }
+      }
+    } catch (_e) { /* ignore */ }
+  }, [activeChat, addChat, updateChatSettings])
+
+  useEffect(() => {
+    if (homeDataProcessed.current) return
+    homeDataProcessed.current = true
+    consumeHomeChatData()
+  }, [consumeHomeChatData])
+
+  // Admins can re-trigger the assistant jump from the navbar while already on
+  // the chat page; navigate('/app/chat') does not remount Chat, so the
+  // mount-time reader above never fires. The launcher dispatches this event
+  // after writing the payload so we re-consume it and open a fresh assistant.
+  useEffect(() => {
+    const onOpenAssistant = () => consumeHomeChatData()
+    window.addEventListener('localai-open-assistant', onOpenAssistant)
+    return () => window.removeEventListener('localai-open-assistant', onOpenAssistant)
+  }, [consumeHomeChatData])

  // Track whether the user is pinned to the bottom. If they scroll up
  // while a response is streaming, stop forcing them back down.
--- a/core/http/react-ui/src/pages/Home.jsx
+++ b/core/http/react-ui/src/pages/Home.jsx
@@ -13,6 +13,7 @@ import { useResources } from '../hooks/useResources'
 import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
 import { API_CONFIG } from '../utils/config'
 import { greetingKey } from '../utils/greeting'
+import { launchAssistantChat } from '../utils/launchAssistantChat'
 import StatusPill from '../components/StatusPill'
 import Skeleton from '../components/Skeleton'
 import SectionHeading from '../components/SectionHeading'
@@ -228,16 +229,8 @@ export default function Home() {
  // requiring an initial message or model selection. Useful when an admin
  // wants to start the assistant from a cold home page.
  const openAssistantChat = useCallback(() => {
-    const chatData = {
-      model: selectedModel || '',
-      mcpMode: false,
-      localaiAssistant: true,
-      newChat: true,
-    }
-    localStorage.setItem('localai_index_chat_data', JSON.stringify(chatData))
-    try { localStorage.setItem('localai_assistant_used', '1') } catch { /* ignore */ }
+    launchAssistantChat(navigate, selectedModel)
    setAssistantUsed(true)
-    navigate('/app/chat')
  }, [navigate, selectedModel])

  const handleSubmit = (e) => {
--- a/core/http/react-ui/src/router.jsx
+++ b/core/http/react-ui/src/router.jsx
@@ -6,6 +6,7 @@ import RequireAdmin from './components/RequireAdmin'
 import RequireAuth from './components/RequireAuth'
 import RequireAuthEnabled from './components/RequireAuthEnabled'
 import RequireFeature from './components/RequireFeature'
+import HomeRoute from './components/HomeRoute'

 // Pages are code-split: each becomes its own chunk loaded on demand, so a route
 // no longer drags every other page (and its heavy deps — CodeMirror, the MCP
@@ -32,7 +33,7 @@ export function preloadRoute(path) {
  preloaders[m[1] ?? '']?.().catch(() => { /* network blip — real click will retry */ })
 }

-const Home = page('', () => import('./pages/Home'))
+page('', () => import('./pages/Home'))
 const Chat = page('chat', () => import('./pages/Chat'))
 const Models = page('models', () => import('./pages/Models'))
 const Manage = page('manage', () => import('./pages/Manage'))
@@ -96,7 +97,7 @@ function Feature({ feature, children }) {
 }

 const appChildren = [
-  { index: true, element: <Home /> },
+  { index: true, element: <HomeRoute /> },
  { path: 'chat', element: <Chat /> },
  { path: 'chat/:model', element: <Chat /> },
  { path: 'image', element: <ImageGen /> },
--- a/core/http/react-ui/src/utils/launchAssistantChat.js
+++ b/core/http/react-ui/src/utils/launchAssistantChat.js
@@ -0,0 +1,28 @@
+// Opens a fresh chat that is already in LocalAI Assistant ("manage") mode.
+// Shared by the Home CTA and the top navbar quick-jump so there is a single
+// definition of how the assistant is launched.
+//
+// The hand-off to Chat.jsx goes through localStorage: the launch payload is
+// written under localai_index_chat_data, which Chat.jsx reads on mount to
+// enable localaiAssistant on the new chat.
+//
+// navigate: router navigation function, called with '/app/chat'.
+// model:    optional model name stored in the payload; defaults to ''.
+export function launchAssistantChat(navigate, model = '') {
+  // Every storage/event step is best-effort: an exception is swallowed so the
+  // remaining steps, including navigation, still run.
+  const bestEffort = (step) => {
+    try { step() } catch { /* ignore */ }
+  }
+
+  const payload = { model: model || '', mcpMode: false, localaiAssistant: true, newChat: true }
+  bestEffort(() => localStorage.setItem('localai_index_chat_data', JSON.stringify(payload)))
+  bestEffort(() => localStorage.setItem('localai_assistant_used', '1'))
+
+  navigate('/app/chat')
+
+  // When already on /app/chat, navigate() does not remount Chat, so its
+  // mount-time reader would never see the payload above. Signal the mounted
+  // Chat to re-consume it; harmless elsewhere since Chat reads on mount anyway.
+  bestEffort(() => window.dispatchEvent(new CustomEvent('localai-open-assistant')))
+}
--- a/core/http/react-ui/src/utils/resolveHome.js
+++ b/core/http/react-ui/src/utils/resolveHome.js
@@ -0,0 +1,16 @@
+// Pure landing-page resolver for the index route: decides where /app lands.
+// The result is a target path, or '' meaning "render the default Home".
+//
+//   non-admin            -> '/app/chat'
+//   admin + distributed  -> '/app/nodes'
+//   admin + p2p          -> '/app/p2p'
+//   admin, neither       -> ''
+//
+// Distributed takes precedence over p2p. Non-admins always go to Chat
+// (distributed/p2p are admin-only and invisible to them). Visibility gates are
+// enforced elsewhere; this only picks the landing target.
+export function resolveHome({ isAdmin, distributed, p2pEnabled }) {
+  const clusterTarget = p2pEnabled ? '/app/p2p' : ''
+  const adminTarget = distributed ? '/app/nodes' : clusterTarget
+  return isAdmin ? adminTarget : '/app/chat'
+}
--- a/core/http/react-ui/src/utils/sidebarPolicy.js
+++ b/core/http/react-ui/src/utils/sidebarPolicy.js
@@ -0,0 +1,26 @@
+import { operateConsole, isConsoleItemVisible } from '../components/console/consoleConfig'
+
+// Shared predicate for the policies below: the viewer is an admin and the
+// deployment is a cluster (distributed) or a P2P swarm.
+function isClusterAdmin(auth, deployment) {
+  return !!auth.isAdmin && (!!deployment.distributed || !!deployment.p2pEnabled)
+}
+
+// Items for the pinned top-of-sidebar Cluster group: the Operate > Cluster
+// group, shown as quick access when an admin runs a cluster
+// (NATS-distributed) or a P2P swarm. Items pass through the SAME visibility
+// gate as everywhere else, so e.g. in a p2p-only deployment Nodes/Scheduling
+// (feature: 'distributed') drop out and only Swarm remains. Returns [] when
+// the pin does not apply or the group is missing from the console config.
+export function clusterPinItems(auth, deployment) {
+  if (!isClusterAdmin(auth, deployment)) return []
+  const clusterGroup = operateConsole.groups.find(({ titleKey }) => titleKey === 'operate.cluster')
+  return clusterGroup ? clusterGroup.items.filter(entry => isConsoleItemVisible(entry, auth)) : []
+}
+
+// Whether the Create group should default to collapsed so the pinned cluster
+// group leads: true for admins of a distributed or P2P deployment. Users can
+// still expand it; their stored choice wins.
+export function shouldCollapseCreate(auth, deployment) {
+  return isClusterAdmin(auth, deployment)
+}
--- a/core/services/distributed/gallery.go
+++ b/core/services/distributed/gallery.go
@@ -79,29 +79,21 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
 	}).Create(op).Error
 }

-// UpdateProgress updates progress for an operation. The cancellable flag is
-// persisted on every tick so a replica that restarts mid-install rehydrates the
-// op as still cancellable — otherwise the column keeps its Create-time zero
-// value (false), the UI hides the cancel button, and the orphaned op can only
-// be dismissed by waiting for the 30-minute stale reaper.
-func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
+// UpdateProgress updates progress for an operation.
+func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
 	return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
 		"progress":             progress,
 		"message":              message,
 		"downloaded_file_size": downloadedSize,
-		"cancellable":          cancellable,
 		"updated_at":           time.Now(),
 	}).Error
 }

-// UpdateStatus updates the status of an operation. A terminal status is never
-// cancellable, so the flag is cleared here to keep the persisted row consistent
-// with what the UI should offer.
+// UpdateStatus updates the status of an operation.
 func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
 	updates := map[string]any{
-		"status":      status,
-		"cancellable": false,
-		"updated_at":  time.Now(),
+		"status":     status,
+		"updated_at": time.Now(),
 	}
 	if errMsg != "" {
 		updates["error"] = errMsg
--- a/core/services/galleryop/cancellable_persist_test.go
+++ b/core/services/galleryop/cancellable_persist_test.go
@@ -1,56 +0,0 @@
-package galleryop_test
-
-import (
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/services/distributed"
-	"github.com/mudler/LocalAI/core/services/galleryop"
-	"github.com/mudler/LocalAI/core/services/testutil"
-)
-
-// Reproduces "an in-flight install can't be cancelled after a restart". The
-// live install path marks OpStatus.Cancellable=true on every progress tick, but
-// UpdateStatus persisted progress/status to the gallery store WITHOUT the
-// cancellable flag, and Create defaulted it to false. So after a replica
-// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
-// reported cancellable:false, and the UI hid the cancel button — the orphaned
-// op lingered until the 30-minute stale reaper expired it. The cancellable
-// state must be persisted so a rehydrated in-flight op stays cancellable.
-var _ = Describe("GalleryService cancellable persistence across restart", func() {
-	It("rehydrates an in-flight op as still cancellable", func() {
-		db := testutil.SetupTestDB()
-		store, err := distributed.NewGalleryStore(db)
-		Expect(err).ToNot(HaveOccurred())
-
-		svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
-		svc.SetGalleryStore(store)
-
-		// Seed the in-flight op row as the worker goroutine does on admission.
-		Expect(store.Create(&distributed.GalleryOperationRecord{
-			ID:                 "op-inflight",
-			GalleryElementName: "llama-cpp-development",
-			OpType:             "backend_install",
-			Status:             "pending",
-		})).To(Succeed())
-
-		// Simulate a progress tick: the live path always marks installs
-		// cancellable while they are downloading/processing.
-		svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
-			Message:     "downloading",
-			Progress:    25,
-			Cancellable: true,
-		})
-
-		// A fresh replica boots and hydrates from the store.
-		fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
-		fresh.SetGalleryStore(store)
-		Expect(fresh.Hydrate()).To(Succeed())
-
-		st := fresh.GetStatus("op-inflight")
-		Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
-		Expect(st.Cancellable).To(BeTrue(),
-			"a still-active install must rehydrate as cancellable so the admin can dismiss it")
-	})
-})
--- a/core/services/galleryop/service.go
+++ b/core/services/galleryop/service.go
@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
 				xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
 			}
 		} else {
-			if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
+			if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
 				xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
 			}
 		}
@@ -467,7 +467,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
 						GalleryElementName: op.GalleryElementName,
 						OpType:             "backend_install",
 						Status:             "pending",
-						Cancellable:        true,
 					})
 				}
 				err := g.backendHandler(&op, systemState)
@@ -500,8 +499,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
 						GalleryElementName: op.GalleryElementName,
 						OpType:             opType,
 						Status:             "pending",
-						// A delete is not cancellable; an install is.
-						Cancellable: !op.Delete,
 					})
 				}
 				err := g.modelHandler(&op, cl, systemState)
--- a/core/services/nodes/inflight.go
+++ b/core/services/nodes/inflight.go
@@ -19,40 +19,25 @@ import (
 // Per-replica: a single tracker instance is bound to (nodeID, modelName, replicaIndex).
 // The router constructs one tracker per Route() result, so each in-flight tick lands
 // on the correct row even when multiple replicas of the same model live on the same node.
-//
-// Embedding only grpc.ControlBackend (not the whole grpc.Backend) is what makes
-// the in-flight accounting safe by construction: the control-plane methods pass
-// through untracked, while every grpc.InferenceBackend method must be declared
-// explicitly below to satisfy grpc.Backend. Adding an inference method to the
-// interface therefore breaks this file's build (see the var assertion below)
-// until it is wrapped with track() - so a new inference path can't be added
-// without an in-flight accounting decision.
 type InFlightTrackingClient struct {
-	grpc.ControlBackend                       // passthrough for control-plane / streaming-constructor methods
-	inner               grpc.InferenceBackend // tracked inference methods delegate here
-	registry            InFlightTracker
-	nodeID              string
-	modelName           string
-	replicaIndex        int
+	grpc.Backend // embed for passthrough of untracked methods
+	registry     InFlightTracker
+	nodeID       string
+	modelName    string
+	replicaIndex int

 	firstOnce       sync.Once // guards onFirstComplete
 	onFirstComplete func()    // called once after the first tracked inference call completes
 }

-// Compile-time contract: *InFlightTrackingClient must implement the FULL backend
-// surface. Because it embeds only ControlBackend, this fails to compile if any
-// InferenceBackend method is left unwrapped.
-var _ grpc.Backend = (*InFlightTrackingClient)(nil)
-
 // NewInFlightTrackingClient wraps a gRPC backend client with in-flight tracking.
 func NewInFlightTrackingClient(inner grpc.Backend, registry InFlightTracker, nodeID, modelName string, replicaIndex int) *InFlightTrackingClient {
 	return &InFlightTrackingClient{
-		ControlBackend: inner,
-		inner:          inner,
-		registry:       registry,
-		nodeID:         nodeID,
-		modelName:      modelName,
-		replicaIndex:   replicaIndex,
+		Backend:      inner,
+		registry:     registry,
+		nodeID:       nodeID,
+		modelName:    modelName,
+		replicaIndex: replicaIndex,
 	}
 }

@@ -106,162 +91,154 @@ func (c *InFlightTrackingClient) reconcile(err error) error {

 func (c *InFlightTrackingClient) Predict(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.Reply, error) {
 	defer c.track(ctx)()
-	reply, err := c.inner.Predict(ctx, in, opts...)
+	reply, err := c.Backend.Predict(ctx, in, opts...)
 	return reply, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
 	defer c.track(ctx)()
-	return c.reconcile(c.inner.PredictStream(ctx, in, f, opts...))
+	return c.reconcile(c.Backend.PredictStream(ctx, in, f, opts...))
 }

 func (c *InFlightTrackingClient) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.EmbeddingResult, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.Embeddings(ctx, in, opts...)
+	res, err := c.Backend.Embeddings(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.GenerateImage(ctx, in, opts...)
+	res, err := c.Backend.GenerateImage(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.GenerateVideo(ctx, in, opts...)
+	res, err := c.Backend.GenerateVideo(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) TTS(ctx context.Context, in *pb.TTSRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.TTS(ctx, in, opts...)
+	res, err := c.Backend.TTS(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
 	defer c.track(ctx)()
-	return c.reconcile(c.inner.TTSStream(ctx, in, f, opts...))
+	return c.reconcile(c.Backend.TTSStream(ctx, in, f, opts...))
 }

 func (c *InFlightTrackingClient) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.SoundGeneration(ctx, in, opts...)
+	res, err := c.Backend.SoundGeneration(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...ggrpc.CallOption) (*pb.TranscriptResult, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.AudioTranscription(ctx, in, opts...)
+	res, err := c.Backend.AudioTranscription(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...ggrpc.CallOption) error {
 	defer c.track(ctx)()
-	return c.reconcile(c.inner.AudioTranscriptionStream(ctx, in, f, opts...))
+	return c.reconcile(c.Backend.AudioTranscriptionStream(ctx, in, f, opts...))
 }

 func (c *InFlightTrackingClient) Detect(ctx context.Context, in *pb.DetectOptions, opts ...ggrpc.CallOption) (*pb.DetectResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.Detect(ctx, in, opts...)
+	res, err := c.Backend.Detect(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.Depth(ctx, in, opts...)
+	res, err := c.Backend.Depth(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.Rerank(ctx, in, opts...)
+	res, err := c.Backend.Rerank(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) VAD(ctx context.Context, in *pb.VADRequest, opts ...ggrpc.CallOption) (*pb.VADResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.VAD(ctx, in, opts...)
+	res, err := c.Backend.VAD(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...ggrpc.CallOption) (*pb.DiarizeResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.Diarize(ctx, in, opts...)
+	res, err := c.Backend.Diarize(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.FaceVerify(ctx, in, opts...)
+	res, err := c.Backend.FaceVerify(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) FaceAnalyze(ctx context.Context, in *pb.FaceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.FaceAnalyzeResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.FaceAnalyze(ctx, in, opts...)
+	res, err := c.Backend.FaceAnalyze(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...ggrpc.CallOption) (*pb.VoiceVerifyResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.VoiceVerify(ctx, in, opts...)
+	res, err := c.Backend.VoiceVerify(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.VoiceAnalyzeResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.VoiceAnalyze(ctx, in, opts...)
+	res, err := c.Backend.VoiceAnalyze(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...ggrpc.CallOption) (*pb.VoiceEmbedResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.VoiceEmbed(ctx, in, opts...)
+	res, err := c.Backend.VoiceEmbed(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...ggrpc.CallOption) (*pb.TokenClassifyResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.TokenClassify(ctx, in, opts...)
+	res, err := c.Backend.TokenClassify(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) Score(ctx context.Context, in *pb.ScoreRequest, opts ...ggrpc.CallOption) (*pb.ScoreResponse, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.Score(ctx, in, opts...)
-	return res, c.reconcile(err)
-}
-
-func (c *InFlightTrackingClient) SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...ggrpc.CallOption) (*pb.SoundDetectionResponse, error) {
-	defer c.track(ctx)()
-	res, err := c.inner.SoundDetection(ctx, in, opts...)
+	res, err := c.Backend.Score(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...ggrpc.CallOption) (*pb.AudioEncodeResult, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.AudioEncode(ctx, in, opts...)
+	res, err := c.Backend.AudioEncode(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...ggrpc.CallOption) (*pb.AudioDecodeResult, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.AudioDecode(ctx, in, opts...)
+	res, err := c.Backend.AudioDecode(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

 func (c *InFlightTrackingClient) AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...ggrpc.CallOption) (*pb.AudioTransformResult, error) {
 	defer c.track(ctx)()
-	res, err := c.inner.AudioTransform(ctx, in, opts...)
+	res, err := c.Backend.AudioTransform(ctx, in, opts...)
 	return res, c.reconcile(err)
 }

-// AudioTransformStream, AudioToAudioStream and Forward live in grpc.ControlBackend
-// and are passed through via the embedded field, NOT tracked: they return a stream
-// client and the inference spans the stream's lifetime, not the constructor call.
-// Wrapping the constructor with track() would increment and immediately decrement
-// (and fire onFirstComplete) before any audio flows. Tracking those correctly needs
-// the done() func tied to stream close, which the Backend interface doesn't surface
-// here. If they ever need tracking, move them to grpc.InferenceBackend - the build
-// will then force an explicit wrapper here.
+// AudioTransformStream, AudioToAudioStream and Forward are deliberately left as
+// embedded passthrough: they return a stream client and the inference spans the
+// stream's lifetime, not the constructor call. Wrapping the constructor with
+// track() would increment and immediately decrement (and fire onFirstComplete)
+// before any audio flows. Tracking those correctly needs the done() func tied to
+// stream close, which the current Backend interface doesn't surface here.
--- a/core/services/nodes/inflight_test.go
+++ b/core/services/nodes/inflight_test.go
@@ -408,13 +408,6 @@ var _ = Describe("InFlightTrackingClient", func() {
 				return err
 			})
 		})
-
-		It("SoundDetection", func() {
-			assertTracked(func() error {
-				_, err := client.SoundDetection(context.Background(), &pb.SoundDetectionRequest{})
-				return err
-			})
-		})
 	})

 	Describe("stale model reload (self-heal)", func() {
--- a/docs/content/features/middleware.md
+++ b/docs/content/features/middleware.md
@@ -185,13 +185,6 @@ It is persisted through `POST /api/settings` and read live, so a change takes
 effect on the next request without a restart. A default that names a model no
 longer loaded still appears (marked *not loaded*) so it can be toggled off.

-The default set can also be supplied out-of-band with the
-`LOCALAI_PII_DEFAULT_DETECTORS` environment variable (comma-separated model
-names, e.g. `privacy-filter-nemotron,secret-filter`). When set it takes
-precedence over the value persisted via the UI (env > file), which is the
-right behaviour for immutable container deployments that pin filtering policy
-at boot rather than via the admin UI.
-
 This is what makes `cloud-proxy` / MITM redaction work out of the box: those
 backends default to PII-enabled but ship no detector list, so without a
 default detector the filter runs with nothing to scan. Set one here and
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,225 +1,4 @@
 ---
- name: "lfm2.5-1.2b-instruct"
-  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
-  urls:
-    - https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF
-  description: |
-    Try LFM • Docs • LEAP • Discord
-
-    # LFM2.5-1.2B-Instruct
-
-    LFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.
-
-      - **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket.
-      - **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM.
-      - **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning.
-
-    Find more information about LFM2.5 in our blog post.
-
-    ## 🗒️ Model Details
-
-    LFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features:
-
-    ...
-  license: "other"
-  tags:
-    - llm
-    - gguf
-  icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/dxnYF2fuLpulismtFSGFi.png
-  overrides:
-    backend: llama-cpp
-    function:
-      automatic_tool_parsing_fallback: true
-      grammar:
-        disable: true
-    known_usecases:
-      - chat
-    options:
-      - use_jinja:true
-    parameters:
-      min_p: 0.15
-      model: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
-      repeat_penalty: 1.05
-      temperature: 0.1
-      top_k: 50
-      top_p: 0.1
-    template:
-      use_tokenizer_template: true
-  files:
-    - filename: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
-      sha256: b1b3de114215d9507409a662a501a631095a479a419584e8a2ded6304b19b4f5
-      uri: https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF/resolve/main/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
- name: "qwopus3.6-27b-coder-compat-mtp"
-  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
-  urls:
-    - https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF
-  description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding &amp; Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
-  license: "apache-2.0"
-  tags:
-    - llm
-    - gguf
-    - vision
-    - multimodal
-    - reasoning
-  icon: https://cdn-uploads.huggingface.co/production/uploads/66309bd090589b7c65950665/sGQKmrMc6L6guMoaB5_Y2.png
-  overrides:
-    backend: llama-cpp
-    function:
-      automatic_tool_parsing_fallback: true
-      grammar:
-        disable: true
-    known_usecases:
-      - chat
-    mmproj: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
-    options:
-      - use_jinja:true
-      - spec_type:draft-mtp
-      - spec_n_max:6
-      - spec_p_min:0.75
-    parameters:
-      model: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
-    template:
-      use_tokenizer_template: true
-  files:
-    - filename: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
-      sha256: f893632170124da60e159b7bcc9d91e1cda3014b2c6b8ad9c6cde38a1fcd2f6f
-      uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
-    - filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
-      sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c
-      uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/mmproj-F32.gguf
- name: "kimi-k2.7-code"
-  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
-  urls:
-    - https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF
-  description: |
-    ## 1. Model Introduction
-
-    Kimi K2.7 Code is a coding-focused agentic model built upon Kimi K2.6. With substantial improvements on real-world long-horizon coding tasks, it strengthens end-to-end task completion across complex software engineering workflows while improving token efficiency, reducing thinking-token usage by approximately 30% compared with Kimi K2.6.
-
-    ## 2. Model Summary
-
-    ## 3. Evaluation Results
-
-    Benchmark
-    Kimi K2.6
-    Kimi K2.7 Code
-    GPT-5.5
-    Claude Opus 4.8
-
-    Coding
-
-    Kimi Code Bench v2
-    50.9
-    62.0
-    69.0
-    67.4
-
-    Program Bench
-    48.3
-    53.6
-    69.1
-    63.8
-
-    MLS Bench Lite
-    26.7
-    35.1
-    35.5
-    42.8
-
-    Agentic
-
-    Kimi Claw 24/7 Bench
-    42.9
-    46.9
-    52.8
-    50.4
-
-    MCP Atlas
-    69.4
-    76.0
-    79.4
-    81.3
-
-    MCP Mark Verified
-    72.8
-    81.1
-    92.9
-    76.4
-
-    Footnotes
-
-    ...
-  license: "other"
-  tags:
-    - llm
-    - gguf
-  icon: https://huggingface.co/moonshotai/Kimi-K2.7-Code/resolve/main/figures/kimi-logo.png
-  overrides:
-    backend: llama-cpp
-    function:
-      automatic_tool_parsing_fallback: true
-      grammar:
-        disable: true
-    known_usecases:
-      - chat
-    mmproj: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
-    options:
-      - use_jinja:true
-    parameters:
-      min_p: 0.01
-      model: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
-      repeat_penalty: 1
-      temperature: 0.6
-      top_k: -1
-      top_p: 0.95
-    template:
-      use_tokenizer_template: true
-  files:
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
-      sha256: 65f0aca336f876902323a90e2aff32cac76d071b2cdd818c6a8d78be8fc2c680
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
-      sha256: 40f4416c130827a11502778891f4ef95b2144db90f51d63aa3548d0952a39683
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
-      sha256: ba2ba0b5168784ace7c752ecadfc3631279b2bb023824cb0fe9e2dab3dd28f22
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
-      sha256: 10298a6c98b13ef49be286fefbea8663e16473fb69bbeabe153bc80c60ae116e
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
-      sha256: 8e9e4c8e35d34fc4fef6bfb65a715ad7defbd196970d833c1df6924d701c88b3
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
-      sha256: ccff6e7f299742f82cf6f51a871e3eb3167511efaee967477cc8387f54d16442
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
-      sha256: 1a3b639633a2d22f71156a9f643ded2329cdd969cc21177b644b5741bac1af8e
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
-      sha256: bde28f682a1eab973538b2102007d952f37a13c1f7d55e2ed99177445ddc4282
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
-      sha256: b6a23a95b61e100f7593fa75e2363966323fa767b7e4fdf45d963b59e8fdc69f
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
-      sha256: fb10231c2e6d76921d40f22690f4aa08a8090c708edeaf7e581abafc24d3b25c
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
-      sha256: d2290be7ed1a22ac1f9f8a4813389689e075ce2ab8abc3aaaa1157a3cb1462d8
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
-      sha256: ce0d028314aa3fc783082dbca097e1055d69686a17ab8306574e2949568f26a5
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
-      sha256: 217864ce63a1d130ab39dcb0996b6097e1aa78eb896e38efaefdbbac3a00b7ec
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
-    - filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
-      sha256: eb7582ad7066c5eaa01bde95acb00b4ad9cd7b07cd50a6cf5c9ee427258bc9dd
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
-    - filename: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
-      sha256: b2cc50c8c13fe70fc4968a83332f31e9007ea09ebb9ae91d46a4e4cd2a3053cd
-      uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/mmproj-F32.gguf
 - name: "qwythos-9b-claude-mythos-5-1m"
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
@@ -270,7 +49,33 @@
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
    - https://huggingface.co/unsloth/GLM-5.2-GGUF
-  description: "# GLM-5.2\n\n\U0001F44B Join our WeChat or Discord community.\n\n\U0001F4D6 Check out the GLM-5.2 blog and GLM-5 Technical report.\n\n\U0001F4CD Use GLM-5.2 API services on Z.ai API Platform.\n\n\U0001F51C Try GLM-5.2 here.\n\n[Paper]\n[GitHub]\n\n## Introduction\n\nWe're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:\n  - **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work\n  - **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency\n  - **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%\n  - **Pure Open**: An MIT open-source license — no regional limits, technical access without borders\n\n## Benchmark\n\n## Serve GLM-5.2 Locally\n\n...\n"
+  description: |
+    # GLM-5.2
+
+    👋 Join our WeChat or Discord community.
+
+    📖 Check out the GLM-5.2 blog and GLM-5 Technical report.
+
+    📍 Use GLM-5.2 API services on Z.ai API Platform.
+
+    🔜 Try GLM-5.2 here.
+
+    [Paper]
+    [GitHub]
+
+    ## Introduction
+
+    We're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:
+      - **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work
+      - **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency
+      - **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%
+      - **Pure Open**: An MIT open-source license — no regional limits, technical access without borders
+
+    ## Benchmark
+
+    ## Serve GLM-5.2 Locally
+
+    ...
  license: "mit"
  tags:
    - llm
@@ -393,7 +198,26 @@
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
    - https://huggingface.co/michaelw9999/Qwopus3.6-27B-v2-MTP-NVFP4-GGUF
-  description: "\U0001FA90 Qwopus3.6-27B-v2-MTP\nMTP Release\n\nMulti-Token Prediction reasoning model fine-tuned from Qwen3.6-27B\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Parameters\n⚡ Speculative Decoding\n\U0001F6E0️ Coding / DevOps / Math\n\n\U0001F4A1 What is Qwopus3.6-27B-v2-MTP?\n\U0001FA90 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.\n\n⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.\n\U0001F9E9 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.\n\U0001F9EA GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.\n\U0001F680 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.\n\n...\n"
+  description: |
+    🪐 Qwopus3.6-27B-v2-MTP
+    MTP Release
+
+    Multi-Token Prediction reasoning model fine-tuned from Qwen3.6-27B
+
+    🧬 Trace Inversion & Negentropy
+    🧠 27B Parameters
+    ⚡ Speculative Decoding
+    🛠️ Coding / DevOps / Math
+
+    💡 What is Qwopus3.6-27B-v2-MTP?
+    🪐 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.
+
+    ⚡ MTP Decoding: Auxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.
+    🧩 Structured Reasoning: Inherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.
+    🧪 GB10 Tested: Validated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.
+    🚀 Practical Speed: Designed for workflows where strong answers matter, but waiting several extra minutes per task does not.
+
+    ...
  tags:
    - llm
    - gguf
@@ -419,7 +243,28 @@
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
    - https://huggingface.co/michaelw9999/Qwopus3.6-27B-Coder-MTP-NVFP4-GGUF
-  description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding &amp; Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
+  description: |
+    🪐 Qwopus-3.6-27B-Coder
+    Coder SFT Release
+
+    Agentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2
+
+    🧬 Trace Inversion & Negentropy
+    🧠 27B Dense Model
+    ⚡ Agentic Coding
+    🛠️ Tool Calling & Agent
+    🏆 SWE-bench Verified: 67.0% (off-thinking)
+
+    💡 What is Qwopus-3.6-27B-Coder?
+    🪐 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.
+
+    🧩 Agentic Coding
+    Optimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.
+
+    🛠️ Tool Calling
+    Learns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.
+
+    ...
  tags:
    - llm
    - gguf
@@ -1639,8 +1484,8 @@
      use_tokenizer_template: true
  files:
    - filename: llama-cpp/models/Qwopus3.6-27B-v2-MTP-GGUF/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
+      sha256: 818d68223be4d8518dac0b3b5604dde633cbbcbae1f491d842a3e26711c6606d
      uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-v2-MTP-GGUF/resolve/main/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
-      sha256: 31cf5fc2406a0c7aaebcc26d440bf0df94e215d0589d5205bf319649c052b50a
 - name: "qwen3.6-40b-claude-4.6-opus-deckard-heretic-uncensored-thinking-neo-code-di-imatrix-max"
  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
  urls:
--- a/pkg/grpc/backend.go
+++ b/pkg/grpc/backend.go
@@ -41,34 +41,11 @@ func buildClient(address string, parallel bool, wd WatchDog, enableWatchDog bool
 	}
 }

-// Backend is the full client surface of a model backend. It is deliberately
-// composed of two sub-interfaces so that wrappers can get a COMPILE-TIME
-// guarantee about which methods they must account for:
-//
-//   - InferenceBackend - methods that each perform one discrete inference call
-//     (the call begins on entry and ends on return). A wrapper that does
-//     per-call accounting - e.g. the distributed router's in-flight tracker,
-//     core/services/nodes.InFlightTrackingClient - embeds only ControlBackend
-//     and implements every InferenceBackend method explicitly. Adding a method
-//     to InferenceBackend therefore breaks that wrapper's build until it is
-//     implemented: inference can't be added without an accounting decision.
-//   - ControlBackend - everything that is NOT a discrete inference call:
-//     lifecycle/control-plane operations and the streaming constructors whose
-//     work spans the returned stream rather than the constructor call. These
-//     are safe to pass through untracked.
-//
-// Keep the two sets disjoint; every backend method belongs to exactly one.
 type Backend interface {
-	InferenceBackend
-	ControlBackend
-}
-
-// InferenceBackend is the subset of Backend whose methods each map to a single
-// inference call. Wrappers that account for in-flight work must implement these
-// explicitly (see Backend). Do NOT add methods that return a stream client or
-// that are control-plane only - those belong in ControlBackend.
-type InferenceBackend interface {
+	IsBusy() bool
+	HealthCheck(ctx context.Context) (bool, error)
 	Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error)
+	LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
 	PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
 	Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
 	GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
@@ -76,8 +53,6 @@ type InferenceBackend interface {
 	TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
 	TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...grpc.CallOption) error
 	SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
-	AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
-	AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
 	Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
 	Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error)
 	FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error)
@@ -85,25 +60,8 @@ type InferenceBackend interface {
 	VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...grpc.CallOption) (*pb.VoiceVerifyResponse, error)
 	VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...grpc.CallOption) (*pb.VoiceAnalyzeResponse, error)
 	VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...grpc.CallOption) (*pb.VoiceEmbedResponse, error)
-	Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
-	TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
-	Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
-	VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
-	Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
-	SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
-	AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
-	AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
-	AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
-}
-
-// ControlBackend is the subset of Backend that is NOT per-call inference:
-// lifecycle/control-plane operations and the streaming constructors whose work
-// spans the returned stream rather than the constructor call. In-flight-tracking
-// wrappers embed this directly and pass it through untracked (see Backend).
-type ControlBackend interface {
-	IsBusy() bool
-	HealthCheck(ctx context.Context) (bool, error)
-	LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
+	AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
+	AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
 	TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
 	Status(ctx context.Context) (*pb.StatusResponse, error)

@@ -112,11 +70,24 @@ type ControlBackend interface {
 	StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
 	StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)

+	Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
+
+	TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
+
+	Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
+
 	GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error)

-	// Streaming constructors: these return a stream client immediately; the
-	// actual inference spans the stream's lifetime, not this call, so they are
-	// NOT tracked as a single in-flight unit.
+	VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
+
+	Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
+
+	SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
+
+	AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
+	AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
+
+	AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
 	AudioTransformStream(ctx context.Context, opts ...grpc.CallOption) (AudioTransformStreamClient, error)
 	AudioToAudioStream(ctx context.Context, opts ...grpc.CallOption) (AudioToAudioStreamClient, error)

--- a/tests/e2e/distributed/gallery_distributed_test.go
+++ b/tests/e2e/distributed/gallery_distributed_test.go
@@ -53,13 +53,12 @@ var _ = Describe("Gallery Distributed", Label("Distributed"), func() {
 			Expect(retrieved.Status).To(Equal("downloading"))
 			Expect(retrieved.FrontendID).To(Equal("f1"))

-			// Update progress (cancellable: a downloading install can be cancelled)
-			Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB", true)).To(Succeed())
+			// Update progress
+			Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB")).To(Succeed())

 			updated, _ := galleryStore.Get(op.ID)
 			Expect(updated.Progress).To(BeNumerically("~", 0.75, 0.01))
 			Expect(updated.Message).To(Equal("75% complete"))
-			Expect(updated.Cancellable).To(BeTrue())

 			// Complete
 			Expect(galleryStore.UpdateStatus(op.ID, "completed", "")).To(Succeed())
--- a/tests/e2e/distributed/phase4_test.go
+++ b/tests/e2e/distributed/phase4_test.go
@@ -104,12 +104,11 @@ var _ = Describe("Phase 4: MCP, Skills, Gallery, Fine-Tuning", Label("Distribute
 			}
 			stores.Gallery.Create(op)

-			Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB", true)).To(Succeed())
+			Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB")).To(Succeed())

 			updated, _ := stores.Gallery.Get(op.ID)
 			Expect(updated.Progress).To(BeNumerically("~", 0.5, 0.01))
 			Expect(updated.Message).To(Equal("50% complete"))
-			Expect(updated.Cancellable).To(BeTrue())
 		})

 		It("should deduplicate concurrent downloads", func() {