Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot]
961eb8ce14 chore(deps): bump mxschmitt/action-tmate from 3.23 to 3.24
Bumps [mxschmitt/action-tmate](https://github.com/mxschmitt/action-tmate) from 3.23 to 3.24.
- [Release notes](https://github.com/mxschmitt/action-tmate/releases)
- [Changelog](https://github.com/mxschmitt/action-tmate/blob/master/RELEASE.md)
- [Commits](https://github.com/mxschmitt/action-tmate/compare/v3.23...v3.24)

---
updated-dependencies:
- dependency-name: mxschmitt/action-tmate
  dependency-version: '3.24'
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-06-22 19:42:05 +00:00
59 changed files with 778 additions and 907 deletions

View File

@@ -4974,9 +4974,6 @@ includeDarwin:
- backend: "kitten-tts"
tag-suffix: "-metal-darwin-arm64-kitten-tts"
build-type: "mps"
- backend: "trl"
tag-suffix: "-metal-darwin-arm64-trl"
build-type: "mps"
- backend: "piper"
tag-suffix: "-metal-darwin-arm64-piper"
build-type: "metal"

View File

@@ -71,7 +71,7 @@ jobs:
if-no-files-found: ignore
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23
uses: mxschmitt/action-tmate@v3.24
with:
detached: true
connect-timeout-seconds: 180
@@ -116,7 +116,7 @@ jobs:
PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23
uses: mxschmitt/action-tmate@v3.24
with:
detached: true
connect-timeout-seconds: 180

View File

@@ -79,7 +79,7 @@ jobs:
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-e2e e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23
uses: mxschmitt/action-tmate@v3.24
with:
detached: true
connect-timeout-seconds: 180

View File

@@ -57,7 +57,7 @@ jobs:
PATH="$PATH:$HOME/go/bin" make build-mock-backend test-e2e
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23
uses: mxschmitt/action-tmate@v3.24
with:
detached: true
connect-timeout-seconds: 180

View File

@@ -90,7 +90,7 @@ jobs:
run: PATH="$PATH:$HOME/go/bin" make test-extra-backend-privacy-filter
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23
uses: mxschmitt/action-tmate@v3.24
with:
detached: true
connect-timeout-seconds: 180

View File

@@ -75,7 +75,7 @@ jobs:
retention-days: 7
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23
uses: mxschmitt/action-tmate@v3.24
with:
detached: true
connect-timeout-seconds: 180

View File

@@ -1,5 +1,5 @@
IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
CMAKE_ARGS?=

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
LLAMA_VERSION?=7c082bc417bbe53210a83df4ba5b49e18ce6193c
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# CrispASR version (release tag)
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
CRISPASR_VERSION?=7a8cb80907341c0204bd0488c1244764f4163883
SO_TARGET?=libgocrispasr.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -1,6 +1,6 @@
# parakeet-cpp backend Makefile.
#
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
# (.github/bump_deps.sh) can find and update it - matches the
# whisper.cpp / ds4 / vibevoice-cpp convention.
#
@@ -15,7 +15,7 @@
# That's what the L0 smoke test uses. The default target below does the
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
GOCMD?=go

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
CMAKE_ARGS+=-DGGML_MAX_NAME=128

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
SO_TARGET?=libgowhisper.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -5282,7 +5282,6 @@
nvidia: "cuda12-trl"
nvidia-cuda-12: "cuda12-trl"
nvidia-cuda-13: "cuda13-trl"
metal: "metal-trl"
## TRL backend images
- !!merge <<: *trl
name: "cpu-trl"
@@ -5314,16 +5313,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl
- !!merge <<: *trl
name: "metal-trl"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-trl
- !!merge <<: *trl
name: "metal-trl-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-trl
## llama.cpp quantization backend
- &llama-cpp-quantization
name: "llama-cpp-quantization"

View File

@@ -8,13 +8,7 @@ else
source $backend_dir/../common/libbackend.sh
fi
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
if [ "x${USE_PIP:-}" != "xtrue" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
fi
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
installRequirements
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version

View File

@@ -1,12 +0,0 @@
torch==2.10.0
trl
peft
datasets>=3.0.0
transformers>=4.56.2
accelerate>=1.4.0
huggingface-hub>=1.3.0
sentencepiece
# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
# on macOS arm64.

View File

@@ -215,7 +215,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
@@ -336,15 +335,6 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
}
if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
// Request-side default redaction reads this live via
// ResolvePIIPolicy, so a file edit takes effect on the next chat
// request. The MITM listener resolves its per-host detector map
// once at start, so a raw file edit reaches cloud-proxy traffic
// only after a restart or a POST /api/settings (which rebuilds
// the listener) — the admin UI uses the latter.
appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
}
if settings.AutoUpgradeBackends != nil {
appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
}

View File

@@ -109,52 +109,6 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
})
})
// Instance-wide default PII detectors. The file is the only source (no
// env var), and the loader runs immediately before startMITMIfConfigured,
// so a regression here means the cloud-proxy MITM listener resolves an
// empty detector set at boot and forwards intercepted traffic unredacted —
// even though pii_default_detectors is on disk and the MITM model has PII
// enabled. It also breaks request-side default redaction the same way.
Describe("PII default detectors", func() {
It("loads pii_default_detectors from the file", func() {
cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
loadRuntimeSettingsFromFile(cfg)
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
})
It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
cfg := &config.ApplicationConfig{
DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["from-file"]}`),
PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
}
loadRuntimeSettingsFromFile(cfg)
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
})
})
// The live file watcher applies pii_default_detectors on a runtime change
// the same way it handles galleries/threads/etc.: env-set values (current
// == startup snapshot) are left alone, otherwise the file value is applied
// to the live config so request-side default redaction picks it up without
// a restart.
Describe("file watcher: pii_default_detectors", func() {
It("applies a changed file value to the live config", func() {
startup := config.ApplicationConfig{} // no env baseline
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
handler := readRuntimeSettingsJson(startup)
Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
})
It("leaves an env-controlled value untouched", func() {
startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
handler := readRuntimeSettingsJson(startup)
Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
})
})
// The Agent Pool block has a mix of zero and non-zero defaults
// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").

View File

@@ -750,20 +750,6 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
options.MITMListen = *settings.MITMListen
}
// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
// — apply it only when the env/CLI left the value empty, mirroring the
// "env > file" precedence used for the other fields. This must land before
// startMITMIfConfigured (called right after this loader): the cloud-proxy
// listener resolves each intercept host's detectors once at start via
// ResolvePIIPolicy, and a MITM model that names no detectors of its own
// falls back to these defaults. Without it the listener (and request-side
// default redaction) starts with an empty detector set and forwards
// traffic unredacted even though pii_default_detectors is on disk.
if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
}
// Backend upgrade flags
if settings.AutoUpgradeBackends != nil {
if !options.AutoUpgradeBackends {

View File

@@ -181,8 +181,6 @@ type RunCMD struct {
// Cloud-proxy MITM listener (off by default).
MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
MITMCADir string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
}
func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -245,7 +243,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithAPIAddress(r.Address),
config.WithMITMListen(r.MITMListen),
config.WithMITMCADir(r.MITMCADir),
config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
tunnelEnvVar := strings.Join(tunnels, ",")

View File

@@ -712,18 +712,6 @@ func WithMITMCADir(dir string) AppOption {
}
}
// WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
// model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
// models) that names no pii.detectors of its own. CLI/env:
// LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
// runtime_settings.json / the Middleware UI; a non-empty value takes
// precedence over the file (env > file).
func WithPIIDefaultDetectors(detectors []string) AppOption {
return func(o *ApplicationConfig) {
o.PIIDefaultDetectors = detectors
}
}
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
return func(o *ApplicationConfig) {
o.DynamicConfigsDir = dynamicConfigsDir

View File

@@ -5,7 +5,6 @@ import (
"errors"
"os"
"path/filepath"
"reflect"
)
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
@@ -34,35 +33,6 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
return settings, nil
}
// MergeNonNil overlays every set (non-nil) field of overlay onto the
// receiver, leaving the receiver's value untouched wherever overlay left a
// field unset. Every RuntimeSettings field is a pointer precisely so "set"
// can be told apart from "absent" (see the type doc), which makes this a
// faithful partial update: a caller that submits only the field it owns
// changes exactly that field and never clobbers unrelated settings.
//
// This is the read-modify-write contract the persistence helpers exist for.
// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
// on top, and writes the result — so a focused admin page that POSTs only its
// own field (the Middleware page sends only mitm_listen; the detector table
// only pii_default_detectors) no longer nulls every other setting.
//
// Reflection keeps the merge total over the struct: a field added to
// RuntimeSettings later is merged automatically, so the persistence path can
// never silently drop a new setting the way a hand-maintained field list
// would. Non-pointer fields (none today) are skipped — they cannot express
// "absent", so the receiver wins.
func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
dst := reflect.ValueOf(s).Elem()
src := reflect.ValueOf(overlay)
for i := 0; i < src.NumField(); i++ {
f := src.Field(i)
if f.Kind() == reflect.Pointer && !f.IsNil() {
dst.Field(i).Set(f)
}
}
}
// WritePersistedSettings serialises the given RuntimeSettings to
// runtime_settings.json with restricted permissions (it may carry API
// keys and P2P tokens).

View File

@@ -12,7 +12,6 @@ import (
)
func strPtr(s string) *string { return &s }
func boolPtr(b bool) *bool { return &b }
var _ = Describe("RuntimeSettings persistence helpers", func() {
var (
@@ -52,47 +51,6 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
})
})
// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
// relies on: a focused admin page POSTs only the field it owns, and the
// handler reads the on-disk settings and overlays the request on top.
// Without it, the body would be written verbatim and every field the
// caller omitted would be nulled (the reported regression: changing
// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
Describe("MergeNonNil partial update", func() {
It("overlays set fields and preserves unset ones", func() {
base := config.RuntimeSettings{
MITMListen: strPtr(":9000"),
Galleries: &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
WatchdogIdleEnabled: boolPtr(true),
ApiKeys: &[]string{"persisted-key"},
PIIDefaultDetectors: &[]string{"det-a"},
}
// Simulate the Middleware proxy tab: only mitm_listen is sent.
overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
base.MergeNonNil(overlay)
Expect(base.MITMListen).ToNot(BeNil())
Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
// Everything the overlay left unset must survive untouched.
Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
Expect(*base.Galleries).To(HaveLen(1))
Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
Expect(*base.WatchdogIdleEnabled).To(BeTrue())
Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
})
It("lets an explicit empty slice clear a field", func() {
base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
Expect(base.PIIDefaultDetectors).ToNot(BeNil())
Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
})
})
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
// MITM listener address must survive a write/read round trip so the
// next process restart can bring the listener back up. (Intercept

View File

@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
file, err := c.FormFile("file")
if err != nil {
return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
if err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
if err != nil {
entry = entryParam
}
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
if err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
if err := c.Bind(&payload); err != nil {
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
}
results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
if err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
}
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
if err := c.Bind(&payload); err != nil {
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
}
remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
if err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
if payload.UpdateInterval < 1 {
payload.UpdateInterval = 60
}
if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
}
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
if err := c.Bind(&payload); err != nil {
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
}
if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
}
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
if err != nil {
entry = entryParam
}
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
if err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
if err != nil {
if strings.Contains(err.Error(), "not found") {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})

View File

@@ -1,49 +0,0 @@
package localai
import (
"net/http"
"net/http/httptest"
"github.com/labstack/echo/v4"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Regression for #10443: agent/collection names carry a "legacy-api-key:"
// prefix, so the ':' is percent-encoded as %3A in the request path. Echo routes
// such paths via URL.RawPath and stores the path-param value still escaped, so
// handlers must URL-decode it before looking the collection up in the store -
// otherwise the lookup sees "legacy-api-key%3ALiteraryResearch" and 404s.
var _ = Describe("decodedParam", func() {
var e *echo.Echo
BeforeEach(func() {
e = echo.New()
})
// route runs a request through Echo's real router so the path param is
// populated exactly as it would be in production, then returns the decoded
// value the handler would observe.
route := func(rawPath string) string {
var got string
e.GET("/api/agents/collections/:name/upload", func(c echo.Context) error {
got = decodedParam(c, "name")
return c.NoContent(http.StatusOK)
})
req := httptest.NewRequest(http.MethodGet, rawPath, nil)
rec := httptest.NewRecorder()
e.ServeHTTP(rec, req)
Expect(rec.Code).To(Equal(http.StatusOK))
return got
}
It("decodes a percent-encoded colon in the collection name", func() {
got := route("/api/agents/collections/legacy-api-key%3ALiteraryResearch/upload")
Expect(got).To(Equal("legacy-api-key:LiteraryResearch"))
})
It("leaves an unencoded name untouched", func() {
got := route("/api/agents/collections/PlainCollection/upload")
Expect(got).To(Equal("PlainCollection"))
})
})

View File

@@ -6,7 +6,6 @@ import (
"io"
"maps"
"net/http"
"net/url"
"os"
"path/filepath"
"slices"
@@ -34,22 +33,6 @@ func getUserID(c echo.Context) string {
return user.ID
}
// decodedParam returns the named path parameter, URL-decoding it.
//
// Echo routes a request via URL.RawPath whenever the path contains
// percent-encoded characters (e.g. %3A for ':'), and in that case stores the
// matched path-param value raw/escaped. Agent and collection names carry a
// "legacy-api-key:" prefix, so the ':' arrives as %3A and the raw param no
// longer matches the stored name. Callers must unescape before lookups.
// Falls back to the raw value if it isn't valid percent-encoding.
func decodedParam(c echo.Context, name string) string {
raw := c.Param(name)
if decoded, err := url.PathUnescape(raw); err == nil {
return decoded
}
return raw
}
// isAdminUser returns true if the authenticated user has admin role.
func isAdminUser(c echo.Context) bool {
user := auth.GetUser(c)
@@ -144,7 +127,7 @@ func GetAgentEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
statuses := svc.ListAgentsForUser(userID)
active, exists := statuses[name]
@@ -159,7 +142,7 @@ func UpdateAgentEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
var cfg state.AgentConfig
if err := c.Bind(&cfg); err != nil {
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
@@ -178,7 +161,7 @@ func DeleteAgentEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
if err := svc.DeleteAgentForUser(userID, name); err != nil {
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
}
@@ -190,7 +173,7 @@ func GetAgentConfigEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
cfg := svc.GetAgentConfigForUser(userID, name)
if cfg == nil {
return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
@@ -203,7 +186,7 @@ func PauseAgentEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
if err := svc.PauseAgentForUser(userID, decodedParam(c, "name")); err != nil {
if err := svc.PauseAgentForUser(userID, c.Param("name")); err != nil {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
}
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -214,7 +197,7 @@ func ResumeAgentEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
if err := svc.ResumeAgentForUser(userID, decodedParam(c, "name")); err != nil {
if err := svc.ResumeAgentForUser(userID, c.Param("name")); err != nil {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
}
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
@@ -225,7 +208,7 @@ func GetAgentStatusEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
history := svc.GetAgentStatusForUser(userID, name)
if history == nil {
@@ -258,7 +241,7 @@ func GetAgentObservablesEndpoint(app *application.Application) echo.HandlerFunc
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
history, err := svc.GetAgentObservablesForUser(userID, name)
if err != nil {
@@ -278,7 +261,7 @@ func ClearAgentObservablesEndpoint(app *application.Application) echo.HandlerFun
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
}
@@ -290,7 +273,7 @@ func ChatWithAgentEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
var payload struct {
Message string `json:"message"`
}
@@ -319,7 +302,7 @@ func AgentSSEEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
// Try local SSE manager first
manager := svc.GetSSEManagerForUser(userID, name)
@@ -351,7 +334,7 @@ func ExportAgentEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
svc := app.AgentPoolService()
userID := effectiveUserID(c)
name := decodedParam(c, "name")
name := c.Param("name")
data, err := svc.ExportAgentForUser(userID, name)
if err != nil {
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})

View File

@@ -4,6 +4,8 @@ import (
"encoding/json"
"io"
"net/http"
"os"
"path/filepath"
"time"
"github.com/labstack/echo/v4"
@@ -108,18 +110,6 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
})
}
// Read whatever is already persisted: it is both the source of truth
// for branding asset filenames (below) and the base we merge this
// request onto before writing. A read failure must not let a Save
// silently discard the existing settings — surface it instead.
persisted, err := appConfig.ReadPersistedSettings()
if err != nil {
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Failed to read existing settings: " + err.Error(),
})
}
// Branding asset filenames are owned exclusively by
// /api/branding/asset/{kind} (upload/delete). The Settings page also
// round-trips them via GET /api/settings, but its local state is stale
@@ -128,9 +118,11 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
// at page open. Replace whatever the body sent for these three fields
// with the values currently on disk so /api/settings can never
// regress them.
settings.LogoFile = persisted.LogoFile
settings.LogoHorizontalFile = persisted.LogoHorizontalFile
settings.FaviconFile = persisted.FaviconFile
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
settings.LogoFile = existing.LogoFile
settings.LogoHorizontalFile = existing.LogoHorizontalFile
settings.FaviconFile = existing.FaviconFile
}
// The UI reads ApiKeys from GET /api/settings, which already returns the
// merged env+runtime list. When the user clicks Save, the same merged
@@ -153,17 +145,16 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
settings.ApiKeys = &runtimeOnly
}
// Persist as a partial update: overlay only the fields this request set
// onto the settings already on disk. Focused admin pages POST just the
// keys they own (the Middleware proxy tab sends only mitm_listen; the
// detector table only pii_default_detectors), so writing the request
// body verbatim would null every unrelated setting (the no-omitempty
// api_keys / pii_default_detectors fields even round-trip as JSON
// null). The full Settings page still round-trips every field, so its
// Save is unchanged.
toPersist := persisted
toPersist.MergeNonNil(settings)
if err := appConfig.WritePersistedSettings(toPersist); err != nil {
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
settingsJSON, err := json.MarshalIndent(settings, "", " ")
if err != nil {
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Failed to marshal settings: " + err.Error(),
})
}
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Failed to write settings file: " + err.Error(),
@@ -271,14 +262,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
}
}
// Rebuild the MITM listener when its address OR the instance-wide
// default detectors change. The per-host detector map is resolved once
// at listener start (startMITMLocked → ResolvePIIPolicy), so a
// default-detector change is otherwise invisible to cloud-proxy traffic
// until the next restart — an admin toggling a default detector would
// see no redaction. RestartMITM is a no-op when the listener is
// disabled (empty address).
if settings.MITMListen != nil || settings.PIIDefaultDetectors != nil {
if settings.MITMListen != nil {
if err := app.RestartMITM(); err != nil {
xlog.Error("Failed to restart MITM proxy", "error", err)
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{

View File

@@ -52,10 +52,6 @@ var _ = Describe("Settings endpoints", func() {
// Settings are persisted here; set after construction since there's no
// dedicated AppOption for it.
app.ApplicationConfig().DynamicConfigsDir = tmp
// Contain the MITM CA inside tmp too. The partial-save spec flips
// mitm_listen, which starts the listener and writes a CA; without this
// it defaults to ./mitm-ca and litters the package source tree.
app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
e = echo.New()
e.GET("/api/settings", GetSettingsEndpoint(app))
@@ -113,57 +109,6 @@ var _ = Describe("Settings endpoints", func() {
Expect(err).ToNot(HaveOccurred())
})
// Regression: a focused admin page (the Middleware proxy tab) POSTs only
// the one field it owns — mitm_listen. The old handler wrote the request
// body verbatim, so every other persisted setting was dropped (and
// api_keys / pii_default_detectors, which lack omitempty, were written as
// null). A partial POST must now merge onto what is already on disk.
It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
// First save establishes a fuller settings file (as the full Settings
// page would): galleries, an API key, and the MITM listener. The
// listener restart binds a real socket, so use 127.0.0.1:0 for an
// ephemeral free port rather than a fixed one that may be in use.
rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
// The Middleware proxy tab then changes only the listen address — the
// exact partial body that nulled everything else before the fix.
rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
Expect(err).ToNot(HaveOccurred())
var ondisk config.RuntimeSettings
Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
Expect(ondisk.MITMListen).ToNot(BeNil())
Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
Expect(*ondisk.Galleries).To(HaveLen(1))
Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
})
// The MITM listener resolves its per-host PII detectors once at start
// (startMITMLocked → ResolvePIIPolicy), and the handler used to restart it
// only when mitm_listen changed. So an admin toggling a default detector
// (the Middleware detector table POSTs only pii_default_detectors) left
// cloud-proxy traffic unredacted until the next reboot. A
// pii_default_detectors change must now rebuild the listener.
It("rebuilds the MITM listener when only pii_default_detectors changes", func() {
rec := post(`{"mitm_listen":"127.0.0.1:0"}`)
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
srv1 := app.MITMServer()
Expect(srv1).ToNot(BeNil(), "listener should be running after mitm_listen is set")
rec = post(`{"pii_default_detectors":["det-a"]}`)
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
Expect(app.MITMServer()).ToNot(BeIdenticalTo(srv1),
"a default-detector change must restart the listener so it picks up the new detectors")
})
// Residual #9125: enabling the watchdog from a cold (off) state via the
// React master toggle must start the live watchdog immediately, without a
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while

View File

@@ -0,0 +1,100 @@
import { test, expect } from './coverage-fixtures.js'
// These specs stub /api/features and /api/auth/status per cell. The test server
// disables auth (isAdmin=true) and reports its own features, so we intercept
// before navigation to simulate each role x mode cell.
function stubFeatures(page, features) {
return page.route('**/api/features', route =>
route.fulfill({ contentType: 'application/json', body: JSON.stringify(features) }))
}
function stubNoP2P(page) {
// P2P token endpoint returns empty -> p2pEnabled=false.
return page.route('**/api/p2p/token', route =>
route.fulfill({ contentType: 'text/plain', body: '' }))
}
test.describe('Adaptive landing (HomeRoute)', () => {
test('admin + distributed redirects /app to Nodes', async ({ page }) => {
await stubFeatures(page, { distributed: true })
await stubNoP2P(page)
await page.goto('/app')
await expect(page).toHaveURL(/\/app\/nodes$/)
await expect(page.locator('.page-title').first()).toBeVisible({ timeout: 15_000 })
})
test('admin + single-node stays on Home', async ({ page }) => {
await stubFeatures(page, { distributed: false })
await stubNoP2P(page)
await page.goto('/app')
await expect(page).toHaveURL(/\/app$/)
await expect(page.locator('.home-greeting')).toBeVisible({ timeout: 15_000 })
})
})
test.describe('Adaptive sidebar', () => {
test('distributed pins the Cluster group with Nodes at the top', async ({ page }) => {
await stubFeatures(page, { distributed: true })
await stubNoP2P(page)
await page.goto('/app/chat') // any in-app page so the sidebar is mounted
const pinned = page.locator('.sidebar-nav .sidebar-section-items').first()
await expect(pinned.getByText('Nodes', { exact: false })).toBeVisible({ timeout: 15_000 })
})
test('single-node does not pin a Cluster group', async ({ page }) => {
await stubFeatures(page, { distributed: false })
await stubNoP2P(page)
await page.goto('/app/chat')
// Nodes is reachable only via the Operate rail, not pinned at the top.
await expect(page.locator('.sidebar-nav')).toBeVisible({ timeout: 15_000 })
await expect(page.locator('.sidebar-nav .sidebar-section-items').first()
.getByText('Nodes', { exact: false })).toHaveCount(0)
})
})
test.describe('Top navbar', () => {
test('admin sees the mode pill and settings cog', async ({ page }) => {
await stubFeatures(page, { distributed: true })
await stubNoP2P(page)
await page.goto('/app/chat')
await expect(page.locator('.top-navbar__mode')).toBeVisible({ timeout: 15_000 })
await expect(page.locator('.top-navbar__icon[aria-label]')).not.toHaveCount(0)
})
test('admin-via-chat jump shows when localai_assistant is enabled', async ({ page }) => {
await stubFeatures(page, { distributed: false, localai_assistant: true })
await stubNoP2P(page)
await page.goto('/app/chat')
await expect(page.locator('.top-navbar__assistant')).toBeVisible({ timeout: 15_000 })
})
test('admin-via-chat jump hidden when localai_assistant is off', async ({ page }) => {
await stubFeatures(page, { distributed: false, localai_assistant: false })
await stubNoP2P(page)
await page.goto('/app/chat')
await expect(page.locator('.top-navbar__assistant')).toHaveCount(0)
})
})
test.describe('Token usage meter', () => {
test('renders when admin usage has data', async ({ page }) => {
await stubFeatures(page, { distributed: false })
await stubNoP2P(page)
await page.route('**/api/auth/admin/usage**', route =>
route.fulfill({ contentType: 'application/json',
body: JSON.stringify({ buckets: [{ total_tokens: 1234 }] }) }))
await page.goto('/app/chat')
await expect(page.locator('.top-navbar__meter')).toBeVisible({ timeout: 15_000 })
})
test('hidden when admin usage is empty (graceful degrade)', async ({ page }) => {
await stubFeatures(page, { distributed: false })
await stubNoP2P(page)
await page.route('**/api/auth/admin/usage**', route =>
route.fulfill({ contentType: 'application/json', body: JSON.stringify({ buckets: [] }) }))
await page.goto('/app/chat')
await expect(page.locator('.top-navbar')).toBeVisible({ timeout: 15_000 })
await expect(page.locator('.top-navbar__meter')).toHaveCount(0)
})
})

View File

@@ -12,6 +12,16 @@
"accountSettings": "Account settings",
"account": "Account",
"accountFor": "Account: {{name}}",
"topbar": {
"label": "Top bar",
"modeDistributed": "Distributed",
"modeSwarm": "Swarm",
"modeSingle": "Single-node",
"pickModel": "Models",
"adminViaChat": "Admin via chat",
"tokensToday": "Tokens today",
"usageDetail": "View usage detail"
},
"sections": {
"create": "Create",
"recognition": "Recognition",

View File

@@ -45,7 +45,7 @@
},
"scheduling": {
"title": "Penjadwalan",
"subtitle": "Aturan penempatan model dan replika di seluruh kluster"
"subtitle": "Aturan penempatan model dan replika di seluruh klaster"
},
"p2p": {
"title": "Komputasi AI Terdistribusi",
@@ -86,4 +86,4 @@
"title": "Penjelajah",
"subtitle": "Jelajahi file dan konfigurasi"
}
}
}

View File

@@ -72,7 +72,7 @@
"actions": {
"copy": "Salin",
"regenerate": "Hasilkan ulang",
"jumpToLatest": "Lompat ke terbaru"
"jumpToLatest": "Jump to latest"
},
"streaming": {
"transferring": "Mentransfer model...",
@@ -115,4 +115,4 @@
"clearAll": "Hapus semua",
"deleteAllTitle": "Hapus semua percakapan"
}
}
}

View File

@@ -1,8 +1,8 @@
{
"unsaved": {
"title": "Buang perubahan yang belum disimpan?",
"message": "Anda memiliki perubahan yang belum disimpan. Perubahan tersebut akan hilang jika Anda meninggalkan halaman ini.",
"leave": "Tinggalkan Halaman"
"title": "Discard unsaved changes?",
"message": "You have unsaved changes that will be lost if you leave this page.",
"leave": "Leave"
},
"actions": {
"save": "Simpan",

View File

@@ -7,15 +7,15 @@
"resourceGpu": "GPU",
"resourceRam": "RAM",
"greeting": {
"morning": "Selamat pagi",
"afternoon": "Selamat siang",
"evening": "Selamat malam",
"night": "Selamat lembur"
"morning": "Good morning",
"afternoon": "Good afternoon",
"evening": "Good evening",
"night": "Working late"
},
"statusLine": {
"modelsLoaded_one": "{{count}} model dimuat",
"modelsLoaded_other": "{{count}} model dimuat",
"noModelsLoaded": "Tidak ada model yang dimuat",
"modelsLoaded_one": "{{count}} model loaded",
"modelsLoaded_other": "{{count}} models loaded",
"noModelsLoaded": "No models loaded",
"nodes_one": "{{count}} node",
"nodes_other": "{{count}} nodes"
},
@@ -79,14 +79,14 @@
},
"connect": {
"title": "Satu endpoint, semua API",
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Selain itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Di atas itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
"nativeTitle": "API native",
"compatTitle": "Kompatibilitas drop-in",
"apiReference": "Referensi API lengkap",
"copy": "Salin",
"copied": "Disalin",
"browse": "Jelajahi API",
"hide": "Sembunyikan endpoint",
"dismiss": "Abaikan"
"browse": "Browse the API",
"hide": "Hide endpoints",
"dismiss": "Dismiss"
}
}

View File

@@ -5,7 +5,7 @@
"video": "Video",
"tts": "TTS",
"sound": "Suara",
"transform": "Transformasi"
"transform": "Transform"
}
},
"image": {
@@ -30,7 +30,7 @@
"refImagesAdded_other": "{{count}} gambar ditambahkan"
},
"actions": {
"view": "Lihat",
"view": "View",
"generate": "Hasilkan",
"generating": "Menghasilkan..."
},
@@ -153,4 +153,4 @@
"clearConfirm": "Hapus",
"cleared": "Riwayat dihapus"
}
}
}

View File

@@ -19,11 +19,11 @@
"operate": "Operasikan"
},
"operate": {
"inference": "Inferensi",
"cluster": "Kluster",
"observability": "Observabilitas",
"access": "Akses",
"system": "Sistem"
"inference": "Inference",
"cluster": "Cluster",
"observability": "Observability",
"access": "Access",
"system": "System"
},
"items": {
"home": "Beranda",
@@ -64,7 +64,7 @@
"copyright": "© 2023-{{year}} {{author}}"
},
"console": {
"automation": "Automasi",
"automation": "Otomasi",
"training": "Pelatihan"
}
}

View File

@@ -184,6 +184,50 @@
font-size: 1.5rem;
}
/* Desktop top bar: deployment + admin affordances on wide screens. Hidden on
mobile, where .mobile-header carries the equivalent actions. */
.top-navbar {
display: flex;
align-items: center;
justify-content: space-between;
gap: var(--spacing-md);
padding: var(--spacing-sm) var(--spacing-lg);
border-bottom: 1px solid var(--color-border-default);
background: var(--color-bg-secondary);
}
.top-navbar__right { display: flex; align-items: center; gap: var(--spacing-sm); }
.top-navbar__mode {
font-size: 0.75rem;
padding: 2px 10px;
border-radius: 999px;
border: 1px solid var(--color-border-default);
color: var(--color-text-secondary);
}
.top-navbar__mode.is-active { color: var(--color-success); border-color: var(--color-success); }
.top-navbar__btn {
display: inline-flex; align-items: center; gap: 6px;
font-size: 0.8125rem; padding: 5px 10px; border-radius: 8px;
border: 1px solid var(--color-border-default); background: var(--color-bg-tertiary);
color: var(--color-text-primary); cursor: pointer;
}
.top-navbar__icon {
width: 32px; height: 32px; display: inline-flex; align-items: center;
justify-content: center; border-radius: 8px; border: 1px solid var(--color-border-default);
background: var(--color-bg-tertiary); color: var(--color-text-secondary); cursor: pointer;
}
.top-navbar__avatar img { width: 100%; height: 100%; border-radius: 50%; object-fit: cover; }
.top-navbar__meter {
display: inline-flex; flex-direction: column; gap: 3px; align-items: flex-start;
padding: 4px 10px; border-radius: 8px; border: 1px solid var(--color-border-default);
background: var(--color-bg-tertiary); cursor: pointer; min-width: 150px;
}
.top-navbar__meter-label { font-size: 0.6875rem; color: var(--color-text-secondary); }
.top-navbar__meter-bar { width: 100%; height: 5px; border-radius: 3px; background: var(--color-bg-secondary); overflow: hidden; }
.top-navbar__meter-bar i { display: block; height: 100%; background: var(--color-primary); }
@media (max-width: 639px) {
.top-navbar { display: none; }
}
/* Sidebar */
.sidebar {
position: fixed;

View File

@@ -3,6 +3,7 @@ import { Outlet, useLocation, useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import Sidebar from './components/Sidebar'
import OperationsBar from './components/OperationsBar'
import TopNavbar from './components/TopNavbar'
import { ToastContainer, useToast } from './components/Toast'
import { systemApi } from './utils/api'
import { useTheme } from './contexts/ThemeContext'
@@ -98,6 +99,7 @@ export default function App() {
<Sidebar isOpen={sidebarOpen} onClose={() => setSidebarOpen(false)} />
<main className="main-content" {...(sidebarOpen ? { 'aria-hidden': 'true', inert: '' } : {})}>
<OperationsBar />
<TopNavbar />
{/* Mobile header — primary actions reachable without opening the
drawer. Hamburger is the only way to expand the nav on phones;
theme toggle and account avatar are mirrored from the sidebar

View File

@@ -0,0 +1,28 @@
import { lazy, Suspense } from 'react'
import { Navigate } from 'react-router-dom'
import { useAuth } from '../context/AuthContext'
import { useDeployment } from '../contexts/DeploymentContext'
import { resolveHome } from '../utils/resolveHome'
import RouteFallback from './RouteFallback'
const Home = lazy(() => import('../pages/Home'))
// Index-route element. Waits for auth + deployment signals to load (so we never
// flash the wrong landing), then either renders Home or redirects to the cell's
// landing page. Redirecting (rather than rendering Nodes/Chat inline at /app)
// keeps each target's own route guard, active-nav state, and deep-linkability.
export default function HomeRoute() {
const { isAdmin, loading: authLoading } = useAuth()
const { distributed, p2pEnabled, loading: deployLoading } = useDeployment()
if (authLoading || deployLoading) return <RouteFallback />
const target = resolveHome({ isAdmin, distributed, p2pEnabled })
if (target) return <Navigate to={target} replace />
return (
<Suspense fallback={<RouteFallback />}>
<Home />
</Suspense>
)
}

View File

@@ -5,9 +5,11 @@ import ThemeToggle from './ThemeToggle'
import LanguageSwitcher from './LanguageSwitcher'
import { useAuth } from '../context/AuthContext'
import { useBranding } from '../contexts/BrandingContext'
import { useDeployment } from '../contexts/DeploymentContext'
import { apiUrl } from '../utils/basePath'
import { preloadRoute } from '../router'
import { consoles, firstVisiblePath, consolePaths } from './console/consoleConfig'
import { clusterPinItems, shouldCollapseCreate } from '../utils/sidebarPolicy'
const COLLAPSED_KEY = 'localai_sidebar_collapsed'
const SECTIONS_KEY = 'localai_sidebar_sections'
@@ -58,11 +60,13 @@ function NavItem({ item, onClose, collapsed }) {
)
}
function loadSectionState() {
// Tiers render expanded by default (the redesign favours showing the few
// intent groups up front); users can still collapse any tier and the choice
// is persisted. Stored values override the defaults so a saved collapse wins.
function loadSectionState(collapseCreate = false) {
// Tiers render expanded by default; users can collapse any tier and the
// choice persists (stored values override defaults). In cluster cells we
// start Create collapsed so the pinned cluster group leads - but only when
// the user has not already expressed a preference.
const defaults = Object.fromEntries(sections.map(s => [s.id, true]))
if (collapseCreate) defaults.create = false
try {
const stored = localStorage.getItem(SECTIONS_KEY)
return stored ? { ...defaults, ...JSON.parse(stored) } : defaults
@@ -77,20 +81,34 @@ function saveSectionState(state) {
export default function Sidebar({ isOpen, onClose }) {
const { t } = useTranslation('nav')
const [features, setFeatures] = useState({})
const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
// Deployment shape (server features + p2p) drives the adaptive sidebar; the
// shared context replaces the sidebar's own /api/features fetch so the
// landing resolver, navbar, and this policy agree on one snapshot.
const deployment = useDeployment()
const features = deployment.features
// Shared shape for the console gating helpers (consoleConfig.js); in scope for
// both the pinned cluster group and the console-tier rendering below.
const auth = { isAdmin, authEnabled, hasFeature, features }
const collapseCreate = shouldCollapseCreate(auth, deployment)
const [collapsed, setCollapsed] = useState(() => {
try { return localStorage.getItem(COLLAPSED_KEY) === 'true' } catch (_) { return false }
})
const [openSections, setOpenSections] = useState(loadSectionState)
const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
const branding = useBranding()
const navigate = useNavigate()
const location = useLocation()
const closeBtnRef = useRef(null)
// Apply the cluster-cell Create-collapse default once, only when the user has
// no stored section preference (so we never override an explicit choice).
useEffect(() => {
fetch(apiUrl('/api/features')).then(r => r.json()).then(setFeatures).catch(() => {})
}, [])
if (deployment.loading) return
let hasStored = false
try { hasStored = !!localStorage.getItem(SECTIONS_KEY) } catch { hasStored = false }
if (hasStored || !collapseCreate) return
setOpenSections(prev => (prev.create === false ? prev : { ...prev, create: false }))
}, [deployment.loading, collapseCreate])
// Stay in sync with external collapse dispatches (e.g. the chat
// page's focus mode). The collapse-toggle button still owns the
@@ -157,8 +175,6 @@ export default function Sidebar({ isOpen, onClose }) {
}
const visibleTopItems = topItems.filter(filterItem)
// Shared shape for the console gating helpers (consoleConfig.js).
const auth = { isAdmin, authEnabled, hasFeature, features }
// Inline sections (Create) carry no gating; a plain filterItem pass suffices.
const getVisibleSectionItems = (section) => section.items.filter(filterItem)
@@ -199,6 +215,28 @@ export default function Sidebar({ isOpen, onClose }) {
))}
</div>
{/* Pinned Cluster quick-access (admin + distributed/p2p). Same gate
as the Operate rail; surfaced at the top for cluster operators. */}
{(() => {
const pinned = clusterPinItems(auth, deployment)
if (pinned.length === 0) return null
return (
<div className="sidebar-section">
<div className="sidebar-section-title">{t('operate.cluster')}</div>
<div className="sidebar-section-items">
{pinned.map(item => (
<NavItem
key={item.path}
item={{ path: item.path, icon: item.icon, labelKey: item.labelKey }}
onClose={onClose}
collapsed={collapsed}
/>
))}
</div>
</div>
)
})()}
{/* Collapsible sections */}
{sections.map(section => {
const visibleItems = getVisibleSectionItems(section)

View File

@@ -0,0 +1,96 @@
import { useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { useAuth } from '../context/AuthContext'
import { useDeployment } from '../contexts/DeploymentContext'
import { useTheme } from '../contexts/ThemeContext'
import { launchAssistantChat } from '../utils/launchAssistantChat'
import TokenUsageMeter from './navbar/TokenUsageMeter'
// Desktop top bar. Complementary to the mobile-only header in App.jsx: this is
// hidden on small screens (see .top-navbar CSS) and shows deployment/admin
// affordances on wide screens where the sidebar footer is far from the content.
export default function TopNavbar() {
const { t } = useTranslation('nav')
const navigate = useNavigate()
const { isAdmin, authEnabled, user } = useAuth()
const { features, distributed, p2pEnabled } = useDeployment()
const { theme, toggleTheme } = useTheme()
const modeLabel = distributed
? t('topbar.modeDistributed')
: p2pEnabled
? t('topbar.modeSwarm')
: t('topbar.modeSingle')
const showAssistantJump = isAdmin && !!features.localai_assistant
const showAvatar = authEnabled && user
const themeLabel = theme === 'dark' ? t('switchToLightMode') : t('switchToDarkMode')
return (
<div className="top-navbar" role="navigation" aria-label={t('topbar.label')}>
<div className="top-navbar__left">
{isAdmin && (
<span className={`top-navbar__mode ${distributed || p2pEnabled ? 'is-active' : ''}`}>
<i className="fas fa-circle-nodes" aria-hidden="true" /> {modeLabel}
</span>
)}
</div>
<div className="top-navbar__right">
{!isAdmin && (
<button
type="button"
className="top-navbar__btn"
onClick={() => navigate('/app/chat')}
title={t('topbar.pickModel')}
>
<i className="fas fa-cube" aria-hidden="true" /> {t('topbar.pickModel')}
</button>
)}
{showAssistantJump && (
<button
type="button"
className="top-navbar__btn top-navbar__assistant"
onClick={() => launchAssistantChat(navigate)}
title={t('topbar.adminViaChat')}
>
<i className="fas fa-user-shield" aria-hidden="true" /> {t('topbar.adminViaChat')}
</button>
)}
{isAdmin && <TokenUsageMeter />}
{isAdmin && (
<button
type="button"
className="top-navbar__icon"
onClick={() => navigate('/app/settings')}
aria-label={t('items.settings')}
title={t('items.settings')}
>
<i className="fas fa-cog" aria-hidden="true" />
</button>
)}
<button
type="button"
className="top-navbar__icon"
onClick={toggleTheme}
aria-label={themeLabel}
title={themeLabel}
>
<i className={`fas ${theme === 'dark' ? 'fa-sun' : 'fa-moon'}`} aria-hidden="true" />
</button>
{showAvatar && (
<button
type="button"
className="top-navbar__icon top-navbar__avatar"
onClick={() => navigate('/app/account')}
aria-label={user.name || user.email}
title={user.name || user.email}
>
{user.avatarUrl
? <img src={user.avatarUrl} alt="" />
: <i className="fas fa-user-circle" aria-hidden="true" />}
</button>
)}
</div>
</div>
)
}

View File

@@ -0,0 +1,52 @@
import { useState, useEffect } from 'react'
import { useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { usageApi } from '../../utils/api'
// Compact admin-only usage glance: today's total tokens, optionally against a
// quota cap, linking to the full /app/usage page. Self-contained data fetch so
// a usage-API failure cannot break the navbar - it just renders nothing.
function sumTotalTokens(res) {
const buckets = res?.buckets || res?.usage || (Array.isArray(res) ? res : [])
if (!Array.isArray(buckets) || buckets.length === 0) return null
return buckets.reduce((s, b) => s + (b.total_tokens || 0), 0)
}
export default function TokenUsageMeter() {
const { t } = useTranslation('nav')
const navigate = useNavigate()
const [tokens, setTokens] = useState(null)
const [cap, setCap] = useState(null)
useEffect(() => {
let cancelled = false
usageApi.getAdminUsage('day')
.then(res => { if (!cancelled) setTokens(sumTotalTokens(res)) })
.catch(() => { if (!cancelled) setTokens(null) })
usageApi.getMyQuotas()
.then(q => { if (!cancelled) setCap(q?.token_limit || q?.tokens?.limit || null) })
.catch(() => { if (!cancelled) setCap(null) })
return () => { cancelled = true }
}, [])
if (tokens === null) return null
const pct = cap ? Math.min(100, Math.round((tokens / cap) * 100)) : null
return (
<button
type="button"
className="top-navbar__meter"
onClick={() => navigate('/app/usage')}
title={t('topbar.usageDetail')}
>
<span className="top-navbar__meter-label">
{t('topbar.tokensToday')}: {Intl.NumberFormat().format(tokens)}
{cap ? ` / ${Intl.NumberFormat().format(cap)}` : ''}
</span>
{pct !== null && (
<span className="top-navbar__meter-bar"><i style={{ width: `${pct}%` }} /></span>
)}
</button>
)
}

View File

@@ -0,0 +1,55 @@
import { createContext, useContext, useState, useEffect } from 'react'
import { apiUrl } from '../utils/basePath'
import { p2pApi } from '../utils/api'
const DeploymentContext = createContext(null)
// One shared fetch of the deployment-shape signals the adaptive UI keys off:
// server features (/api/features) and whether a P2P network token exists.
// Components used to fetch /api/features independently (Sidebar, Home); this
// centralises it so the landing resolver, sidebar policy, and navbar agree on
// one snapshot and we issue a single request.
export function DeploymentProvider({ children }) {
const [features, setFeatures] = useState({})
const [p2pEnabled, setP2pEnabled] = useState(false)
const [loading, setLoading] = useState(true)
useEffect(() => {
let cancelled = false
const featuresP = fetch(apiUrl('/api/features'))
.then(r => r.json())
.catch(() => ({}))
// P2P has no /api/features flag: it is "enabled" when a network token
// exists (mirrors pages/P2P.jsx). A 404/disabled endpoint throws and we
// treat that as not-enabled.
const p2pP = p2pApi.getToken()
.then(tok => (typeof tok === 'string' ? tok : (tok?.token || '')).trim())
.catch(() => '')
Promise.all([featuresP, p2pP]).then(([f, tok]) => {
if (cancelled) return
setFeatures(f || {})
setP2pEnabled(!!tok)
setLoading(false)
})
return () => { cancelled = true }
}, [])
const value = {
features,
distributed: !!features.distributed,
p2pEnabled,
loading,
}
return (
<DeploymentContext.Provider value={value}>
{children}
</DeploymentContext.Provider>
)
}
export function useDeployment() {
const ctx = useContext(DeploymentContext)
if (!ctx) throw new Error('useDeployment must be used within DeploymentProvider')
return ctx
}

View File

@@ -4,6 +4,7 @@ import { RouterProvider } from 'react-router-dom'
import { ThemeProvider } from './contexts/ThemeContext'
import { BrandingProvider } from './contexts/BrandingContext'
import { AuthProvider } from './context/AuthContext'
import { DeploymentProvider } from './contexts/DeploymentContext'
import { OperationsProvider } from './contexts/OperationsContext'
import { router } from './router'
import './i18n'
@@ -32,9 +33,11 @@ createRoot(document.getElementById('root')).render(
<ThemeProvider>
<BrandingProvider>
<AuthProvider>
<OperationsProvider>
<RouterProvider router={router} />
</OperationsProvider>
<DeploymentProvider>
<OperationsProvider>
<RouterProvider router={router} />
</OperationsProvider>
</DeploymentProvider>
</AuthProvider>
</BrandingProvider>
</ThemeProvider>

View File

@@ -541,58 +541,73 @@ export default function Chat() {
updateChatSettings(activeChat.id, { clientMCPServers: next })
}, [activeChat, updateChatSettings])
// Load initial message from home page
// Load initial message / assistant launch from the Home page or the navbar
// quick-jump. Factored into a callback so both the mount-time reader and the
// navbar re-trigger event below consume the same payload through one path.
const homeDataProcessed = useRef(false)
useEffect(() => {
if (homeDataProcessed.current) return
const consumeHomeChatData = useCallback(() => {
const stored = localStorage.getItem('localai_index_chat_data')
if (stored) {
homeDataProcessed.current = true
try {
const data = JSON.parse(stored)
localStorage.removeItem('localai_index_chat_data')
if (!stored) return
try {
const data = JSON.parse(stored)
localStorage.removeItem('localai_index_chat_data')
// Two entry shapes from Home:
// - "compose-and-send": data.message present → open new chat,
// prefill the composer, click submit.
// - "open-assistant": no message, just data.localaiAssistant → open
// a fresh chat already in admin mode so the wizard can fire.
const hasMessage = !!data.message
const wantsAssistant = !!data.localaiAssistant
// Two entry shapes from Home:
// - "compose-and-send": data.message present → open new chat,
// prefill the composer, click submit.
// - "open-assistant": no message, just data.localaiAssistant → open
// a fresh chat already in admin mode so the wizard can fire.
const hasMessage = !!data.message
const wantsAssistant = !!data.localaiAssistant
if (hasMessage || wantsAssistant) {
let targetChat = activeChat
if (data.newChat) {
targetChat = addChat(data.model || '', '', data.mcpMode || false)
} else {
if (data.model && activeChat) {
updateChatSettings(activeChat.id, { model: data.model })
}
if (data.mcpMode && activeChat) {
updateChatSettings(activeChat.id, { mcpMode: true })
}
if (hasMessage || wantsAssistant) {
let targetChat = activeChat
if (data.newChat) {
targetChat = addChat(data.model || '', '', data.mcpMode || false)
} else {
if (data.model && activeChat) {
updateChatSettings(activeChat.id, { model: data.model })
}
if (data.mcpServers?.length > 0 && targetChat) {
updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
}
if (data.clientMCPServers?.length > 0 && targetChat) {
updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
}
if (wantsAssistant && targetChat) {
updateChatSettings(targetChat.id, { localaiAssistant: true })
}
if (hasMessage) {
setInput(data.message)
if (data.files) setFiles(data.files)
setTimeout(() => {
const submitBtn = document.getElementById('chat-submit-btn')
submitBtn?.click()
}, 100)
if (data.mcpMode && activeChat) {
updateChatSettings(activeChat.id, { mcpMode: true })
}
}
} catch (_e) { /* ignore */ }
}
}, [])
if (data.mcpServers?.length > 0 && targetChat) {
updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
}
if (data.clientMCPServers?.length > 0 && targetChat) {
updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
}
if (wantsAssistant && targetChat) {
updateChatSettings(targetChat.id, { localaiAssistant: true })
}
if (hasMessage) {
setInput(data.message)
if (data.files) setFiles(data.files)
setTimeout(() => {
const submitBtn = document.getElementById('chat-submit-btn')
submitBtn?.click()
}, 100)
}
}
} catch (_e) { /* ignore */ }
}, [activeChat, addChat, updateChatSettings])
useEffect(() => {
if (homeDataProcessed.current) return
homeDataProcessed.current = true
consumeHomeChatData()
}, [consumeHomeChatData])
// Admins can re-trigger the assistant jump from the navbar while already on
// the chat page; navigate('/app/chat') does not remount Chat, so the
// mount-time reader above never fires. The launcher dispatches this event
// after writing the payload so we re-consume it and open a fresh assistant.
useEffect(() => {
const onOpenAssistant = () => consumeHomeChatData()
window.addEventListener('localai-open-assistant', onOpenAssistant)
return () => window.removeEventListener('localai-open-assistant', onOpenAssistant)
}, [consumeHomeChatData])
// Track whether the user is pinned to the bottom. If they scroll up
// while a response is streaming, stop forcing them back down.

View File

@@ -13,6 +13,7 @@ import { useResources } from '../hooks/useResources'
import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
import { API_CONFIG } from '../utils/config'
import { greetingKey } from '../utils/greeting'
import { launchAssistantChat } from '../utils/launchAssistantChat'
import StatusPill from '../components/StatusPill'
import Skeleton from '../components/Skeleton'
import SectionHeading from '../components/SectionHeading'
@@ -228,16 +229,8 @@ export default function Home() {
// requiring an initial message or model selection. Useful when an admin
// wants to start the assistant from a cold home page.
const openAssistantChat = useCallback(() => {
const chatData = {
model: selectedModel || '',
mcpMode: false,
localaiAssistant: true,
newChat: true,
}
localStorage.setItem('localai_index_chat_data', JSON.stringify(chatData))
try { localStorage.setItem('localai_assistant_used', '1') } catch { /* ignore */ }
launchAssistantChat(navigate, selectedModel)
setAssistantUsed(true)
navigate('/app/chat')
}, [navigate, selectedModel])
const handleSubmit = (e) => {

View File

@@ -6,6 +6,7 @@ import RequireAdmin from './components/RequireAdmin'
import RequireAuth from './components/RequireAuth'
import RequireAuthEnabled from './components/RequireAuthEnabled'
import RequireFeature from './components/RequireFeature'
import HomeRoute from './components/HomeRoute'
// Pages are code-split: each becomes its own chunk loaded on demand, so a route
// no longer drags every other page (and its heavy deps — CodeMirror, the MCP
@@ -32,7 +33,7 @@ export function preloadRoute(path) {
preloaders[m[1] ?? '']?.().catch(() => { /* network blip — real click will retry */ })
}
const Home = page('', () => import('./pages/Home'))
page('', () => import('./pages/Home'))
const Chat = page('chat', () => import('./pages/Chat'))
const Models = page('models', () => import('./pages/Models'))
const Manage = page('manage', () => import('./pages/Manage'))
@@ -96,7 +97,7 @@ function Feature({ feature, children }) {
}
const appChildren = [
{ index: true, element: <Home /> },
{ index: true, element: <HomeRoute /> },
{ path: 'chat', element: <Chat /> },
{ path: 'chat/:model', element: <Chat /> },
{ path: 'image', element: <ImageGen /> },

View File

@@ -0,0 +1,19 @@
// Opens a fresh chat already in LocalAI Assistant ("manage") mode. Chat.jsx
// reads localai_index_chat_data on mount and enables localaiAssistant for the
// new chat. Shared by the Home CTA and the top navbar quick-jump so there is
// one definition of how the assistant is launched.
export function launchAssistantChat(navigate, model = '') {
const chatData = {
model: model || '',
mcpMode: false,
localaiAssistant: true,
newChat: true,
}
try { localStorage.setItem('localai_index_chat_data', JSON.stringify(chatData)) } catch { /* ignore */ }
try { localStorage.setItem('localai_assistant_used', '1') } catch { /* ignore */ }
navigate('/app/chat')
// When already on /app/chat, navigate() does not remount Chat, so its
// mount-time reader would never see the payload above. Signal the mounted
// Chat to re-consume it; harmless elsewhere since Chat reads on mount anyway.
try { window.dispatchEvent(new CustomEvent('localai-open-assistant')) } catch { /* ignore */ }
}

View File

@@ -0,0 +1,11 @@
// Pure landing-page resolver for the index route. Returns a target path, or ''
// meaning "render the default Home". Admin precedence is distributed > p2p >
// plain; non-admins always go to Chat (distributed/p2p are admin-only and
// invisible to them). Visibility gates are enforced elsewhere - this only
// chooses where /app lands.
export function resolveHome({ isAdmin, distributed, p2pEnabled }) {
if (!isAdmin) return '/app/chat'
if (distributed) return '/app/nodes'
if (p2pEnabled) return '/app/p2p'
return ''
}

View File

@@ -0,0 +1,20 @@
import { operateConsole, isConsoleItemVisible } from '../components/console/consoleConfig'
// The Operate > Cluster group, surfaced as a pinned top-of-sidebar quick-access
// group when the admin is running a cluster (NATS-distributed) or a P2P swarm.
// Items are filtered through the SAME gate as everywhere else, so e.g. in a
// p2p-only deployment Nodes/Scheduling (feature: 'distributed') drop out and
// only Swarm remains. Returns [] when the pin does not apply.
export function clusterPinItems(auth, deployment) {
if (!auth.isAdmin) return []
if (!deployment.distributed && !deployment.p2pEnabled) return []
const group = operateConsole.groups.find(g => g.titleKey === 'operate.cluster')
if (!group) return []
return group.items.filter(item => isConsoleItemVisible(item, auth))
}
// In the cluster cells the Create group defaults collapsed so the pinned
// cluster group leads. Users can still expand it; their stored choice wins.
export function shouldCollapseCreate(auth, deployment) {
return !!auth.isAdmin && (!!deployment.distributed || !!deployment.p2pEnabled)
}

View File

@@ -79,29 +79,21 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
}).Create(op).Error
}
// UpdateProgress updates progress for an operation. The cancellable flag is
// persisted on every tick so a replica that restarts mid-install rehydrates the
// op as still cancellable — otherwise the column keeps its Create-time zero
// value (false), the UI hides the cancel button, and the orphaned op can only
// be dismissed by waiting for the 30-minute stale reaper.
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
// UpdateProgress updates progress for an operation.
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
"progress": progress,
"message": message,
"downloaded_file_size": downloadedSize,
"cancellable": cancellable,
"updated_at": time.Now(),
}).Error
}
// UpdateStatus updates the status of an operation. A terminal status is never
// cancellable, so the flag is cleared here to keep the persisted row consistent
// with what the UI should offer.
// UpdateStatus updates the status of an operation.
func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
updates := map[string]any{
"status": status,
"cancellable": false,
"updated_at": time.Now(),
"status": status,
"updated_at": time.Now(),
}
if errMsg != "" {
updates["error"] = errMsg

View File

@@ -1,56 +0,0 @@
package galleryop_test
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services/distributed"
"github.com/mudler/LocalAI/core/services/galleryop"
"github.com/mudler/LocalAI/core/services/testutil"
)
// Reproduces "an in-flight install can't be cancelled after a restart". The
// live install path marks OpStatus.Cancellable=true on every progress tick, but
// UpdateStatus persisted progress/status to the gallery store WITHOUT the
// cancellable flag, and Create defaulted it to false. So after a replica
// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
// reported cancellable:false, and the UI hid the cancel button — the orphaned
// op lingered until the 30-minute stale reaper expired it. The cancellable
// state must be persisted so a rehydrated in-flight op stays cancellable.
var _ = Describe("GalleryService cancellable persistence across restart", func() {
It("rehydrates an in-flight op as still cancellable", func() {
db := testutil.SetupTestDB()
store, err := distributed.NewGalleryStore(db)
Expect(err).ToNot(HaveOccurred())
svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
svc.SetGalleryStore(store)
// Seed the in-flight op row as the worker goroutine does on admission.
Expect(store.Create(&distributed.GalleryOperationRecord{
ID: "op-inflight",
GalleryElementName: "llama-cpp-development",
OpType: "backend_install",
Status: "pending",
})).To(Succeed())
// Simulate a progress tick: the live path always marks installs
// cancellable while they are downloading/processing.
svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
Message: "downloading",
Progress: 25,
Cancellable: true,
})
// A fresh replica boots and hydrates from the store.
fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
fresh.SetGalleryStore(store)
Expect(fresh.Hydrate()).To(Succeed())
st := fresh.GetStatus("op-inflight")
Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
Expect(st.Cancellable).To(BeTrue(),
"a still-active install must rehydrate as cancellable so the admin can dismiss it")
})
})

View File

@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
}
} else {
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
}
}
@@ -467,7 +467,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
GalleryElementName: op.GalleryElementName,
OpType: "backend_install",
Status: "pending",
Cancellable: true,
})
}
err := g.backendHandler(&op, systemState)
@@ -500,8 +499,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
GalleryElementName: op.GalleryElementName,
OpType: opType,
Status: "pending",
// A delete is not cancellable; an install is.
Cancellable: !op.Delete,
})
}
err := g.modelHandler(&op, cl, systemState)

View File

@@ -19,40 +19,25 @@ import (
// Per-replica: a single tracker instance is bound to (nodeID, modelName, replicaIndex).
// The router constructs one tracker per Route() result, so each in-flight tick lands
// on the correct row even when multiple replicas of the same model live on the same node.
//
// Embedding only grpc.ControlBackend (not the whole grpc.Backend) is what makes
// the in-flight accounting safe by construction: the control-plane methods pass
// through untracked, while every grpc.InferenceBackend method must be declared
// explicitly below to satisfy grpc.Backend. Adding an inference method to the
// interface therefore breaks this file's build (see the var assertion below)
// until it is wrapped with track() - so a new inference path can't be added
// without an in-flight accounting decision.
type InFlightTrackingClient struct {
grpc.ControlBackend // passthrough for control-plane / streaming-constructor methods
inner grpc.InferenceBackend // tracked inference methods delegate here
registry InFlightTracker
nodeID string
modelName string
replicaIndex int
grpc.Backend // embed for passthrough of untracked methods
registry InFlightTracker
nodeID string
modelName string
replicaIndex int
firstOnce sync.Once // guards onFirstComplete
onFirstComplete func() // called once after the first tracked inference call completes
}
// Compile-time contract: *InFlightTrackingClient must implement the FULL backend
// surface. Because it embeds only ControlBackend, this fails to compile if any
// InferenceBackend method is left unwrapped.
var _ grpc.Backend = (*InFlightTrackingClient)(nil)
// NewInFlightTrackingClient wraps a gRPC backend client with in-flight tracking.
func NewInFlightTrackingClient(inner grpc.Backend, registry InFlightTracker, nodeID, modelName string, replicaIndex int) *InFlightTrackingClient {
return &InFlightTrackingClient{
ControlBackend: inner,
inner: inner,
registry: registry,
nodeID: nodeID,
modelName: modelName,
replicaIndex: replicaIndex,
Backend: inner,
registry: registry,
nodeID: nodeID,
modelName: modelName,
replicaIndex: replicaIndex,
}
}
@@ -106,162 +91,154 @@ func (c *InFlightTrackingClient) reconcile(err error) error {
func (c *InFlightTrackingClient) Predict(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.Reply, error) {
defer c.track(ctx)()
reply, err := c.inner.Predict(ctx, in, opts...)
reply, err := c.Backend.Predict(ctx, in, opts...)
return reply, c.reconcile(err)
}
func (c *InFlightTrackingClient) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
defer c.track(ctx)()
return c.reconcile(c.inner.PredictStream(ctx, in, f, opts...))
return c.reconcile(c.Backend.PredictStream(ctx, in, f, opts...))
}
func (c *InFlightTrackingClient) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.EmbeddingResult, error) {
defer c.track(ctx)()
res, err := c.inner.Embeddings(ctx, in, opts...)
res, err := c.Backend.Embeddings(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
defer c.track(ctx)()
res, err := c.inner.GenerateImage(ctx, in, opts...)
res, err := c.Backend.GenerateImage(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
defer c.track(ctx)()
res, err := c.inner.GenerateVideo(ctx, in, opts...)
res, err := c.Backend.GenerateVideo(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) TTS(ctx context.Context, in *pb.TTSRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
defer c.track(ctx)()
res, err := c.inner.TTS(ctx, in, opts...)
res, err := c.Backend.TTS(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
defer c.track(ctx)()
return c.reconcile(c.inner.TTSStream(ctx, in, f, opts...))
return c.reconcile(c.Backend.TTSStream(ctx, in, f, opts...))
}
func (c *InFlightTrackingClient) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
defer c.track(ctx)()
res, err := c.inner.SoundGeneration(ctx, in, opts...)
res, err := c.Backend.SoundGeneration(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...ggrpc.CallOption) (*pb.TranscriptResult, error) {
defer c.track(ctx)()
res, err := c.inner.AudioTranscription(ctx, in, opts...)
res, err := c.Backend.AudioTranscription(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...ggrpc.CallOption) error {
defer c.track(ctx)()
return c.reconcile(c.inner.AudioTranscriptionStream(ctx, in, f, opts...))
return c.reconcile(c.Backend.AudioTranscriptionStream(ctx, in, f, opts...))
}
func (c *InFlightTrackingClient) Detect(ctx context.Context, in *pb.DetectOptions, opts ...ggrpc.CallOption) (*pb.DetectResponse, error) {
defer c.track(ctx)()
res, err := c.inner.Detect(ctx, in, opts...)
res, err := c.Backend.Detect(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
defer c.track(ctx)()
res, err := c.inner.Depth(ctx, in, opts...)
res, err := c.Backend.Depth(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
defer c.track(ctx)()
res, err := c.inner.Rerank(ctx, in, opts...)
res, err := c.Backend.Rerank(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) VAD(ctx context.Context, in *pb.VADRequest, opts ...ggrpc.CallOption) (*pb.VADResponse, error) {
defer c.track(ctx)()
res, err := c.inner.VAD(ctx, in, opts...)
res, err := c.Backend.VAD(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...ggrpc.CallOption) (*pb.DiarizeResponse, error) {
defer c.track(ctx)()
res, err := c.inner.Diarize(ctx, in, opts...)
res, err := c.Backend.Diarize(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
defer c.track(ctx)()
res, err := c.inner.FaceVerify(ctx, in, opts...)
res, err := c.Backend.FaceVerify(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) FaceAnalyze(ctx context.Context, in *pb.FaceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.FaceAnalyzeResponse, error) {
defer c.track(ctx)()
res, err := c.inner.FaceAnalyze(ctx, in, opts...)
res, err := c.Backend.FaceAnalyze(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...ggrpc.CallOption) (*pb.VoiceVerifyResponse, error) {
defer c.track(ctx)()
res, err := c.inner.VoiceVerify(ctx, in, opts...)
res, err := c.Backend.VoiceVerify(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.VoiceAnalyzeResponse, error) {
defer c.track(ctx)()
res, err := c.inner.VoiceAnalyze(ctx, in, opts...)
res, err := c.Backend.VoiceAnalyze(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...ggrpc.CallOption) (*pb.VoiceEmbedResponse, error) {
defer c.track(ctx)()
res, err := c.inner.VoiceEmbed(ctx, in, opts...)
res, err := c.Backend.VoiceEmbed(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...ggrpc.CallOption) (*pb.TokenClassifyResponse, error) {
defer c.track(ctx)()
res, err := c.inner.TokenClassify(ctx, in, opts...)
res, err := c.Backend.TokenClassify(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) Score(ctx context.Context, in *pb.ScoreRequest, opts ...ggrpc.CallOption) (*pb.ScoreResponse, error) {
defer c.track(ctx)()
res, err := c.inner.Score(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...ggrpc.CallOption) (*pb.SoundDetectionResponse, error) {
defer c.track(ctx)()
res, err := c.inner.SoundDetection(ctx, in, opts...)
res, err := c.Backend.Score(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...ggrpc.CallOption) (*pb.AudioEncodeResult, error) {
defer c.track(ctx)()
res, err := c.inner.AudioEncode(ctx, in, opts...)
res, err := c.Backend.AudioEncode(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...ggrpc.CallOption) (*pb.AudioDecodeResult, error) {
defer c.track(ctx)()
res, err := c.inner.AudioDecode(ctx, in, opts...)
res, err := c.Backend.AudioDecode(ctx, in, opts...)
return res, c.reconcile(err)
}
func (c *InFlightTrackingClient) AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...ggrpc.CallOption) (*pb.AudioTransformResult, error) {
defer c.track(ctx)()
res, err := c.inner.AudioTransform(ctx, in, opts...)
res, err := c.Backend.AudioTransform(ctx, in, opts...)
return res, c.reconcile(err)
}
// AudioTransformStream, AudioToAudioStream and Forward live in grpc.ControlBackend
// and are passed through via the embedded field, NOT tracked: they return a stream
// client and the inference spans the stream's lifetime, not the constructor call.
// Wrapping the constructor with track() would increment and immediately decrement
// (and fire onFirstComplete) before any audio flows. Tracking those correctly needs
// the done() func tied to stream close, which the Backend interface doesn't surface
// here. If they ever need tracking, move them to grpc.InferenceBackend - the build
// will then force an explicit wrapper here.
// AudioTransformStream, AudioToAudioStream and Forward are deliberately left as
// embedded passthrough: they return a stream client and the inference spans the
// stream's lifetime, not the constructor call. Wrapping the constructor with
// track() would increment and immediately decrement (and fire onFirstComplete)
// before any audio flows. Tracking those correctly needs the done() func tied to
// stream close, which the current Backend interface doesn't surface here.

View File

@@ -408,13 +408,6 @@ var _ = Describe("InFlightTrackingClient", func() {
return err
})
})
It("SoundDetection", func() {
assertTracked(func() error {
_, err := client.SoundDetection(context.Background(), &pb.SoundDetectionRequest{})
return err
})
})
})
Describe("stale model reload (self-heal)", func() {

View File

@@ -185,13 +185,6 @@ It is persisted through `POST /api/settings` and read live, so a change takes
effect on the next request without a restart. A default that names a model no
longer loaded still appears (marked *not loaded*) so it can be toggled off.
The default set can also be supplied out-of-band with the
`LOCALAI_PII_DEFAULT_DETECTORS` environment variable (comma-separated model
names, e.g. `privacy-filter-nemotron,secret-filter`). When set it takes
precedence over the value persisted via the UI (env > file), which is the
right behaviour for immutable container deployments that pin filtering policy
at boot rather than via the admin UI.
This is what makes `cloud-proxy` / MITM redaction work out of the box: those
backends default to PII-enabled but ship no detector list, so without a
default detector the filter runs with nothing to scan. Set one here and

View File

@@ -1,225 +1,4 @@
---
- name: "lfm2.5-1.2b-instruct"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF
description: |
Try LFM • Docs • LEAP • Discord
# LFM2.5-1.2B-Instruct
LFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.
- **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket.
- **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM.
- **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning.
Find more information about LFM2.5 in our blog post.
## 🗒️ Model Details
LFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features:
...
license: "other"
tags:
- llm
- gguf
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/dxnYF2fuLpulismtFSGFi.png
overrides:
backend: llama-cpp
function:
automatic_tool_parsing_fallback: true
grammar:
disable: true
known_usecases:
- chat
options:
- use_jinja:true
parameters:
min_p: 0.15
model: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
repeat_penalty: 1.05
temperature: 0.1
top_k: 50
top_p: 0.1
template:
use_tokenizer_template: true
files:
- filename: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
sha256: b1b3de114215d9507409a662a501a631095a479a419584e8a2ded6304b19b4f5
uri: https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF/resolve/main/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
- name: "qwopus3.6-27b-coder-compat-mtp"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding &amp; Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0 Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0 Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
license: "apache-2.0"
tags:
- llm
- gguf
- vision
- multimodal
- reasoning
icon: https://cdn-uploads.huggingface.co/production/uploads/66309bd090589b7c65950665/sGQKmrMc6L6guMoaB5_Y2.png
overrides:
backend: llama-cpp
function:
automatic_tool_parsing_fallback: true
grammar:
disable: true
known_usecases:
- chat
mmproj: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
options:
- use_jinja:true
- spec_type:draft-mtp
- spec_n_max:6
- spec_p_min:0.75
parameters:
model: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
template:
use_tokenizer_template: true
files:
- filename: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
sha256: f893632170124da60e159b7bcc9d91e1cda3014b2c6b8ad9c6cde38a1fcd2f6f
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
- filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/mmproj-F32.gguf
- name: "kimi-k2.7-code"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF
description: |
## 1. Model Introduction
Kimi K2.7 Code is a coding-focused agentic model built upon Kimi K2.6. With substantial improvements on real-world long-horizon coding tasks, it strengthens end-to-end task completion across complex software engineering workflows while improving token efficiency, reducing thinking-token usage by approximately 30% compared with Kimi K2.6.
## 2. Model Summary
## 3. Evaluation Results
Benchmark
Kimi K2.6
Kimi K2.7 Code
GPT-5.5
Claude Opus 4.8
Coding
Kimi Code Bench v2
50.9
62.0
69.0
67.4
Program Bench
48.3
53.6
69.1
63.8
MLS Bench Lite
26.7
35.1
35.5
42.8
Agentic
Kimi Claw 24/7 Bench
42.9
46.9
52.8
50.4
MCP Atlas
69.4
76.0
79.4
81.3
MCP Mark Verified
72.8
81.1
92.9
76.4
Footnotes
...
license: "other"
tags:
- llm
- gguf
icon: https://huggingface.co/moonshotai/Kimi-K2.7-Code/resolve/main/figures/kimi-logo.png
overrides:
backend: llama-cpp
function:
automatic_tool_parsing_fallback: true
grammar:
disable: true
known_usecases:
- chat
mmproj: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
options:
- use_jinja:true
parameters:
min_p: 0.01
model: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
repeat_penalty: 1
temperature: 0.6
top_k: -1
top_p: 0.95
template:
use_tokenizer_template: true
files:
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
sha256: 65f0aca336f876902323a90e2aff32cac76d071b2cdd818c6a8d78be8fc2c680
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
sha256: 40f4416c130827a11502778891f4ef95b2144db90f51d63aa3548d0952a39683
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
sha256: ba2ba0b5168784ace7c752ecadfc3631279b2bb023824cb0fe9e2dab3dd28f22
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
sha256: 10298a6c98b13ef49be286fefbea8663e16473fb69bbeabe153bc80c60ae116e
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
sha256: 8e9e4c8e35d34fc4fef6bfb65a715ad7defbd196970d833c1df6924d701c88b3
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
sha256: ccff6e7f299742f82cf6f51a871e3eb3167511efaee967477cc8387f54d16442
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
sha256: 1a3b639633a2d22f71156a9f643ded2329cdd969cc21177b644b5741bac1af8e
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
sha256: bde28f682a1eab973538b2102007d952f37a13c1f7d55e2ed99177445ddc4282
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
sha256: b6a23a95b61e100f7593fa75e2363966323fa767b7e4fdf45d963b59e8fdc69f
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
sha256: fb10231c2e6d76921d40f22690f4aa08a8090c708edeaf7e581abafc24d3b25c
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
sha256: d2290be7ed1a22ac1f9f8a4813389689e075ce2ab8abc3aaaa1157a3cb1462d8
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
sha256: ce0d028314aa3fc783082dbca097e1055d69686a17ab8306574e2949568f26a5
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
sha256: 217864ce63a1d130ab39dcb0996b6097e1aa78eb896e38efaefdbbac3a00b7ec
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
sha256: eb7582ad7066c5eaa01bde95acb00b4ad9cd7b07cd50a6cf5c9ee427258bc9dd
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
- filename: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
sha256: b2cc50c8c13fe70fc4968a83332f31e9007ea09ebb9ae91d46a4e4cd2a3053cd
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/mmproj-F32.gguf
- name: "qwythos-9b-claude-mythos-5-1m"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
@@ -270,7 +49,33 @@
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/unsloth/GLM-5.2-GGUF
description: "# GLM-5.2\n\n\U0001F44B Join our WeChat or Discord community.\n\n\U0001F4D6 Check out the GLM-5.2 blog and GLM-5 Technical report.\n\n\U0001F4CD Use GLM-5.2 API services on Z.ai API Platform.\n\n\U0001F51C Try GLM-5.2 here.\n\n[Paper]\n[GitHub]\n\n## Introduction\n\nWe're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:\n - **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work\n - **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency\n - **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2s MTP layer for speculative decoding, increasing the acceptance length by up to 20%\n - **Pure Open**: An MIT open-source license — no regional limits, technical access without borders\n\n## Benchmark\n\n## Serve GLM-5.2 Locally\n\n...\n"
description: |
# GLM-5.2
👋 Join our WeChat or Discord community.
📖 Check out the GLM-5.2 blog and GLM-5 Technical report.
📍 Use GLM-5.2 API services on Z.ai API Platform.
🔜 Try GLM-5.2 here.
[Paper]
[GitHub]
## Introduction
We're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:
- **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work
- **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency
- **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2s MTP layer for speculative decoding, increasing the acceptance length by up to 20%
- **Pure Open**: An MIT open-source license — no regional limits, technical access without borders
## Benchmark
## Serve GLM-5.2 Locally
...
license: "mit"
tags:
- llm
@@ -393,7 +198,26 @@
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-v2-MTP-NVFP4-GGUF
description: "\U0001FA90 Qwopus3.6-27B-v2-MTP\nMTP Release\n\nMulti-Token Prediction reasoning model fine-tuned from Qwen3.6-27B\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Parameters\n⚡ Speculative Decoding\n\U0001F6E0 Coding / DevOps / Math\n\n\U0001F4A1 What is Qwopus3.6-27B-v2-MTP?\n\U0001FA90 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.\n\n⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.\n\U0001F9E9 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.\n\U0001F9EA GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.\n\U0001F680 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.\n\n...\n"
description: |
🪐 Qwopus3.6-27B-v2-MTP
MTP Release
Multi-Token Prediction reasoning model fine-tuned from Qwen3.6-27B
🧬 Trace Inversion & Negentropy
🧠 27B Parameters
⚡ Speculative Decoding
🛠️ Coding / DevOps / Math
💡 What is Qwopus3.6-27B-v2-MTP?
🪐 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.
⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.
🧩 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.
🧪 GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.
🚀 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.
...
tags:
- llm
- gguf
@@ -419,7 +243,28 @@
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-Coder-MTP-NVFP4-GGUF
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding &amp; Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0 Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0 Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
description: |
🪐 Qwopus-3.6-27B-Coder
Coder SFT Release
Agentic Coding &amp; Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2
🧬 Trace Inversion & Negentropy
🧠 27B Dense Model
⚡ Agentic Coding
🛠️ Tool Calling & Agent
🏆 SWE-bench Verified: 67.0% (off-thinking)
💡 What is Qwopus-3.6-27B-Coder?
🪐 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.
🧩 Agentic Coding
Optimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.
🛠️ Tool Calling
Learns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.
...
tags:
- llm
- gguf
@@ -1639,8 +1484,8 @@
use_tokenizer_template: true
files:
- filename: llama-cpp/models/Qwopus3.6-27B-v2-MTP-GGUF/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
sha256: 818d68223be4d8518dac0b3b5604dde633cbbcbae1f491d842a3e26711c6606d
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-v2-MTP-GGUF/resolve/main/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
sha256: 31cf5fc2406a0c7aaebcc26d440bf0df94e215d0589d5205bf319649c052b50a
- name: "qwen3.6-40b-claude-4.6-opus-deckard-heretic-uncensored-thinking-neo-code-di-imatrix-max"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:

View File

@@ -41,34 +41,11 @@ func buildClient(address string, parallel bool, wd WatchDog, enableWatchDog bool
}
}
// Backend is the full client surface of a model backend. It is deliberately
// composed of two sub-interfaces so that wrappers can get a COMPILE-TIME
// guarantee about which methods they must account for:
//
// - InferenceBackend - methods that each perform one discrete inference call
// (the call begins on entry and ends on return). A wrapper that does
// per-call accounting - e.g. the distributed router's in-flight tracker,
// core/services/nodes.InFlightTrackingClient - embeds only ControlBackend
// and implements every InferenceBackend method explicitly. Adding a method
// to InferenceBackend therefore breaks that wrapper's build until it is
// implemented: inference can't be added without an accounting decision.
// - ControlBackend - everything that is NOT a discrete inference call:
// lifecycle/control-plane operations and the streaming constructors whose
// work spans the returned stream rather than the constructor call. These
// are safe to pass through untracked.
//
// Keep the two sets disjoint; every backend method belongs to exactly one.
type Backend interface {
InferenceBackend
ControlBackend
}
// InferenceBackend is the subset of Backend whose methods each map to a single
// inference call. Wrappers that account for in-flight work must implement these
// explicitly (see Backend). Do NOT add methods that return a stream client or
// that are control-plane only - those belong in ControlBackend.
type InferenceBackend interface {
IsBusy() bool
HealthCheck(ctx context.Context) (bool, error)
Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error)
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
@@ -76,8 +53,6 @@ type InferenceBackend interface {
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...grpc.CallOption) error
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error)
FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error)
@@ -85,25 +60,8 @@ type InferenceBackend interface {
VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...grpc.CallOption) (*pb.VoiceVerifyResponse, error)
VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...grpc.CallOption) (*pb.VoiceAnalyzeResponse, error)
VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...grpc.CallOption) (*pb.VoiceEmbedResponse, error)
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
}
// ControlBackend is the subset of Backend that is NOT per-call inference:
// lifecycle/control-plane operations and the streaming constructors whose work
// spans the returned stream rather than the constructor call. In-flight-tracking
// wrappers embed this directly and pass it through untracked (see Backend).
type ControlBackend interface {
IsBusy() bool
HealthCheck(ctx context.Context) (bool, error)
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
Status(ctx context.Context) (*pb.StatusResponse, error)
@@ -112,11 +70,24 @@ type ControlBackend interface {
StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error)
// Streaming constructors: these return a stream client immediately; the
// actual inference spans the stream's lifetime, not this call, so they are
// NOT tracked as a single in-flight unit.
VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
AudioTransformStream(ctx context.Context, opts ...grpc.CallOption) (AudioTransformStreamClient, error)
AudioToAudioStream(ctx context.Context, opts ...grpc.CallOption) (AudioToAudioStreamClient, error)

View File

@@ -53,13 +53,12 @@ var _ = Describe("Gallery Distributed", Label("Distributed"), func() {
Expect(retrieved.Status).To(Equal("downloading"))
Expect(retrieved.FrontendID).To(Equal("f1"))
// Update progress (cancellable: a downloading install can be cancelled)
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB", true)).To(Succeed())
// Update progress
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB")).To(Succeed())
updated, _ := galleryStore.Get(op.ID)
Expect(updated.Progress).To(BeNumerically("~", 0.75, 0.01))
Expect(updated.Message).To(Equal("75% complete"))
Expect(updated.Cancellable).To(BeTrue())
// Complete
Expect(galleryStore.UpdateStatus(op.ID, "completed", "")).To(Succeed())

View File

@@ -104,12 +104,11 @@ var _ = Describe("Phase 4: MCP, Skills, Gallery, Fine-Tuning", Label("Distribute
}
stores.Gallery.Create(op)
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB", true)).To(Succeed())
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB")).To(Succeed())
updated, _ := stores.Gallery.Get(op.ID)
Expect(updated.Progress).To(BeNumerically("~", 0.5, 0.01))
Expect(updated.Message).To(Equal("50% complete"))
Expect(updated.Cancellable).To(BeTrue())
})
It("should deduplicate concurrent downloads", func() {