mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-24 00:28:55 -04:00
Compare commits
11 Commits
dependabot
...
v4.5.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
deb430f3ec | ||
|
|
dd8c8778e2 | ||
|
|
06a7b6cadb | ||
|
|
67c8889866 | ||
|
|
1d49041c85 | ||
|
|
2edc4e25b3 | ||
|
|
7888067914 | ||
|
|
9eedbf537a | ||
|
|
69c16481c8 | ||
|
|
56f8a6623f | ||
|
|
4755d676a3 |
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=7c082bc417bbe53210a83df4ba5b49e18ce6193c
|
||||
LLAMA_VERSION?=73618f27a801c0b8614ceaf3547d3c2a99baae14
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (pinned commit SHA)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=7a8cb80907341c0204bd0488c1244764f4163883
|
||||
CRISPASR_VERSION?=63b57289255267edf66e43e33bc3911e04a2e92d
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
|
||||
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
|
||||
WHISPER_CPP_VERSION?=bae6bc02b1940bbfb87b6a0299c565e563b916d1
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
||||
@@ -33,6 +34,35 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
|
||||
return settings, nil
|
||||
}
|
||||
|
||||
// MergeNonNil overlays every set (non-nil) field of overlay onto the
|
||||
// receiver, leaving the receiver's value untouched wherever overlay left a
|
||||
// field unset. Every RuntimeSettings field is a pointer precisely so "set"
|
||||
// can be told apart from "absent" (see the type doc), which makes this a
|
||||
// faithful partial update: a caller that submits only the field it owns
|
||||
// changes exactly that field and never clobbers unrelated settings.
|
||||
//
|
||||
// This is the read-modify-write contract the persistence helpers exist for.
|
||||
// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
|
||||
// on top, and writes the result — so a focused admin page that POSTs only its
|
||||
// own field (the Middleware page sends only mitm_listen; the detector table
|
||||
// only pii_default_detectors) no longer nulls every other setting.
|
||||
//
|
||||
// Reflection keeps the merge total over the struct: a field added to
|
||||
// RuntimeSettings later is merged automatically, so the persistence path can
|
||||
// never silently drop a new setting the way a hand-maintained field list
|
||||
// would. Non-pointer fields (none today) are skipped — they cannot express
|
||||
// "absent", so the receiver wins.
|
||||
func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
|
||||
dst := reflect.ValueOf(s).Elem()
|
||||
src := reflect.ValueOf(overlay)
|
||||
for i := 0; i < src.NumField(); i++ {
|
||||
f := src.Field(i)
|
||||
if f.Kind() == reflect.Pointer && !f.IsNil() {
|
||||
dst.Field(i).Set(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WritePersistedSettings serialises the given RuntimeSettings to
|
||||
// runtime_settings.json with restricted permissions (it may carry API
|
||||
// keys and P2P tokens).
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
)
|
||||
|
||||
// strPtr returns a pointer to a freshly allocated copy of s.
func strPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}
|
||||
// boolPtr returns a pointer to a freshly allocated copy of b.
func boolPtr(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
|
||||
|
||||
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
var (
|
||||
@@ -51,6 +52,47 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
|
||||
// relies on: a focused admin page POSTs only the field it owns, and the
|
||||
// handler reads the on-disk settings and overlays the request on top.
|
||||
// Without it, the body would be written verbatim and every field the
|
||||
// caller omitted would be nulled (the reported regression: changing
|
||||
// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
|
||||
Describe("MergeNonNil partial update", func() {
|
||||
It("overlays set fields and preserves unset ones", func() {
|
||||
base := config.RuntimeSettings{
|
||||
MITMListen: strPtr(":9000"),
|
||||
Galleries: &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
|
||||
WatchdogIdleEnabled: boolPtr(true),
|
||||
ApiKeys: &[]string{"persisted-key"},
|
||||
PIIDefaultDetectors: &[]string{"det-a"},
|
||||
}
|
||||
|
||||
// Simulate the Middleware proxy tab: only mitm_listen is sent.
|
||||
overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
|
||||
base.MergeNonNil(overlay)
|
||||
|
||||
Expect(base.MITMListen).ToNot(BeNil())
|
||||
Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
|
||||
// Everything the overlay left unset must survive untouched.
|
||||
Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
|
||||
Expect(*base.Galleries).To(HaveLen(1))
|
||||
Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
|
||||
Expect(*base.WatchdogIdleEnabled).To(BeTrue())
|
||||
Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
|
||||
Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
|
||||
Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
It("lets an explicit empty slice clear a field", func() {
|
||||
base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
|
||||
base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil())
|
||||
Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
|
||||
})
|
||||
})
|
||||
|
||||
// MITM round trip pins the contract loadRuntimeSettingsFromFile depends on: the
|
||||
// MITM listener address must survive a write/read round trip so the
|
||||
// next process restart can bring the listener back up. (Intercept
|
||||
|
||||
@@ -4,8 +4,6 @@ import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -110,6 +108,18 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
})
|
||||
}
|
||||
|
||||
// Read whatever is already persisted: it is both the source of truth
|
||||
// for branding asset filenames (below) and the base we merge this
|
||||
// request onto before writing. A read failure must not let a Save
|
||||
// silently discard the existing settings — surface it instead.
|
||||
persisted, err := appConfig.ReadPersistedSettings()
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to read existing settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// Branding asset filenames are owned exclusively by
|
||||
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
||||
// round-trips them via GET /api/settings, but its local state is stale
|
||||
@@ -118,11 +128,9 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
// at page open. Replace whatever the body sent for these three fields
|
||||
// with the values currently on disk so /api/settings can never
|
||||
// regress them.
|
||||
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
|
||||
settings.LogoFile = existing.LogoFile
|
||||
settings.LogoHorizontalFile = existing.LogoHorizontalFile
|
||||
settings.FaviconFile = existing.FaviconFile
|
||||
}
|
||||
settings.LogoFile = persisted.LogoFile
|
||||
settings.LogoHorizontalFile = persisted.LogoHorizontalFile
|
||||
settings.FaviconFile = persisted.FaviconFile
|
||||
|
||||
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
||||
// merged env+runtime list. When the user clicks Save, the same merged
|
||||
@@ -145,16 +153,17 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
settings.ApiKeys = &runtimeOnly
|
||||
}
|
||||
|
||||
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
||||
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to marshal settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
||||
// Persist as a partial update: overlay only the fields this request set
|
||||
// onto the settings already on disk. Focused admin pages POST just the
|
||||
// keys they own (the Middleware proxy tab sends only mitm_listen; the
|
||||
// detector table only pii_default_detectors), so writing the request
|
||||
// body verbatim would null every unrelated setting (the no-omitempty
|
||||
// api_keys / pii_default_detectors fields even round-trip as JSON
|
||||
// null). The full Settings page still round-trips every field, so its
|
||||
// Save is unchanged.
|
||||
toPersist := persisted
|
||||
toPersist.MergeNonNil(settings)
|
||||
if err := appConfig.WritePersistedSettings(toPersist); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to write settings file: " + err.Error(),
|
||||
|
||||
@@ -52,6 +52,10 @@ var _ = Describe("Settings endpoints", func() {
|
||||
// Settings are persisted here; set after construction since there's no
|
||||
// dedicated AppOption for it.
|
||||
app.ApplicationConfig().DynamicConfigsDir = tmp
|
||||
// Contain the MITM CA inside tmp too. The partial-save spec flips
|
||||
// mitm_listen, which starts the listener and writes a CA; without this
|
||||
// it defaults to ./mitm-ca and litters the package source tree.
|
||||
app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
|
||||
|
||||
e = echo.New()
|
||||
e.GET("/api/settings", GetSettingsEndpoint(app))
|
||||
@@ -109,6 +113,39 @@ var _ = Describe("Settings endpoints", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
// Regression: a focused admin page (the Middleware proxy tab) POSTs only
|
||||
// the one field it owns — mitm_listen. The old handler wrote the request
|
||||
// body verbatim, so every other persisted setting was dropped (and
|
||||
// api_keys / pii_default_detectors, which lack omitempty, were written as
|
||||
// null). A partial POST must now merge onto what is already on disk.
|
||||
It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
|
||||
// First save establishes a fuller settings file (as the full Settings
|
||||
// page would): galleries, an API key, and the MITM listener. The
|
||||
// listener restart binds a real socket, so use 127.0.0.1:0 for an
|
||||
// ephemeral free port rather than a fixed one that may be in use.
|
||||
rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
// The Middleware proxy tab then POSTs only the listen address (the same
// value as the first save) — the exact partial body shape that nulled
// everything else before the fix.
|
||||
rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
var ondisk config.RuntimeSettings
|
||||
Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
|
||||
|
||||
Expect(ondisk.MITMListen).ToNot(BeNil())
|
||||
Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
|
||||
Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
|
||||
Expect(*ondisk.Galleries).To(HaveLen(1))
|
||||
Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
|
||||
Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
|
||||
Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
|
||||
Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
||||
// React master toggle must start the live watchdog immediately, without a
|
||||
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
// These specs stub /api/features and /api/auth/status per cell. The test server
|
||||
// disables auth (isAdmin=true) and reports its own features, so we intercept
|
||||
// before navigation to simulate each role x mode cell.
|
||||
|
||||
// Registers a route handler on `page` for **/api/features that fulfils every
// matching request with `features` serialised as a JSON body. Returns
// whatever page.route returns so the caller can await the registration.
function stubFeatures(page, features) {
  return page.route('**/api/features', route =>
    route.fulfill({ contentType: 'application/json', body: JSON.stringify(features) }))
}
|
||||
|
||||
// Registers a route handler on `page` for **/api/p2p/token that fulfils every
// matching request with an empty text/plain body. Returns whatever page.route
// returns so the caller can await the registration.
function stubNoP2P(page) {
  // P2P token endpoint returns empty -> p2pEnabled=false.
  return page.route('**/api/p2p/token', route =>
    route.fulfill({ contentType: 'text/plain', body: '' }))
}
|
||||
|
||||
test.describe('Adaptive landing (HomeRoute)', () => {
|
||||
test('admin + distributed redirects /app to Nodes', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app')
|
||||
await expect(page).toHaveURL(/\/app\/nodes$/)
|
||||
await expect(page.locator('.page-title').first()).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('admin + single-node stays on Home', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app')
|
||||
await expect(page).toHaveURL(/\/app$/)
|
||||
await expect(page.locator('.home-greeting')).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
})
|
||||
|
||||
test.describe('Adaptive sidebar', () => {
|
||||
test('distributed pins the Cluster group with Nodes at the top', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat') // any in-app page so the sidebar is mounted
|
||||
const pinned = page.locator('.sidebar-nav .sidebar-section-items').first()
|
||||
await expect(pinned.getByText('Nodes', { exact: false })).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('single-node does not pin a Cluster group', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
// Nodes is reachable only via the Operate rail, not pinned at the top.
|
||||
await expect(page.locator('.sidebar-nav')).toBeVisible({ timeout: 15_000 })
|
||||
await expect(page.locator('.sidebar-nav .sidebar-section-items').first()
|
||||
.getByText('Nodes', { exact: false })).toHaveCount(0)
|
||||
})
|
||||
})
|
||||
|
||||
test.describe('Top navbar', () => {
|
||||
test('admin sees the mode pill and settings cog', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__mode')).toBeVisible({ timeout: 15_000 })
|
||||
await expect(page.locator('.top-navbar__icon[aria-label]')).not.toHaveCount(0)
|
||||
})
|
||||
|
||||
test('admin-via-chat jump shows when localai_assistant is enabled', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false, localai_assistant: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__assistant')).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('admin-via-chat jump hidden when localai_assistant is off', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false, localai_assistant: false })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__assistant')).toHaveCount(0)
|
||||
})
|
||||
})
|
||||
|
||||
test.describe('Token usage meter', () => {
|
||||
test('renders when admin usage has data', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.route('**/api/auth/admin/usage**', route =>
|
||||
route.fulfill({ contentType: 'application/json',
|
||||
body: JSON.stringify({ buckets: [{ total_tokens: 1234 }] }) }))
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__meter')).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('hidden when admin usage is empty (graceful degrade)', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.route('**/api/auth/admin/usage**', route =>
|
||||
route.fulfill({ contentType: 'application/json', body: JSON.stringify({ buckets: [] }) }))
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar')).toBeVisible({ timeout: 15_000 })
|
||||
await expect(page.locator('.top-navbar__meter')).toHaveCount(0)
|
||||
})
|
||||
})
|
||||
@@ -12,16 +12,6 @@
|
||||
"accountSettings": "Account settings",
|
||||
"account": "Account",
|
||||
"accountFor": "Account: {{name}}",
|
||||
"topbar": {
|
||||
"label": "Top bar",
|
||||
"modeDistributed": "Distributed",
|
||||
"modeSwarm": "Swarm",
|
||||
"modeSingle": "Single-node",
|
||||
"pickModel": "Models",
|
||||
"adminViaChat": "Admin via chat",
|
||||
"tokensToday": "Tokens today",
|
||||
"usageDetail": "View usage detail"
|
||||
},
|
||||
"sections": {
|
||||
"create": "Create",
|
||||
"recognition": "Recognition",
|
||||
|
||||
@@ -184,50 +184,6 @@
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
|
||||
/* Desktop top bar: deployment + admin affordances on wide screens. Hidden on
|
||||
mobile, where .mobile-header carries the equivalent actions. */
|
||||
.top-navbar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: var(--spacing-md);
|
||||
padding: var(--spacing-sm) var(--spacing-lg);
|
||||
border-bottom: 1px solid var(--color-border-default);
|
||||
background: var(--color-bg-secondary);
|
||||
}
|
||||
.top-navbar__right { display: flex; align-items: center; gap: var(--spacing-sm); }
|
||||
.top-navbar__mode {
|
||||
font-size: 0.75rem;
|
||||
padding: 2px 10px;
|
||||
border-radius: 999px;
|
||||
border: 1px solid var(--color-border-default);
|
||||
color: var(--color-text-secondary);
|
||||
}
|
||||
.top-navbar__mode.is-active { color: var(--color-success); border-color: var(--color-success); }
|
||||
.top-navbar__btn {
|
||||
display: inline-flex; align-items: center; gap: 6px;
|
||||
font-size: 0.8125rem; padding: 5px 10px; border-radius: 8px;
|
||||
border: 1px solid var(--color-border-default); background: var(--color-bg-tertiary);
|
||||
color: var(--color-text-primary); cursor: pointer;
|
||||
}
|
||||
.top-navbar__icon {
|
||||
width: 32px; height: 32px; display: inline-flex; align-items: center;
|
||||
justify-content: center; border-radius: 8px; border: 1px solid var(--color-border-default);
|
||||
background: var(--color-bg-tertiary); color: var(--color-text-secondary); cursor: pointer;
|
||||
}
|
||||
.top-navbar__avatar img { width: 100%; height: 100%; border-radius: 50%; object-fit: cover; }
|
||||
.top-navbar__meter {
|
||||
display: inline-flex; flex-direction: column; gap: 3px; align-items: flex-start;
|
||||
padding: 4px 10px; border-radius: 8px; border: 1px solid var(--color-border-default);
|
||||
background: var(--color-bg-tertiary); cursor: pointer; min-width: 150px;
|
||||
}
|
||||
.top-navbar__meter-label { font-size: 0.6875rem; color: var(--color-text-secondary); }
|
||||
.top-navbar__meter-bar { width: 100%; height: 5px; border-radius: 3px; background: var(--color-bg-secondary); overflow: hidden; }
|
||||
.top-navbar__meter-bar i { display: block; height: 100%; background: var(--color-primary); }
|
||||
@media (max-width: 639px) {
|
||||
.top-navbar { display: none; }
|
||||
}
|
||||
|
||||
/* Sidebar */
|
||||
.sidebar {
|
||||
position: fixed;
|
||||
|
||||
@@ -3,7 +3,6 @@ import { Outlet, useLocation, useNavigate } from 'react-router-dom'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Sidebar from './components/Sidebar'
|
||||
import OperationsBar from './components/OperationsBar'
|
||||
import TopNavbar from './components/TopNavbar'
|
||||
import { ToastContainer, useToast } from './components/Toast'
|
||||
import { systemApi } from './utils/api'
|
||||
import { useTheme } from './contexts/ThemeContext'
|
||||
@@ -99,7 +98,6 @@ export default function App() {
|
||||
<Sidebar isOpen={sidebarOpen} onClose={() => setSidebarOpen(false)} />
|
||||
<main className="main-content" {...(sidebarOpen ? { 'aria-hidden': 'true', inert: '' } : {})}>
|
||||
<OperationsBar />
|
||||
<TopNavbar />
|
||||
{/* Mobile header — primary actions reachable without opening the
|
||||
drawer. Hamburger is the only way to expand the nav on phones;
|
||||
theme toggle and account avatar are mirrored from the sidebar
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
import { lazy, Suspense } from 'react'
|
||||
import { Navigate } from 'react-router-dom'
|
||||
import { useAuth } from '../context/AuthContext'
|
||||
import { useDeployment } from '../contexts/DeploymentContext'
|
||||
import { resolveHome } from '../utils/resolveHome'
|
||||
import RouteFallback from './RouteFallback'
|
||||
|
||||
const Home = lazy(() => import('../pages/Home'))
|
||||
|
||||
// Index-route element. Waits for auth + deployment signals to load (so we never
|
||||
// flash the wrong landing), then either renders Home or redirects to the cell's
|
||||
// landing page. Redirecting (rather than rendering Nodes/Chat inline at /app)
|
||||
// keeps each target's own route guard, active-nav state, and deep-linkability.
|
||||
export default function HomeRoute() {
|
||||
const { isAdmin, loading: authLoading } = useAuth()
|
||||
const { distributed, p2pEnabled, loading: deployLoading } = useDeployment()
|
||||
|
||||
if (authLoading || deployLoading) return <RouteFallback />
|
||||
|
||||
const target = resolveHome({ isAdmin, distributed, p2pEnabled })
|
||||
if (target) return <Navigate to={target} replace />
|
||||
|
||||
return (
|
||||
<Suspense fallback={<RouteFallback />}>
|
||||
<Home />
|
||||
</Suspense>
|
||||
)
|
||||
}
|
||||
@@ -5,11 +5,9 @@ import ThemeToggle from './ThemeToggle'
|
||||
import LanguageSwitcher from './LanguageSwitcher'
|
||||
import { useAuth } from '../context/AuthContext'
|
||||
import { useBranding } from '../contexts/BrandingContext'
|
||||
import { useDeployment } from '../contexts/DeploymentContext'
|
||||
import { apiUrl } from '../utils/basePath'
|
||||
import { preloadRoute } from '../router'
|
||||
import { consoles, firstVisiblePath, consolePaths } from './console/consoleConfig'
|
||||
import { clusterPinItems, shouldCollapseCreate } from '../utils/sidebarPolicy'
|
||||
|
||||
const COLLAPSED_KEY = 'localai_sidebar_collapsed'
|
||||
const SECTIONS_KEY = 'localai_sidebar_sections'
|
||||
@@ -60,13 +58,11 @@ function NavItem({ item, onClose, collapsed }) {
|
||||
)
|
||||
}
|
||||
|
||||
function loadSectionState(collapseCreate = false) {
|
||||
// Tiers render expanded by default; users can collapse any tier and the
|
||||
// choice persists (stored values override defaults). In cluster cells we
|
||||
// start Create collapsed so the pinned cluster group leads - but only when
|
||||
// the user has not already expressed a preference.
|
||||
function loadSectionState() {
|
||||
// Tiers render expanded by default (the redesign favours showing the few
|
||||
// intent groups up front); users can still collapse any tier and the choice
|
||||
// is persisted. Stored values override the defaults so a saved collapse wins.
|
||||
const defaults = Object.fromEntries(sections.map(s => [s.id, true]))
|
||||
if (collapseCreate) defaults.create = false
|
||||
try {
|
||||
const stored = localStorage.getItem(SECTIONS_KEY)
|
||||
return stored ? { ...defaults, ...JSON.parse(stored) } : defaults
|
||||
@@ -81,34 +77,20 @@ function saveSectionState(state) {
|
||||
|
||||
export default function Sidebar({ isOpen, onClose }) {
|
||||
const { t } = useTranslation('nav')
|
||||
const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
|
||||
// Deployment shape (server features + p2p) drives the adaptive sidebar; the
|
||||
// shared context replaces the sidebar's own /api/features fetch so the
|
||||
// landing resolver, navbar, and this policy agree on one snapshot.
|
||||
const deployment = useDeployment()
|
||||
const features = deployment.features
|
||||
// Shared shape for the console gating helpers (consoleConfig.js); in scope for
|
||||
// both the pinned cluster group and the console-tier rendering below.
|
||||
const auth = { isAdmin, authEnabled, hasFeature, features }
|
||||
const collapseCreate = shouldCollapseCreate(auth, deployment)
|
||||
const [features, setFeatures] = useState({})
|
||||
const [collapsed, setCollapsed] = useState(() => {
|
||||
try { return localStorage.getItem(COLLAPSED_KEY) === 'true' } catch (_) { return false }
|
||||
})
|
||||
const [openSections, setOpenSections] = useState(loadSectionState)
|
||||
const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
|
||||
const branding = useBranding()
|
||||
const navigate = useNavigate()
|
||||
const location = useLocation()
|
||||
const closeBtnRef = useRef(null)
|
||||
|
||||
// Apply the cluster-cell Create-collapse default once, only when the user has
|
||||
// no stored section preference (so we never override an explicit choice).
|
||||
useEffect(() => {
|
||||
if (deployment.loading) return
|
||||
let hasStored = false
|
||||
try { hasStored = !!localStorage.getItem(SECTIONS_KEY) } catch { hasStored = false }
|
||||
if (hasStored || !collapseCreate) return
|
||||
setOpenSections(prev => (prev.create === false ? prev : { ...prev, create: false }))
|
||||
}, [deployment.loading, collapseCreate])
|
||||
fetch(apiUrl('/api/features')).then(r => r.json()).then(setFeatures).catch(() => {})
|
||||
}, [])
|
||||
|
||||
// Stay in sync with external collapse dispatches (e.g. the chat
|
||||
// page's focus mode). The collapse-toggle button still owns the
|
||||
@@ -175,6 +157,8 @@ export default function Sidebar({ isOpen, onClose }) {
|
||||
}
|
||||
|
||||
const visibleTopItems = topItems.filter(filterItem)
|
||||
// Shared shape for the console gating helpers (consoleConfig.js).
|
||||
const auth = { isAdmin, authEnabled, hasFeature, features }
|
||||
|
||||
// Inline sections (Create) carry no gating; a plain filterItem pass suffices.
|
||||
const getVisibleSectionItems = (section) => section.items.filter(filterItem)
|
||||
@@ -215,28 +199,6 @@ export default function Sidebar({ isOpen, onClose }) {
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Pinned Cluster quick-access (admin + distributed/p2p). Same gate
|
||||
as the Operate rail; surfaced at the top for cluster operators. */}
|
||||
{(() => {
|
||||
const pinned = clusterPinItems(auth, deployment)
|
||||
if (pinned.length === 0) return null
|
||||
return (
|
||||
<div className="sidebar-section">
|
||||
<div className="sidebar-section-title">{t('operate.cluster')}</div>
|
||||
<div className="sidebar-section-items">
|
||||
{pinned.map(item => (
|
||||
<NavItem
|
||||
key={item.path}
|
||||
item={{ path: item.path, icon: item.icon, labelKey: item.labelKey }}
|
||||
onClose={onClose}
|
||||
collapsed={collapsed}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})()}
|
||||
|
||||
{/* Collapsible sections */}
|
||||
{sections.map(section => {
|
||||
const visibleItems = getVisibleSectionItems(section)
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
import { useNavigate } from 'react-router-dom'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useAuth } from '../context/AuthContext'
|
||||
import { useDeployment } from '../contexts/DeploymentContext'
|
||||
import { useTheme } from '../contexts/ThemeContext'
|
||||
import { launchAssistantChat } from '../utils/launchAssistantChat'
|
||||
import TokenUsageMeter from './navbar/TokenUsageMeter'
|
||||
|
||||
// Desktop top bar. Complementary to the mobile-only header in App.jsx: this is
|
||||
// hidden on small screens (see .top-navbar CSS) and shows deployment/admin
|
||||
// affordances on wide screens where the sidebar footer is far from the content.
|
||||
export default function TopNavbar() {
|
||||
const { t } = useTranslation('nav')
|
||||
const navigate = useNavigate()
|
||||
const { isAdmin, authEnabled, user } = useAuth()
|
||||
const { features, distributed, p2pEnabled } = useDeployment()
|
||||
const { theme, toggleTheme } = useTheme()
|
||||
|
||||
const modeLabel = distributed
|
||||
? t('topbar.modeDistributed')
|
||||
: p2pEnabled
|
||||
? t('topbar.modeSwarm')
|
||||
: t('topbar.modeSingle')
|
||||
|
||||
const showAssistantJump = isAdmin && !!features.localai_assistant
|
||||
const showAvatar = authEnabled && user
|
||||
const themeLabel = theme === 'dark' ? t('switchToLightMode') : t('switchToDarkMode')
|
||||
|
||||
return (
|
||||
<div className="top-navbar" role="navigation" aria-label={t('topbar.label')}>
|
||||
<div className="top-navbar__left">
|
||||
{isAdmin && (
|
||||
<span className={`top-navbar__mode ${distributed || p2pEnabled ? 'is-active' : ''}`}>
|
||||
<i className="fas fa-circle-nodes" aria-hidden="true" /> {modeLabel}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="top-navbar__right">
|
||||
{!isAdmin && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__btn"
|
||||
onClick={() => navigate('/app/chat')}
|
||||
title={t('topbar.pickModel')}
|
||||
>
|
||||
<i className="fas fa-cube" aria-hidden="true" /> {t('topbar.pickModel')}
|
||||
</button>
|
||||
)}
|
||||
{showAssistantJump && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__btn top-navbar__assistant"
|
||||
onClick={() => launchAssistantChat(navigate)}
|
||||
title={t('topbar.adminViaChat')}
|
||||
>
|
||||
<i className="fas fa-user-shield" aria-hidden="true" /> {t('topbar.adminViaChat')}
|
||||
</button>
|
||||
)}
|
||||
{isAdmin && <TokenUsageMeter />}
|
||||
{isAdmin && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__icon"
|
||||
onClick={() => navigate('/app/settings')}
|
||||
aria-label={t('items.settings')}
|
||||
title={t('items.settings')}
|
||||
>
|
||||
<i className="fas fa-cog" aria-hidden="true" />
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__icon"
|
||||
onClick={toggleTheme}
|
||||
aria-label={themeLabel}
|
||||
title={themeLabel}
|
||||
>
|
||||
<i className={`fas ${theme === 'dark' ? 'fa-sun' : 'fa-moon'}`} aria-hidden="true" />
|
||||
</button>
|
||||
{showAvatar && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__icon top-navbar__avatar"
|
||||
onClick={() => navigate('/app/account')}
|
||||
aria-label={user.name || user.email}
|
||||
title={user.name || user.email}
|
||||
>
|
||||
{user.avatarUrl
|
||||
? <img src={user.avatarUrl} alt="" />
|
||||
: <i className="fas fa-user-circle" aria-hidden="true" />}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
import { useState, useEffect } from 'react'
|
||||
import { useNavigate } from 'react-router-dom'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { usageApi } from '../../utils/api'
|
||||
|
||||
// Compact admin-only usage glance: today's total tokens, optionally against a
|
||||
// quota cap, linking to the full /app/usage page. Self-contained data fetch so
|
||||
// a usage-API failure cannot break the navbar - it just renders nothing.
|
||||
// Sums `total_tokens` over the usage buckets of a usage response.
//
// Accepts `{ buckets: [...] }`, `{ usage: [...] }`, or a bare array. Returns
// null when no non-empty bucket array is found, otherwise the numeric total.
// Missing (null/undefined) bucket entries and absent or non-numeric
// `total_tokens` values count as 0, so a malformed entry can neither throw nor
// turn the running sum into a concatenated string.
function sumTotalTokens(res) {
  const buckets = res?.buckets || res?.usage || (Array.isArray(res) ? res : [])
  if (!Array.isArray(buckets) || buckets.length === 0) return null
  return buckets.reduce(
    // Number(...) || 0 maps undefined, NaN and unparsable strings to 0.
    (sum, bucket) => sum + (Number(bucket?.total_tokens) || 0),
    0,
  )
}
|
||||
|
||||
export default function TokenUsageMeter() {
|
||||
const { t } = useTranslation('nav')
|
||||
const navigate = useNavigate()
|
||||
const [tokens, setTokens] = useState(null)
|
||||
const [cap, setCap] = useState(null)
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false
|
||||
usageApi.getAdminUsage('day')
|
||||
.then(res => { if (!cancelled) setTokens(sumTotalTokens(res)) })
|
||||
.catch(() => { if (!cancelled) setTokens(null) })
|
||||
usageApi.getMyQuotas()
|
||||
.then(q => { if (!cancelled) setCap(q?.token_limit || q?.tokens?.limit || null) })
|
||||
.catch(() => { if (!cancelled) setCap(null) })
|
||||
return () => { cancelled = true }
|
||||
}, [])
|
||||
|
||||
if (tokens === null) return null
|
||||
|
||||
const pct = cap ? Math.min(100, Math.round((tokens / cap) * 100)) : null
|
||||
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__meter"
|
||||
onClick={() => navigate('/app/usage')}
|
||||
title={t('topbar.usageDetail')}
|
||||
>
|
||||
<span className="top-navbar__meter-label">
|
||||
{t('topbar.tokensToday')}: {Intl.NumberFormat().format(tokens)}
|
||||
{cap ? ` / ${Intl.NumberFormat().format(cap)}` : ''}
|
||||
</span>
|
||||
{pct !== null && (
|
||||
<span className="top-navbar__meter-bar"><i style={{ width: `${pct}%` }} /></span>
|
||||
)}
|
||||
</button>
|
||||
)
|
||||
}
|
||||
@@ -1,55 +0,0 @@
|
||||
import { createContext, useContext, useState, useEffect } from 'react'
|
||||
import { apiUrl } from '../utils/basePath'
|
||||
import { p2pApi } from '../utils/api'
|
||||
|
||||
const DeploymentContext = createContext(null)
|
||||
|
||||
// One shared fetch of the deployment-shape signals the adaptive UI keys off:
|
||||
// server features (/api/features) and whether a P2P network token exists.
|
||||
// Components used to fetch /api/features independently (Sidebar, Home); this
|
||||
// centralises it so the landing resolver, sidebar policy, and navbar agree on
|
||||
// one snapshot and we issue a single request.
|
||||
export function DeploymentProvider({ children }) {
|
||||
const [features, setFeatures] = useState({})
|
||||
const [p2pEnabled, setP2pEnabled] = useState(false)
|
||||
const [loading, setLoading] = useState(true)
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false
|
||||
const featuresP = fetch(apiUrl('/api/features'))
|
||||
.then(r => r.json())
|
||||
.catch(() => ({}))
|
||||
// P2P has no /api/features flag: it is "enabled" when a network token
|
||||
// exists (mirrors pages/P2P.jsx). A 404/disabled endpoint throws and we
|
||||
// treat that as not-enabled.
|
||||
const p2pP = p2pApi.getToken()
|
||||
.then(tok => (typeof tok === 'string' ? tok : (tok?.token || '')).trim())
|
||||
.catch(() => '')
|
||||
Promise.all([featuresP, p2pP]).then(([f, tok]) => {
|
||||
if (cancelled) return
|
||||
setFeatures(f || {})
|
||||
setP2pEnabled(!!tok)
|
||||
setLoading(false)
|
||||
})
|
||||
return () => { cancelled = true }
|
||||
}, [])
|
||||
|
||||
const value = {
|
||||
features,
|
||||
distributed: !!features.distributed,
|
||||
p2pEnabled,
|
||||
loading,
|
||||
}
|
||||
|
||||
return (
|
||||
<DeploymentContext.Provider value={value}>
|
||||
{children}
|
||||
</DeploymentContext.Provider>
|
||||
)
|
||||
}
|
||||
|
||||
// Hook returning the current deployment context value. Throws when no
// DeploymentProvider is mounted above the caller (the context default is null).
export function useDeployment() {
  const deployment = useContext(DeploymentContext)
  if (deployment) return deployment
  throw new Error('useDeployment must be used within DeploymentProvider')
}
|
||||
@@ -4,7 +4,6 @@ import { RouterProvider } from 'react-router-dom'
|
||||
import { ThemeProvider } from './contexts/ThemeContext'
|
||||
import { BrandingProvider } from './contexts/BrandingContext'
|
||||
import { AuthProvider } from './context/AuthContext'
|
||||
import { DeploymentProvider } from './contexts/DeploymentContext'
|
||||
import { OperationsProvider } from './contexts/OperationsContext'
|
||||
import { router } from './router'
|
||||
import './i18n'
|
||||
@@ -33,11 +32,9 @@ createRoot(document.getElementById('root')).render(
|
||||
<ThemeProvider>
|
||||
<BrandingProvider>
|
||||
<AuthProvider>
|
||||
<DeploymentProvider>
|
||||
<OperationsProvider>
|
||||
<RouterProvider router={router} />
|
||||
</OperationsProvider>
|
||||
</DeploymentProvider>
|
||||
<OperationsProvider>
|
||||
<RouterProvider router={router} />
|
||||
</OperationsProvider>
|
||||
</AuthProvider>
|
||||
</BrandingProvider>
|
||||
</ThemeProvider>
|
||||
|
||||
@@ -541,73 +541,58 @@ export default function Chat() {
|
||||
updateChatSettings(activeChat.id, { clientMCPServers: next })
|
||||
}, [activeChat, updateChatSettings])
|
||||
|
||||
// Load initial message / assistant launch from the Home page or the navbar
|
||||
// quick-jump. Factored into a callback so both the mount-time reader and the
|
||||
// navbar re-trigger event below consume the same payload through one path.
|
||||
// Load initial message from home page
|
||||
const homeDataProcessed = useRef(false)
|
||||
const consumeHomeChatData = useCallback(() => {
|
||||
const stored = localStorage.getItem('localai_index_chat_data')
|
||||
if (!stored) return
|
||||
try {
|
||||
const data = JSON.parse(stored)
|
||||
localStorage.removeItem('localai_index_chat_data')
|
||||
|
||||
// Two entry shapes from Home:
|
||||
// - "compose-and-send": data.message present → open new chat,
|
||||
// prefill the composer, click submit.
|
||||
// - "open-assistant": no message, just data.localaiAssistant → open
|
||||
// a fresh chat already in admin mode so the wizard can fire.
|
||||
const hasMessage = !!data.message
|
||||
const wantsAssistant = !!data.localaiAssistant
|
||||
|
||||
if (hasMessage || wantsAssistant) {
|
||||
let targetChat = activeChat
|
||||
if (data.newChat) {
|
||||
targetChat = addChat(data.model || '', '', data.mcpMode || false)
|
||||
} else {
|
||||
if (data.model && activeChat) {
|
||||
updateChatSettings(activeChat.id, { model: data.model })
|
||||
}
|
||||
if (data.mcpMode && activeChat) {
|
||||
updateChatSettings(activeChat.id, { mcpMode: true })
|
||||
}
|
||||
}
|
||||
if (data.mcpServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
|
||||
}
|
||||
if (data.clientMCPServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
|
||||
}
|
||||
if (wantsAssistant && targetChat) {
|
||||
updateChatSettings(targetChat.id, { localaiAssistant: true })
|
||||
}
|
||||
if (hasMessage) {
|
||||
setInput(data.message)
|
||||
if (data.files) setFiles(data.files)
|
||||
setTimeout(() => {
|
||||
const submitBtn = document.getElementById('chat-submit-btn')
|
||||
submitBtn?.click()
|
||||
}, 100)
|
||||
}
|
||||
}
|
||||
} catch (_e) { /* ignore */ }
|
||||
}, [activeChat, addChat, updateChatSettings])
|
||||
|
||||
useEffect(() => {
|
||||
if (homeDataProcessed.current) return
|
||||
homeDataProcessed.current = true
|
||||
consumeHomeChatData()
|
||||
}, [consumeHomeChatData])
|
||||
const stored = localStorage.getItem('localai_index_chat_data')
|
||||
if (stored) {
|
||||
homeDataProcessed.current = true
|
||||
try {
|
||||
const data = JSON.parse(stored)
|
||||
localStorage.removeItem('localai_index_chat_data')
|
||||
|
||||
// Admins can re-trigger the assistant jump from the navbar while already on
|
||||
// the chat page; navigate('/app/chat') does not remount Chat, so the
|
||||
// mount-time reader above never fires. The launcher dispatches this event
|
||||
// after writing the payload so we re-consume it and open a fresh assistant.
|
||||
useEffect(() => {
|
||||
const onOpenAssistant = () => consumeHomeChatData()
|
||||
window.addEventListener('localai-open-assistant', onOpenAssistant)
|
||||
return () => window.removeEventListener('localai-open-assistant', onOpenAssistant)
|
||||
}, [consumeHomeChatData])
|
||||
// Two entry shapes from Home:
|
||||
// - "compose-and-send": data.message present → open new chat,
|
||||
// prefill the composer, click submit.
|
||||
// - "open-assistant": no message, just data.localaiAssistant → open
|
||||
// a fresh chat already in admin mode so the wizard can fire.
|
||||
const hasMessage = !!data.message
|
||||
const wantsAssistant = !!data.localaiAssistant
|
||||
|
||||
if (hasMessage || wantsAssistant) {
|
||||
let targetChat = activeChat
|
||||
if (data.newChat) {
|
||||
targetChat = addChat(data.model || '', '', data.mcpMode || false)
|
||||
} else {
|
||||
if (data.model && activeChat) {
|
||||
updateChatSettings(activeChat.id, { model: data.model })
|
||||
}
|
||||
if (data.mcpMode && activeChat) {
|
||||
updateChatSettings(activeChat.id, { mcpMode: true })
|
||||
}
|
||||
}
|
||||
if (data.mcpServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
|
||||
}
|
||||
if (data.clientMCPServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
|
||||
}
|
||||
if (wantsAssistant && targetChat) {
|
||||
updateChatSettings(targetChat.id, { localaiAssistant: true })
|
||||
}
|
||||
if (hasMessage) {
|
||||
setInput(data.message)
|
||||
if (data.files) setFiles(data.files)
|
||||
setTimeout(() => {
|
||||
const submitBtn = document.getElementById('chat-submit-btn')
|
||||
submitBtn?.click()
|
||||
}, 100)
|
||||
}
|
||||
}
|
||||
} catch (_e) { /* ignore */ }
|
||||
}
|
||||
}, [])
|
||||
|
||||
// Track whether the user is pinned to the bottom. If they scroll up
|
||||
// while a response is streaming, stop forcing them back down.
|
||||
|
||||
@@ -13,7 +13,6 @@ import { useResources } from '../hooks/useResources'
|
||||
import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
|
||||
import { API_CONFIG } from '../utils/config'
|
||||
import { greetingKey } from '../utils/greeting'
|
||||
import { launchAssistantChat } from '../utils/launchAssistantChat'
|
||||
import StatusPill from '../components/StatusPill'
|
||||
import Skeleton from '../components/Skeleton'
|
||||
import SectionHeading from '../components/SectionHeading'
|
||||
@@ -229,8 +228,16 @@ export default function Home() {
|
||||
// requiring an initial message or model selection. Useful when an admin
|
||||
// wants to start the assistant from a cold home page.
|
||||
const openAssistantChat = useCallback(() => {
|
||||
launchAssistantChat(navigate, selectedModel)
|
||||
const chatData = {
|
||||
model: selectedModel || '',
|
||||
mcpMode: false,
|
||||
localaiAssistant: true,
|
||||
newChat: true,
|
||||
}
|
||||
localStorage.setItem('localai_index_chat_data', JSON.stringify(chatData))
|
||||
try { localStorage.setItem('localai_assistant_used', '1') } catch { /* ignore */ }
|
||||
setAssistantUsed(true)
|
||||
navigate('/app/chat')
|
||||
}, [navigate, selectedModel])
|
||||
|
||||
const handleSubmit = (e) => {
|
||||
|
||||
@@ -6,7 +6,6 @@ import RequireAdmin from './components/RequireAdmin'
|
||||
import RequireAuth from './components/RequireAuth'
|
||||
import RequireAuthEnabled from './components/RequireAuthEnabled'
|
||||
import RequireFeature from './components/RequireFeature'
|
||||
import HomeRoute from './components/HomeRoute'
|
||||
|
||||
// Pages are code-split: each becomes its own chunk loaded on demand, so a route
|
||||
// no longer drags every other page (and its heavy deps — CodeMirror, the MCP
|
||||
@@ -33,7 +32,7 @@ export function preloadRoute(path) {
|
||||
preloaders[m[1] ?? '']?.().catch(() => { /* network blip — real click will retry */ })
|
||||
}
|
||||
|
||||
page('', () => import('./pages/Home'))
|
||||
const Home = page('', () => import('./pages/Home'))
|
||||
const Chat = page('chat', () => import('./pages/Chat'))
|
||||
const Models = page('models', () => import('./pages/Models'))
|
||||
const Manage = page('manage', () => import('./pages/Manage'))
|
||||
@@ -97,7 +96,7 @@ function Feature({ feature, children }) {
|
||||
}
|
||||
|
||||
const appChildren = [
|
||||
{ index: true, element: <HomeRoute /> },
|
||||
{ index: true, element: <Home /> },
|
||||
{ path: 'chat', element: <Chat /> },
|
||||
{ path: 'chat/:model', element: <Chat /> },
|
||||
{ path: 'image', element: <ImageGen /> },
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
// Launches a brand-new chat in LocalAI Assistant mode. Steps, in order:
//   1. store the launch payload under `localai_index_chat_data`,
//   2. record `localai_assistant_used` = '1',
//   3. navigate to /app/chat,
//   4. dispatch a `localai-open-assistant` window event, so that a Chat page
//      which is already mounted (navigation does not remount it) consumes the
//      payload too.
// Steps 1, 2 and 4 are best-effort: any exception they raise is swallowed.
//
// navigate: router navigate function, called with the target path.
// model:    optional model name to preselect; a falsy value is stored as ''.
export function launchAssistantChat(navigate, model = '') {
  const bestEffort = (step) => {
    try { step() } catch { /* ignore */ }
  }

  bestEffort(() => {
    const payload = {
      model: model || '',
      mcpMode: false,
      localaiAssistant: true,
      newChat: true,
    }
    localStorage.setItem('localai_index_chat_data', JSON.stringify(payload))
  })
  bestEffort(() => localStorage.setItem('localai_assistant_used', '1'))

  navigate('/app/chat')

  bestEffort(() => window.dispatchEvent(new CustomEvent('localai-open-assistant')))
}
|
||||
11
core/http/react-ui/src/utils/resolveHome.js
vendored
11
core/http/react-ui/src/utils/resolveHome.js
vendored
@@ -1,11 +0,0 @@
|
||||
// Chooses where the index route lands. Returns a path, or '' to mean "render
// the default Home".
//   - non-admin           -> '/app/chat'
//   - admin + distributed -> '/app/nodes'
//   - admin + p2pEnabled  -> '/app/p2p'
//   - plain admin         -> ''
// `distributed` takes precedence over `p2pEnabled`. This only picks the
// target; it performs no access checks.
export function resolveHome({ isAdmin, distributed, p2pEnabled }) {
  const adminLanding = distributed ? '/app/nodes' : p2pEnabled ? '/app/p2p' : ''
  return isAdmin ? adminLanding : '/app/chat'
}
|
||||
20
core/http/react-ui/src/utils/sidebarPolicy.js
vendored
20
core/http/react-ui/src/utils/sidebarPolicy.js
vendored
@@ -1,20 +0,0 @@
|
||||
import { operateConsole, isConsoleItemVisible } from '../components/console/consoleConfig'
|
||||
|
||||
// Items of the Operate console's 'operate.cluster' group, for use as a pinned
// quick-access group in the sidebar.
//
// Returns [] unless the user is an admin AND the deployment is distributed or
// has P2P enabled, and also when the group cannot be found. Otherwise returns
// the group's items that pass isConsoleItemVisible(item, auth), i.e. the same
// visibility gate applied to console items.
export function clusterPinItems(auth, deployment) {
  if (!auth.isAdmin) return []

  const clusterActive = deployment.distributed || deployment.p2pEnabled
  if (!clusterActive) return []

  const clusterGroup = operateConsole.groups.find(
    (candidate) => candidate.titleKey === 'operate.cluster',
  )
  return clusterGroup
    ? clusterGroup.items.filter((entry) => isConsoleItemVisible(entry, auth))
    : []
}
|
||||
|
||||
// True when the user is an admin and the deployment is distributed or has P2P
// enabled, i.e. the cases in which the Create group should default to
// collapsed. Always returns a strict boolean.
export function shouldCollapseCreate(auth, deployment) {
  return Boolean(auth.isAdmin) && Boolean(deployment.distributed || deployment.p2pEnabled)
}
|
||||
@@ -79,21 +79,29 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
|
||||
}).Create(op).Error
|
||||
}
|
||||
|
||||
// UpdateProgress updates progress for an operation.
|
||||
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
|
||||
// UpdateProgress updates progress for an operation. The cancellable flag is
|
||||
// persisted on every tick so a replica that restarts mid-install rehydrates the
|
||||
// op as still cancellable — otherwise the column keeps its Create-time zero
|
||||
// value (false), the UI hides the cancel button, and the orphaned op can only
|
||||
// be dismissed by waiting for the 30-minute stale reaper.
|
||||
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
|
||||
return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
|
||||
"progress": progress,
|
||||
"message": message,
|
||||
"downloaded_file_size": downloadedSize,
|
||||
"cancellable": cancellable,
|
||||
"updated_at": time.Now(),
|
||||
}).Error
|
||||
}
|
||||
|
||||
// UpdateStatus updates the status of an operation.
|
||||
// UpdateStatus updates the status of an operation. A terminal status is never
|
||||
// cancellable, so the flag is cleared here to keep the persisted row consistent
|
||||
// with what the UI should offer.
|
||||
func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
|
||||
updates := map[string]any{
|
||||
"status": status,
|
||||
"updated_at": time.Now(),
|
||||
"status": status,
|
||||
"cancellable": false,
|
||||
"updated_at": time.Now(),
|
||||
}
|
||||
if errMsg != "" {
|
||||
updates["error"] = errMsg
|
||||
|
||||
56
core/services/galleryop/cancellable_persist_test.go
Normal file
56
core/services/galleryop/cancellable_persist_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package galleryop_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
// Reproduces "an in-flight install can't be cancelled after a restart". The
|
||||
// live install path marks OpStatus.Cancellable=true on every progress tick, but
|
||||
// UpdateStatus persisted progress/status to the gallery store WITHOUT the
|
||||
// cancellable flag, and Create defaulted it to false. So after a replica
|
||||
// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
|
||||
// reported cancellable:false, and the UI hid the cancel button — the orphaned
|
||||
// op lingered until the 30-minute stale reaper expired it. The cancellable
|
||||
// state must be persisted so a rehydrated in-flight op stays cancellable.
|
||||
var _ = Describe("GalleryService cancellable persistence across restart", func() {
|
||||
It("rehydrates an in-flight op as still cancellable", func() {
|
||||
db := testutil.SetupTestDB()
|
||||
store, err := distributed.NewGalleryStore(db)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||
svc.SetGalleryStore(store)
|
||||
|
||||
// Seed the in-flight op row as the worker goroutine does on admission.
|
||||
Expect(store.Create(&distributed.GalleryOperationRecord{
|
||||
ID: "op-inflight",
|
||||
GalleryElementName: "llama-cpp-development",
|
||||
OpType: "backend_install",
|
||||
Status: "pending",
|
||||
})).To(Succeed())
|
||||
|
||||
// Simulate a progress tick: the live path always marks installs
|
||||
// cancellable while they are downloading/processing.
|
||||
svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
|
||||
Message: "downloading",
|
||||
Progress: 25,
|
||||
Cancellable: true,
|
||||
})
|
||||
|
||||
// A fresh replica boots and hydrates from the store.
|
||||
fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||
fresh.SetGalleryStore(store)
|
||||
Expect(fresh.Hydrate()).To(Succeed())
|
||||
|
||||
st := fresh.GetStatus("op-inflight")
|
||||
Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
|
||||
Expect(st.Cancellable).To(BeTrue(),
|
||||
"a still-active install must rehydrate as cancellable so the admin can dismiss it")
|
||||
})
|
||||
})
|
||||
@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
|
||||
xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
|
||||
}
|
||||
} else {
|
||||
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
|
||||
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
|
||||
xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
|
||||
}
|
||||
}
|
||||
@@ -467,6 +467,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
||||
GalleryElementName: op.GalleryElementName,
|
||||
OpType: "backend_install",
|
||||
Status: "pending",
|
||||
Cancellable: true,
|
||||
})
|
||||
}
|
||||
err := g.backendHandler(&op, systemState)
|
||||
@@ -499,6 +500,8 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
||||
GalleryElementName: op.GalleryElementName,
|
||||
OpType: opType,
|
||||
Status: "pending",
|
||||
// A delete is not cancellable; an install is.
|
||||
Cancellable: !op.Delete,
|
||||
})
|
||||
}
|
||||
err := g.modelHandler(&op, cl, systemState)
|
||||
|
||||
@@ -1,4 +1,175 @@
|
||||
---
|
||||
- name: "qwopus3.6-27b-coder-compat-mtp"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF
|
||||
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
|
||||
license: "apache-2.0"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- vision
|
||||
- multimodal
|
||||
- reasoning
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/66309bd090589b7c65950665/sGQKmrMc6L6guMoaB5_Y2.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
mmproj: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
|
||||
options:
|
||||
- use_jinja:true
|
||||
- spec_type:draft-mtp
|
||||
- spec_n_max:6
|
||||
- spec_p_min:0.75
|
||||
parameters:
|
||||
model: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
sha256: f893632170124da60e159b7bcc9d91e1cda3014b2c6b8ad9c6cde38a1fcd2f6f
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
- filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
|
||||
sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/mmproj-F32.gguf
|
||||
- name: "kimi-k2.7-code"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF
|
||||
description: |
|
||||
## 1. Model Introduction
|
||||
|
||||
Kimi K2.7 Code is a coding-focused agentic model built upon Kimi K2.6. With substantial improvements on real-world long-horizon coding tasks, it strengthens end-to-end task completion across complex software engineering workflows while improving token efficiency, reducing thinking-token usage by approximately 30% compared with Kimi K2.6.
|
||||
|
||||
## 2. Model Summary
|
||||
|
||||
## 3. Evaluation Results
|
||||
|
||||
Benchmark
|
||||
Kimi K2.6
|
||||
Kimi K2.7 Code
|
||||
GPT-5.5
|
||||
Claude Opus 4.8
|
||||
|
||||
Coding
|
||||
|
||||
Kimi Code Bench v2
|
||||
50.9
|
||||
62.0
|
||||
69.0
|
||||
67.4
|
||||
|
||||
Program Bench
|
||||
48.3
|
||||
53.6
|
||||
69.1
|
||||
63.8
|
||||
|
||||
MLS Bench Lite
|
||||
26.7
|
||||
35.1
|
||||
35.5
|
||||
42.8
|
||||
|
||||
Agentic
|
||||
|
||||
Kimi Claw 24/7 Bench
|
||||
42.9
|
||||
46.9
|
||||
52.8
|
||||
50.4
|
||||
|
||||
MCP Atlas
|
||||
69.4
|
||||
76.0
|
||||
79.4
|
||||
81.3
|
||||
|
||||
MCP Mark Verified
|
||||
72.8
|
||||
81.1
|
||||
92.9
|
||||
76.4
|
||||
|
||||
Footnotes
|
||||
|
||||
...
|
||||
license: "other"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
icon: https://huggingface.co/moonshotai/Kimi-K2.7-Code/resolve/main/figures/kimi-logo.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
mmproj: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0.01
|
||||
model: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
repeat_penalty: 1
|
||||
temperature: 0.6
|
||||
top_k: -1
|
||||
top_p: 0.95
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
sha256: 65f0aca336f876902323a90e2aff32cac76d071b2cdd818c6a8d78be8fc2c680
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
|
||||
sha256: 40f4416c130827a11502778891f4ef95b2144db90f51d63aa3548d0952a39683
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
|
||||
sha256: ba2ba0b5168784ace7c752ecadfc3631279b2bb023824cb0fe9e2dab3dd28f22
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
|
||||
sha256: 10298a6c98b13ef49be286fefbea8663e16473fb69bbeabe153bc80c60ae116e
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
|
||||
sha256: 8e9e4c8e35d34fc4fef6bfb65a715ad7defbd196970d833c1df6924d701c88b3
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
|
||||
sha256: ccff6e7f299742f82cf6f51a871e3eb3167511efaee967477cc8387f54d16442
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
|
||||
sha256: 1a3b639633a2d22f71156a9f643ded2329cdd969cc21177b644b5741bac1af8e
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
|
||||
sha256: bde28f682a1eab973538b2102007d952f37a13c1f7d55e2ed99177445ddc4282
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
|
||||
sha256: b6a23a95b61e100f7593fa75e2363966323fa767b7e4fdf45d963b59e8fdc69f
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
|
||||
sha256: fb10231c2e6d76921d40f22690f4aa08a8090c708edeaf7e581abafc24d3b25c
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
|
||||
sha256: d2290be7ed1a22ac1f9f8a4813389689e075ce2ab8abc3aaaa1157a3cb1462d8
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
|
||||
sha256: ce0d028314aa3fc783082dbca097e1055d69686a17ab8306574e2949568f26a5
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
|
||||
sha256: 217864ce63a1d130ab39dcb0996b6097e1aa78eb896e38efaefdbbac3a00b7ec
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
|
||||
sha256: eb7582ad7066c5eaa01bde95acb00b4ad9cd7b07cd50a6cf5c9ee427258bc9dd
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
|
||||
- filename: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
|
||||
sha256: b2cc50c8c13fe70fc4968a83332f31e9007ea09ebb9ae91d46a4e4cd2a3053cd
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/mmproj-F32.gguf
|
||||
- name: "qwythos-9b-claude-mythos-5-1m"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
@@ -49,33 +220,7 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/GLM-5.2-GGUF
|
||||
description: |
|
||||
# GLM-5.2
|
||||
|
||||
👋 Join our WeChat or Discord community.
|
||||
|
||||
📖 Check out the GLM-5.2 blog and GLM-5 Technical report.
|
||||
|
||||
📍 Use GLM-5.2 API services on Z.ai API Platform.
|
||||
|
||||
🔜 Try GLM-5.2 here.
|
||||
|
||||
[Paper]
|
||||
[GitHub]
|
||||
|
||||
## Introduction
|
||||
|
||||
We're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:
|
||||
- **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work
|
||||
- **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency
|
||||
- **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%
|
||||
- **Pure Open**: An MIT open-source license — no regional limits, technical access without borders
|
||||
|
||||
## Benchmark
|
||||
|
||||
## Serve GLM-5.2 Locally
|
||||
|
||||
...
|
||||
description: "# GLM-5.2\n\n\U0001F44B Join our WeChat or Discord community.\n\n\U0001F4D6 Check out the GLM-5.2 blog and GLM-5 Technical report.\n\n\U0001F4CD Use GLM-5.2 API services on Z.ai API Platform.\n\n\U0001F51C Try GLM-5.2 here.\n\n[Paper]\n[GitHub]\n\n## Introduction\n\nWe're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:\n - **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work\n - **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency\n - **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%\n - **Pure Open**: An MIT open-source license — no regional limits, technical access without borders\n\n## Benchmark\n\n## Serve GLM-5.2 Locally\n\n...\n"
|
||||
license: "mit"
|
||||
tags:
|
||||
- llm
|
||||
@@ -198,26 +343,7 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-v2-MTP-NVFP4-GGUF
|
||||
description: |
|
||||
🪐 Qwopus3.6-27B-v2-MTP
|
||||
MTP Release
|
||||
|
||||
Multi-Token Prediction reasoning model fine-tuned from Qwen3.6-27B
|
||||
|
||||
🧬 Trace Inversion & Negentropy
|
||||
🧠 27B Parameters
|
||||
⚡ Speculative Decoding
|
||||
🛠️ Coding / DevOps / Math
|
||||
|
||||
💡 What is Qwopus3.6-27B-v2-MTP?
|
||||
🪐 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.
|
||||
|
||||
⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.
|
||||
🧩 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.
|
||||
🧪 GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.
|
||||
🚀 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.
|
||||
|
||||
...
|
||||
description: "\U0001FA90 Qwopus3.6-27B-v2-MTP\nMTP Release\n\nMulti-Token Prediction reasoning model fine-tuned from Qwen3.6-27B\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Parameters\n⚡ Speculative Decoding\n\U0001F6E0️ Coding / DevOps / Math\n\n\U0001F4A1 What is Qwopus3.6-27B-v2-MTP?\n\U0001FA90 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.\n\n⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.\n\U0001F9E9 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.\n\U0001F9EA GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.\n\U0001F680 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.\n\n...\n"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
@@ -243,28 +369,7 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-Coder-MTP-NVFP4-GGUF
|
||||
description: |
|
||||
🪐 Qwopus-3.6-27B-Coder
|
||||
Coder SFT Release
|
||||
|
||||
Agentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2
|
||||
|
||||
🧬 Trace Inversion & Negentropy
|
||||
🧠 27B Dense Model
|
||||
⚡ Agentic Coding
|
||||
🛠️ Tool Calling & Agent
|
||||
🏆 SWE-bench Verified: 67.0% (off-thinking)
|
||||
|
||||
💡 What is Qwopus-3.6-27B-Coder?
|
||||
🪐 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.
|
||||
|
||||
🧩 Agentic Coding
|
||||
Optimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.
|
||||
|
||||
🛠️ Tool Calling
|
||||
Learns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.
|
||||
|
||||
...
|
||||
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
@@ -1484,8 +1589,8 @@
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwopus3.6-27B-v2-MTP-GGUF/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
|
||||
sha256: 818d68223be4d8518dac0b3b5604dde633cbbcbae1f491d842a3e26711c6606d
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-v2-MTP-GGUF/resolve/main/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
|
||||
sha256: 31cf5fc2406a0c7aaebcc26d440bf0df94e215d0589d5205bf319649c052b50a
|
||||
- name: "qwen3.6-40b-claude-4.6-opus-deckard-heretic-uncensored-thinking-neo-code-di-imatrix-max"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -53,12 +53,13 @@ var _ = Describe("Gallery Distributed", Label("Distributed"), func() {
|
||||
Expect(retrieved.Status).To(Equal("downloading"))
|
||||
Expect(retrieved.FrontendID).To(Equal("f1"))
|
||||
|
||||
// Update progress
|
||||
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB")).To(Succeed())
|
||||
// Update progress (cancellable: a downloading install can be cancelled)
|
||||
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB", true)).To(Succeed())
|
||||
|
||||
updated, _ := galleryStore.Get(op.ID)
|
||||
Expect(updated.Progress).To(BeNumerically("~", 0.75, 0.01))
|
||||
Expect(updated.Message).To(Equal("75% complete"))
|
||||
Expect(updated.Cancellable).To(BeTrue())
|
||||
|
||||
// Complete
|
||||
Expect(galleryStore.UpdateStatus(op.ID, "completed", "")).To(Succeed())
|
||||
|
||||
@@ -104,11 +104,12 @@ var _ = Describe("Phase 4: MCP, Skills, Gallery, Fine-Tuning", Label("Distribute
|
||||
}
|
||||
stores.Gallery.Create(op)
|
||||
|
||||
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB")).To(Succeed())
|
||||
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB", true)).To(Succeed())
|
||||
|
||||
updated, _ := stores.Gallery.Get(op.ID)
|
||||
Expect(updated.Progress).To(BeNumerically("~", 0.5, 0.01))
|
||||
Expect(updated.Message).To(Equal("50% complete"))
|
||||
Expect(updated.Cancellable).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should deduplicate concurrent downloads", func() {
|
||||
|
||||
Reference in New Issue
Block a user