mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-02 13:22:34 -04:00
feat(ui): show cluster distribution of models in the System page
When a frontend restarted in distributed mode, models that workers had already loaded weren't visible until the operator clicked into each node manually — the /api/models/capabilities endpoint only knew about configs on the frontend's filesystem, not the registry-backed truth. /api/models/capabilities now joins in ListAllLoadedModels() when the registry is active, returning loaded_on[] with node id/name/state/status for each model. Models that live in the registry but lack a local config (the actual ghosts, which were not recovered from the frontend's file cache) still surface with source="registry-only" so operators can see and persist them; without that emission they'd be invisible to this frontend. Manage → Models replaces the old Running/Idle pill with a distribution cell that lists the first three nodes the model is loaded on as chips colored by state (green for loaded, blue for loading, amber for anything else). On wider clusters the remaining count collapses into a +N chip whose title attribute gives a per-node breakdown. Rendering for disabled models and for single-node deployments is unchanged. Adopted models (those reported with source="registry-only") get an extra "Adopted" ghost-icon chip with hover copy explaining what it means and how to make it permanent. Distributed mode also enables a 10s auto-refresh and a "Last synced Xs ago" indicator next to the Update button so ghost rows drop off within one reconcile tick after their owning process dies. Non-distributed mode is untouched — no polling, no cell-stack, same old Running/Idle.
This commit is contained in:
@@ -80,6 +80,35 @@ export default function Manage() {
|
||||
nodesApi.list().then(() => setDistributedMode(true)).catch(() => {})
|
||||
}, [fetchLoadedModels, fetchBackends])
|
||||
|
||||
// Auto-refresh the Models tab every 10s in distributed mode so ghost models
|
||||
// (loaded on a worker but absent from this frontend's in-memory cache)
|
||||
// clear on their own without the user clicking Update.
|
||||
const [lastSyncedAt, setLastSyncedAt] = useState(() => Date.now())
|
||||
const [nowTick, setNowTick] = useState(() => Date.now())
|
||||
useEffect(() => {
|
||||
if (!distributedMode || activeTab !== 'models') return
|
||||
const interval = setInterval(() => {
|
||||
refetchModels()
|
||||
fetchLoadedModels()
|
||||
setLastSyncedAt(Date.now())
|
||||
}, 10000)
|
||||
return () => clearInterval(interval)
|
||||
}, [distributedMode, activeTab, refetchModels, fetchLoadedModels])
|
||||
|
||||
// Drive the "last synced Ns ago" label without over-rendering the table.
|
||||
useEffect(() => {
|
||||
if (!distributedMode) return
|
||||
const interval = setInterval(() => setNowTick(Date.now()), 1000)
|
||||
return () => clearInterval(interval)
|
||||
}, [distributedMode])
|
||||
const lastSyncedAgo = (() => {
|
||||
const s = Math.max(0, Math.floor((nowTick - lastSyncedAt) / 1000))
|
||||
if (s < 5) return 'just now'
|
||||
if (s < 60) return `${s}s ago`
|
||||
const m = Math.floor(s / 60)
|
||||
return `${m}m ago`
|
||||
})()
|
||||
|
||||
// Fetch available backend upgrades
|
||||
useEffect(() => {
|
||||
if (activeTab === 'backends') {
|
||||
@@ -291,7 +320,12 @@ export default function Manage() {
|
||||
{/* Models Tab */}
|
||||
{activeTab === 'models' && (
|
||||
<div>
|
||||
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'flex-end', marginBottom: 'var(--spacing-md)' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'flex-end', gap: 'var(--spacing-sm)', marginBottom: 'var(--spacing-md)' }}>
|
||||
{distributedMode && (
|
||||
<span className="cell-muted" title="Auto-refreshes every 10s in distributed mode so ghost models clear promptly">
|
||||
<i className="fas fa-rotate" /> Last synced {lastSyncedAgo}
|
||||
</span>
|
||||
)}
|
||||
<button className="btn btn-secondary btn-sm" onClick={handleReload} disabled={reloading}>
|
||||
<i className={`fas ${reloading ? 'fa-spinner fa-spin' : 'fa-rotate'}`} />
|
||||
{reloading ? 'Updating...' : 'Update'}
|
||||
@@ -376,21 +410,47 @@ export default function Manage() {
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
{/* Status */}
|
||||
{/* Status / Distribution */}
|
||||
<td>
|
||||
{model.disabled ? (
|
||||
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
|
||||
<i className="fas fa-ban" style={{ fontSize: '6px' }} /> Disabled
|
||||
</span>
|
||||
) : loadedModelIds.has(model.id) ? (
|
||||
<span className="badge badge-success">
|
||||
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Running
|
||||
</span>
|
||||
) : (
|
||||
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
|
||||
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Idle
|
||||
</span>
|
||||
)}
|
||||
<div className="cell-stack">
|
||||
{model.disabled ? (
|
||||
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
|
||||
<i className="fas fa-ban" /> Disabled
|
||||
</span>
|
||||
) : model.loaded_on && model.loaded_on.length > 0 ? (
|
||||
// Distributed: surface where the model is actually loaded
|
||||
// so operators don't have to expand each node manually.
|
||||
<div className="badge-row">
|
||||
{model.loaded_on.slice(0, 3).map(l => (
|
||||
<span
|
||||
key={l.node_id}
|
||||
className={`badge ${l.state === 'loaded' ? 'badge-success' : l.state === 'loading' ? 'badge-info' : 'badge-warning'}`}
|
||||
title={`${l.node_name} — ${l.state} (${l.node_status})`}
|
||||
>
|
||||
<i className="fas fa-server" /> {l.node_name}
|
||||
</span>
|
||||
))}
|
||||
{model.loaded_on.length > 3 && (
|
||||
<span className="badge" title={model.loaded_on.map(l => `${l.node_name} (${l.state})`).join('\n')}>
|
||||
+{model.loaded_on.length - 3}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
) : loadedModelIds.has(model.id) ? (
|
||||
<span className="badge badge-success">
|
||||
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Running
|
||||
</span>
|
||||
) : (
|
||||
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
|
||||
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Idle
|
||||
</span>
|
||||
)}
|
||||
{model.source === 'registry-only' && (
|
||||
<span className="badge badge-warning" title="Discovered on a worker but not configured locally. Persist the config to make it permanent.">
|
||||
<i className="fas fa-ghost" /> Adopted
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</td>
|
||||
{/* Backend */}
|
||||
<td>
|
||||
|
||||
@@ -510,28 +510,89 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
modelConfigs := cl.GetAllModelsConfigs()
|
||||
modelsWithoutConfig, _ := galleryop.ListModels(cl, ml, config.NoFilterFn, galleryop.LOOSE_ONLY)
|
||||
|
||||
type loadedOn struct {
|
||||
NodeID string `json:"node_id"`
|
||||
NodeName string `json:"node_name"`
|
||||
State string `json:"state"`
|
||||
NodeStatus string `json:"node_status"`
|
||||
}
|
||||
type modelCapability struct {
|
||||
ID string `json:"id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Backend string `json:"backend"`
|
||||
Disabled bool `json:"disabled"`
|
||||
Pinned bool `json:"pinned"`
|
||||
ID string `json:"id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Backend string `json:"backend"`
|
||||
Disabled bool `json:"disabled"`
|
||||
Pinned bool `json:"pinned"`
|
||||
// LoadedOn is populated only when the node registry is active
|
||||
// (distributed mode). Lets the UI show "loaded on worker-1" without
|
||||
// the operator having to expand every node manually. An empty slice
|
||||
// would mean "no loaded replicas" and nil "not in cluster mode", but
|
||||
// omitempty drops both, so the frontend sees "no distribution info".
|
||||
LoadedOn []loadedOn `json:"loaded_on,omitempty"`
|
||||
// Source="registry-only" marks models adopted from the cluster that
|
||||
// have no local config yet (ghosts that the reconciler discovered).
|
||||
Source string `json:"source,omitempty"`
|
||||
}
|
||||
|
||||
// Join with the node registry when we have one (distributed mode). A
|
||||
// single registry fetch + map join beats per-model queries for the
|
||||
// 100-model case.
|
||||
var loadedByModel map[string][]loadedOn
|
||||
if ds := applicationInstance.Distributed(); ds != nil && ds.Registry != nil {
|
||||
nodeModels, err := ds.Registry.ListAllLoadedModels(c.Request().Context())
|
||||
if err == nil {
|
||||
allNodes, _ := ds.Registry.List(c.Request().Context())
|
||||
nameByID := make(map[string]string, len(allNodes))
|
||||
statusByID := make(map[string]string, len(allNodes))
|
||||
for _, n := range allNodes {
|
||||
nameByID[n.ID] = n.Name
|
||||
statusByID[n.ID] = n.Status
|
||||
}
|
||||
loadedByModel = make(map[string][]loadedOn)
|
||||
for _, nm := range nodeModels {
|
||||
loadedByModel[nm.ModelName] = append(loadedByModel[nm.ModelName], loadedOn{
|
||||
NodeID: nm.NodeID,
|
||||
NodeName: nameByID[nm.NodeID],
|
||||
State: nm.State,
|
||||
NodeStatus: statusByID[nm.NodeID],
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result := make([]modelCapability, 0, len(modelConfigs)+len(modelsWithoutConfig))
|
||||
seen := make(map[string]bool, len(modelConfigs)+len(modelsWithoutConfig))
|
||||
for _, cfg := range modelConfigs {
|
||||
seen[cfg.Name] = true
|
||||
result = append(result, modelCapability{
|
||||
ID: cfg.Name,
|
||||
Capabilities: cfg.KnownUsecaseStrings,
|
||||
Backend: cfg.Backend,
|
||||
Disabled: cfg.IsDisabled(),
|
||||
Pinned: cfg.IsPinned(),
|
||||
LoadedOn: loadedByModel[cfg.Name],
|
||||
})
|
||||
}
|
||||
for _, name := range modelsWithoutConfig {
|
||||
seen[name] = true
|
||||
result = append(result, modelCapability{
|
||||
ID: name,
|
||||
Capabilities: []string{},
|
||||
LoadedOn: loadedByModel[name],
|
||||
})
|
||||
}
|
||||
// Emit entries for cluster models that have no local config — these
|
||||
// are the actual ghosts. Without this the operator would have no way
|
||||
// to see a model the cluster is running if its config file wasn't
|
||||
// synced to this frontend's filesystem.
|
||||
for name, loc := range loadedByModel {
|
||||
if seen[name] {
|
||||
continue
|
||||
}
|
||||
result = append(result, modelCapability{
|
||||
ID: name,
|
||||
Capabilities: []string{},
|
||||
LoadedOn: loc,
|
||||
Source: "registry-only",
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user