feat(ui): show cluster distribution of models in the System page

When a frontend restarted in distributed mode, models that workers had
already loaded weren't visible until the operator clicked into each node
manually — the /api/models/capabilities endpoint only knew about
configs on the frontend's filesystem, not the registry-backed truth.

/api/models/capabilities now joins in ListAllLoadedModels() when the
registry is active, returning loaded_on[] with node id/name/state/status
for each model. Models that live in the registry but lack a local config
(the actual ghosts, not recovered from the frontend's file cache) still
surface with source="registry-only" so operators can see and persist
them; without that emission they'd be invisible to this frontend.

Manage → Models replaces the old Running/Idle pill with a distribution
cell that lists the first three nodes the model is loaded on as chips
colored by state (green loaded, blue loading, amber anything else). On
wider clusters the remaining count collapses into a +N chip with a
title-attribute breakdown. Disabled / single-node behavior unchanged.

Registry-only models (source="registry-only") get an extra "Adopted"
ghost-icon chip whose hover text explains what it means and how to make
the model permanent.

Distributed mode also enables a 10s auto-refresh and a "Last synced Xs
ago" indicator next to the Update button so ghost rows drop off within
one reconcile tick after their owning process dies. Non-distributed
mode is untouched — no polling, no node chips, same old Running/Idle.
This commit is contained in:
Ettore Di Giacinto
2026-04-19 08:37:45 +00:00
parent f0ab68e352
commit 92b9e22dc9
2 changed files with 141 additions and 20 deletions

View File

@@ -80,6 +80,35 @@ export default function Manage() {
nodesApi.list().then(() => setDistributedMode(true)).catch(() => {})
}, [fetchLoadedModels, fetchBackends])
// Auto-refresh the Models tab every 10s in distributed mode so ghost models
// (loaded on a worker but absent from this frontend's in-memory cache)
// clear on their own without the user clicking Update.
// lastSyncedAt: timestamp (ms) of the most recent auto-refresh kick-off.
// nowTick: clock value (ms) that the "Last synced" label is computed against.
const [lastSyncedAt, setLastSyncedAt] = useState(() => Date.now())
const [nowTick, setNowTick] = useState(() => Date.now())
useEffect(() => {
// Poll only while distributed mode is on and the Models tab is showing.
if (!distributedMode || activeTab !== 'models') return
const interval = setInterval(() => {
refetchModels()
fetchLoadedModels()
// Stamped when the refresh is started; the two calls above are not awaited
// here, so this is not the time their results arrive.
setLastSyncedAt(Date.now())
}, 10000)
// Cleanup: stop polling when a dependency changes or the component unmounts.
return () => clearInterval(interval)
}, [distributedMode, activeTab, refetchModels, fetchLoadedModels])
// Drive the "last synced Ns ago" label: tick once per second, in distributed
// mode only, so the label stays current between refreshes.
// NOTE(review): nowTick is state of this component, so each tick re-renders
// it — confirm the table stays cheap to render at this rate.
useEffect(() => {
if (!distributedMode) return
const interval = setInterval(() => setNowTick(Date.now()), 1000)
return () => clearInterval(interval)
}, [distributedMode])
// Human-readable age of the last sync, derived from nowTick - lastSyncedAt:
// "just now" under 5s, "<n>s ago" under a minute, "<n>m ago" (whole minutes)
// after that. Negative differences are clamped to 0.
const lastSyncedAgo = (() => {
  const elapsedMs = nowTick - lastSyncedAt
  const elapsedSec = Math.max(0, Math.floor(elapsedMs / 1000))
  if (elapsedSec < 5) return 'just now'
  return elapsedSec < 60
    ? `${elapsedSec}s ago`
    : `${Math.floor(elapsedSec / 60)}m ago`
})()
// Fetch available backend upgrades
useEffect(() => {
if (activeTab === 'backends') {
@@ -291,7 +320,12 @@ export default function Manage() {
{/* Models Tab */}
{activeTab === 'models' && (
<div>
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'flex-end', marginBottom: 'var(--spacing-md)' }}>
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'flex-end', gap: 'var(--spacing-sm)', marginBottom: 'var(--spacing-md)' }}>
{distributedMode && (
<span className="cell-muted" title="Auto-refreshes every 10s in distributed mode so ghost models clear promptly">
<i className="fas fa-rotate" /> Last synced {lastSyncedAgo}
</span>
)}
<button className="btn btn-secondary btn-sm" onClick={handleReload} disabled={reloading}>
<i className={`fas ${reloading ? 'fa-spinner fa-spin' : 'fa-rotate'}`} />
{reloading ? 'Updating...' : 'Update'}
@@ -376,21 +410,47 @@ export default function Manage() {
</div>
</div>
</td>
{/* Status */}
{/* Status / Distribution */}
<td>
{model.disabled ? (
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
<i className="fas fa-ban" style={{ fontSize: '6px' }} /> Disabled
</span>
) : loadedModelIds.has(model.id) ? (
<span className="badge badge-success">
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Running
</span>
) : (
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Idle
</span>
)}
<div className="cell-stack">
{model.disabled ? (
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
<i className="fas fa-ban" /> Disabled
</span>
) : model.loaded_on && model.loaded_on.length > 0 ? (
// Distributed: surface where the model is actually loaded
// so operators don't have to expand each node manually.
<div className="badge-row">
{model.loaded_on.slice(0, 3).map(l => (
<span
key={l.node_id}
className={`badge ${l.state === 'loaded' ? 'badge-success' : l.state === 'loading' ? 'badge-info' : 'badge-warning'}`}
title={`${l.node_name}${l.state} (${l.node_status})`}
>
<i className="fas fa-server" /> {l.node_name}
</span>
))}
{model.loaded_on.length > 3 && (
<span className="badge" title={model.loaded_on.map(l => `${l.node_name} (${l.state})`).join('\n')}>
+{model.loaded_on.length - 3}
</span>
)}
</div>
) : loadedModelIds.has(model.id) ? (
<span className="badge badge-success">
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Running
</span>
) : (
<span className="badge" style={{ background: 'var(--color-bg-tertiary)', color: 'var(--color-text-muted)' }}>
<i className="fas fa-circle" style={{ fontSize: '6px' }} /> Idle
</span>
)}
{model.source === 'registry-only' && (
<span className="badge badge-warning" title="Discovered on a worker but not configured locally. Persist the config to make it permanent.">
<i className="fas fa-ghost" /> Adopted
</span>
)}
</div>
</td>
{/* Backend */}
<td>

View File

@@ -510,28 +510,89 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
modelConfigs := cl.GetAllModelsConfigs()
modelsWithoutConfig, _ := galleryop.ListModels(cl, ml, config.NoFilterFn, galleryop.LOOSE_ONLY)
// loadedOn describes one node on which a model is loaded. It is serialized
// as an element of a model's "loaded_on" JSON array.
type loadedOn struct {
// NodeID is the ID of the node, taken from the registry's loaded-model entry.
NodeID string `json:"node_id"`
// NodeName is the node's name looked up by NodeID; empty when the lookup
// has no entry for that ID.
NodeName string `json:"node_name"`
// State is the model's state on that node as reported by the registry
// (the UI distinguishes "loaded" and "loading" from everything else).
State string `json:"state"`
// NodeStatus is the node's own status looked up by NodeID; empty when the
// lookup has no entry for that ID.
NodeStatus string `json:"node_status"`
}
type modelCapability struct {
ID string `json:"id"`
Capabilities []string `json:"capabilities"`
Backend string `json:"backend"`
Disabled bool `json:"disabled"`
Pinned bool `json:"pinned"`
ID string `json:"id"`
Capabilities []string `json:"capabilities"`
Backend string `json:"backend"`
Disabled bool `json:"disabled"`
Pinned bool `json:"pinned"`
// LoadedOn is populated only when the node registry is active
// (distributed mode). Lets the UI show "loaded on worker-1" without
// the operator having to expand every node manually. An empty slice
// would report "no loaded replicas" and nil "not in cluster mode", but
// with omitempty both are dropped from the JSON, and the frontend treats
// a missing field as "no distribution info".
LoadedOn []loadedOn `json:"loaded_on,omitempty"`
// Source="registry-only" marks models adopted from the cluster that
// have no local config yet (ghosts that the reconciler discovered).
Source string `json:"source,omitempty"`
}
// Join with the node registry when we have one (distributed mode). A
// single registry fetch + map join beats per-model queries for the
// 100-model case.
// loadedByModel maps a model name to the nodes it is loaded on. It stays
// nil when there is no registry or when listing loaded models fails; reading
// a missing key from it (nil or not) yields a nil slice.
var loadedByModel map[string][]loadedOn
if ds := applicationInstance.Distributed(); ds != nil && ds.Registry != nil {
nodeModels, err := ds.Registry.ListAllLoadedModels(c.Request().Context())
// A listing error is not reported to the caller; the response simply
// carries no distribution info.
if err == nil {
// The List error is discarded. NOTE(review): on failure allNodes is
// presumably empty, leaving NodeName/NodeStatus as empty strings — confirm.
allNodes, _ := ds.Registry.List(c.Request().Context())
// Lookup tables from node ID to display name and status.
nameByID := make(map[string]string, len(allNodes))
statusByID := make(map[string]string, len(allNodes))
for _, n := range allNodes {
nameByID[n.ID] = n.Name
statusByID[n.ID] = n.Status
}
loadedByModel = make(map[string][]loadedOn)
// Group the flat (node, model) entries by model name.
for _, nm := range nodeModels {
loadedByModel[nm.ModelName] = append(loadedByModel[nm.ModelName], loadedOn{
NodeID: nm.NodeID,
NodeName: nameByID[nm.NodeID],
State: nm.State,
NodeStatus: statusByID[nm.NodeID],
})
}
}
}
result := make([]modelCapability, 0, len(modelConfigs)+len(modelsWithoutConfig))
seen := make(map[string]bool, len(modelConfigs)+len(modelsWithoutConfig))
for _, cfg := range modelConfigs {
seen[cfg.Name] = true
result = append(result, modelCapability{
ID: cfg.Name,
Capabilities: cfg.KnownUsecaseStrings,
Backend: cfg.Backend,
Disabled: cfg.IsDisabled(),
Pinned: cfg.IsPinned(),
LoadedOn: loadedByModel[cfg.Name],
})
}
for _, name := range modelsWithoutConfig {
seen[name] = true
result = append(result, modelCapability{
ID: name,
Capabilities: []string{},
LoadedOn: loadedByModel[name],
})
}
// Emit entries for cluster models that have no local config — these
// are the actual ghosts. Without this the operator would have no way
// to see a model the cluster is running if its config file wasn't
// synced to this frontend's filesystem.
// When loadedByModel is nil (no registry, or the listing failed) this loop
// does nothing.
// NOTE(review): Go map iteration order is unspecified, so the relative order
// of these registry-only entries can differ between requests unless the
// result is sorted later — confirm.
for name, loc := range loadedByModel {
// Skip names already emitted from modelConfigs or modelsWithoutConfig.
if seen[name] {
continue
}
result = append(result, modelCapability{
ID: name,
Capabilities: []string{},
LoadedOn: loc,
Source: "registry-only",
})
}