Compare commits

...

1 Commits

Author SHA1 Message Date
Alex Cheema
59b0deb4ab feat: add MetaInstance dashboard UI and remove MlxIbv
Add MetaInstancePanel component showing meta-instance status cards with
create/delete support. Integrates into both welcome and chat sidebars
with status indicators, error display, and topology hover highlighting.
Remove deprecated MlxIbv runtime type from all dashboard components.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 09:57:30 -08:00
6 changed files with 365 additions and 26 deletions

View File

@@ -185,11 +185,7 @@
let instanceType: string | null = null;
if (instanceTag === "MlxRingInstance") instanceType = "MLX Ring";
else if (
instanceTag === "MlxIbvInstance" ||
instanceTag === "MlxJacclInstance"
)
instanceType = "MLX RDMA";
else if (instanceTag === "MlxJacclInstance") instanceType = "MLX RDMA";
let sharding: string | null = null;
const inst = instance as {

View File

@@ -0,0 +1,256 @@
<script lang="ts">
import type { MetaInstance } from "$lib/stores/app.svelte";
/** Props accepted by the meta-instance panel component. */
interface Props {
/** Meta-instances to render; the template draws one card per entry. */
metaInstances: Record<string, MetaInstance>;
/** Instance map that is searched for an entry whose `metaInstanceId` matches a card. */
instances: Record<string, unknown>;
/** Called with the meta-instance ID when a card's DELETE button is clicked. */
onDelete?: (metaInstanceId: string) => void;
/** Called with a set of node IDs when the pointer enters a card that has nodes to highlight. */
onHoverNodes?: (nodeIds: Set<string>) => void;
/** Called when the pointer leaves a card. */
onHoverEnd?: () => void;
}
// Read the component props through the `$props()` rune.
let { metaInstances, instances, onDelete, onHoverNodes, onHoverEnd }: Props =
$props();
/**
 * Unwraps a single-key "tagged" object such as `{ MlxRingInstance: {...} }`.
 *
 * @param obj - Candidate value; anything that is not a non-null object is rejected.
 * @returns `[tag, payload]` when `obj` has exactly one own enumerable key,
 *   otherwise `[null, null]`.
 */
function getTagged(obj: unknown): [string | null, unknown] {
  if (typeof obj !== "object" || obj === null) {
    return [null, null];
  }

  const record = obj as Record<string, unknown>;
  const [tag, ...others] = Object.keys(record);

  // Zero keys or more than one key means this is not a tagged wrapper.
  if (tag === undefined || others.length > 0) {
    return [null, null];
  }
  return [tag, record[tag]];
}
/** Summary of the instance that `findLinkedInstance` matched to a meta-instance. */
interface LinkedInstance {
/** Key of the matching entry in the `instances` map. */
instanceId: string;
/** The instance's `shardAssignments.modelId`, or "Unknown" when it is missing or empty. */
modelId: string;
/** Keys of the instance's `shardAssignments.nodeToRunner` map; empty when that map is missing. */
nodeIds: string[];
}
/**
 * Looks up the instance that belongs to the given meta-instance.
 *
 * Walks the `instances` prop, unwraps each tagged entry, and summarises the
 * first one whose `metaInstanceId` equals the requested ID.
 *
 * @param metaInstanceId - ID of the meta-instance to match.
 * @returns The matching instance's ID, model ID and node IDs, or `null` when
 *   no entry matches.
 */
function findLinkedInstance(metaInstanceId: string): LinkedInstance | null {
  type LinkableInstance = {
    metaInstanceId?: string;
    shardAssignments?: {
      modelId?: string;
      nodeToRunner?: Record<string, string>;
    };
  };

  for (const [instanceId, wrapped] of Object.entries(instances)) {
    const payload = getTagged(wrapped)[1];
    const isObjectPayload = typeof payload === "object" && payload !== null;
    if (!isObjectPayload) continue;

    const candidate = payload as LinkableInstance;
    if (candidate.metaInstanceId !== metaInstanceId) continue;

    const assignments = candidate.shardAssignments;
    return {
      instanceId,
      modelId: assignments?.modelId || "Unknown",
      nodeIds: Object.keys(assignments?.nodeToRunner ?? {}),
    };
  }

  return null;
}
/** Display status of a meta-instance card. */
type MetaStatus = "active" | "provisioning" | "error" | "retrying";

/**
 * Derives the display status of a meta-instance.
 *
 * A recorded placement or failure error takes precedence over everything else:
 * the status is "retrying" while the consecutive failure count is 1 or 2, and
 * "error" otherwise. Without an error the status is "active" when a linked
 * instance exists and "provisioning" when it does not.
 *
 * @param meta - The meta-instance being rendered.
 * @param linked - The instance linked to it, or `null` when none was found.
 * @returns The status to display.
 */
function getStatus(
  meta: MetaInstance,
  linked: LinkedInstance | null,
): MetaStatus {
  const hasError = Boolean(meta.placementError || meta.lastFailureError);
  if (!hasError) {
    return linked ? "active" : "provisioning";
  }

  const failures = meta.consecutiveFailures;
  const stillRetrying = failures > 0 && failures < 3;
  return stillRetrying ? "retrying" : "error";
}
/**
 * Returns the upper-case badge text shown next to a meta-instance ID.
 *
 * @param status - Status derived for the card.
 * @returns The label for that status.
 */
function statusLabel(status: MetaStatus): string {
  const labels: Record<MetaStatus, string> = {
    active: "ACTIVE",
    provisioning: "PROVISIONING",
    error: "ERROR",
    retrying: "RETRYING",
  };
  return labels[status];
}
/**
 * Returns the utility classes for the small status dot on a card.
 *
 * @param status - Status derived for the card.
 * @returns Background, glow and (for the two in-progress states) pulse classes.
 */
function statusDotClass(status: MetaStatus): string {
  const dotClasses: Record<MetaStatus, string> = {
    active: "bg-green-400 shadow-[0_0_6px_rgba(74,222,128,0.6)]",
    provisioning:
      "bg-yellow-400 animate-pulse shadow-[0_0_6px_rgba(250,204,21,0.6)]",
    error: "bg-red-400 shadow-[0_0_6px_rgba(248,113,113,0.6)]",
    retrying:
      "bg-orange-400 animate-pulse shadow-[0_0_6px_rgba(251,146,60,0.6)]",
  };
  return dotClasses[status];
}
/**
 * Returns the text colour class for the status label on a card.
 *
 * @param status - Status derived for the card.
 * @returns The text colour class for that status.
 */
function statusTextClass(status: MetaStatus): string {
  const textClasses: Record<MetaStatus, string> = {
    active: "text-green-400",
    provisioning: "text-yellow-400",
    error: "text-red-400",
    retrying: "text-orange-400",
  };
  return textClasses[status];
}
/**
 * Returns the border colour classes for a card body, including the left accent.
 *
 * @param status - Status derived for the card.
 * @returns The border colour classes for that status.
 */
function borderClass(status: MetaStatus): string {
  const borderClasses: Record<MetaStatus, string> = {
    active: "border-green-500/30 border-l-green-400",
    provisioning: "border-purple-500/30 border-l-purple-400",
    error: "border-red-500/30 border-l-red-400",
    retrying: "border-orange-500/30 border-l-orange-400",
  };
  return borderClasses[status];
}
/**
 * Returns the border colour class applied to the four corner accents of a card.
 *
 * @param status - Status derived for the card.
 * @returns The corner border colour class for that status.
 */
function cornerClass(status: MetaStatus): string {
  const cornerClasses: Record<MetaStatus, string> = {
    active: "border-green-500/50",
    provisioning: "border-purple-500/50",
    error: "border-red-500/50",
    retrying: "border-orange-500/50",
  };
  return cornerClasses[status];
}
/**
 * Reports the nodes to highlight while the pointer is over a card.
 *
 * The linked instance's node IDs are used when there are any; otherwise the
 * meta-instance's own `nodeIds` are used. Nothing is reported when neither
 * list has entries or when no `onHoverNodes` callback was supplied.
 *
 * @param meta - The meta-instance whose card is hovered.
 * @param linked - The instance linked to it, or `null` when none was found.
 */
function handleHover(meta: MetaInstance, linked: LinkedInstance | null) {
  if (!onHoverNodes) return;

  const highlighted =
    linked && linked.nodeIds.length > 0 ? linked.nodeIds : meta.nodeIds;

  if (highlighted && highlighted.length > 0) {
    onHoverNodes(new Set(highlighted));
  }
}
/**
 * Shortens a model ID to the part after its last `/`.
 *
 * @param modelId - Full model ID, e.g. `org/model-name`.
 * @returns The trailing segment, or the unchanged ID when that segment is
 *   empty (no `/` at all, or a trailing `/`).
 */
function formatModelId(modelId: string): string {
  const tail = modelId.slice(modelId.lastIndexOf("/") + 1);
  return tail === "" ? modelId : tail;
}
</script>
<!-- Panel Header -->
<div class="flex items-center gap-2 mb-4">
<div
class="w-2 h-2 bg-purple-400 rounded-full shadow-[0_0_8px_rgba(168,85,247,0.6)] animate-pulse"
></div>
<h3 class="text-xs text-purple-400 font-mono tracking-[0.2em] uppercase">
Meta-Instances
</h3>
<div
class="flex-1 h-px bg-gradient-to-r from-purple-400/30 to-transparent"
></div>
</div>
<div
class="space-y-3 max-h-72 xl:max-h-96 overflow-y-auto overflow-x-hidden py-px"
>
<!--
  One card per meta-instance. The list is keyed by the record key so that a
  card's DOM follows its meta-instance when entries are added or removed,
  instead of being reused by position.
-->
{#each Object.entries(metaInstances) as [id, meta] (id)}
  {@const linked = findLinkedInstance(meta.metaInstanceId)}
  {@const status = getStatus(meta, linked)}
  {@const corners = cornerClass(status)}
  <div
    class="relative group cursor-default"
    role="group"
    onmouseenter={() => handleHover(meta, linked)}
    onmouseleave={() => onHoverEnd?.()}
  >
    <!-- Corner accents -->
    <div
      class="absolute -top-px -left-px w-2 h-2 border-l border-t {corners}"
    ></div>
    <div
      class="absolute -top-px -right-px w-2 h-2 border-r border-t {corners}"
    ></div>
    <div
      class="absolute -bottom-px -left-px w-2 h-2 border-l border-b {corners}"
    ></div>
    <div
      class="absolute -bottom-px -right-px w-2 h-2 border-r border-b {corners}"
    ></div>
    <div
      class="bg-exo-dark-gray/60 border border-l-2 {borderClass(status)} p-3"
    >
      <!-- Header row: status dot, short ID, status label, delete action -->
      <div class="flex justify-between items-start mb-2 pl-2">
        <div class="flex items-center gap-2">
          <div
            class="w-1.5 h-1.5 {statusDotClass(status)} rounded-full"
          ></div>
          <span class="text-exo-light-gray font-mono text-sm tracking-wider">
            {meta.metaInstanceId.slice(0, 8).toUpperCase()}
          </span>
          <span
            class="{statusTextClass(status)} text-[10px] font-mono tracking-wider"
          >
            {statusLabel(status)}
          </span>
        </div>
        <button
          type="button"
          onclick={() => onDelete?.(meta.metaInstanceId)}
          class="text-xs px-2 py-1 font-mono tracking-wider uppercase border border-red-500/30 text-red-400 hover:bg-red-500/20 hover:text-red-400 hover:border-red-500/50 transition-all duration-200 cursor-pointer"
        >
          DELETE
        </button>
      </div>
      <!-- Details: model, placement settings, pinned nodes, errors, linked instance -->
      <div class="pl-2">
        <div class="text-exo-yellow text-xs font-mono tracking-wide truncate">
          {formatModelId(meta.modelId)}
        </div>
        <div class="text-white/60 text-xs font-mono">
          {meta.sharding} &middot; {meta.instanceMeta} &middot; min {meta.minNodes}
          node{meta.minNodes !== 1 ? "s" : ""}
        </div>
        {#if meta.nodeIds && meta.nodeIds.length > 0}
          <div class="text-white/50 text-[10px] font-mono mt-0.5">
            Pinned: {meta.nodeIds.map((n) => n.slice(0, 8)).join(", ")}
          </div>
        {/if}
        {#if meta.placementError}
          <div
            class="text-red-400/80 text-[10px] font-mono mt-1 truncate"
            title={meta.placementError}
          >
            {meta.placementError}
          </div>
        {/if}
        {#if meta.lastFailureError}
          <div
            class="text-orange-400/80 text-[10px] font-mono mt-0.5 truncate"
            title={meta.lastFailureError}
          >
            Failure: {meta.lastFailureError}
          </div>
        {/if}
        {#if meta.consecutiveFailures > 0}
          <div class="text-orange-400/60 text-[10px] font-mono mt-0.5">
            Retries: {meta.consecutiveFailures}/3
          </div>
        {/if}
        {#if linked}
          <div class="text-purple-400/60 text-[10px] font-mono mt-1">
            Instance: {linked.instanceId.slice(0, 8)} &middot; {linked.nodeIds
              .length} node{linked.nodeIds.length !== 1 ? "s" : ""}
          </div>
        {/if}
      </div>
    </div>
  </div>
{/each}
</div>

View File

@@ -21,7 +21,7 @@
} | null;
nodes?: Record<string, NodeInfo>;
sharding?: "Pipeline" | "Tensor";
runtime?: "MlxRing" | "MlxIbv" | "MlxJaccl";
runtime?: "MlxRing" | "MlxJaccl";
onLaunch?: () => void;
tags?: string[];
apiPreview?: PlacementPreview | null;
@@ -348,7 +348,7 @@
// Debug mode state
const isDebugMode = $derived(debugMode());
const topology = $derived(topologyData());
const isRdma = $derived(runtime === "MlxIbv" || runtime === "MlxJaccl");
const isRdma = $derived(runtime === "MlxJaccl");
// Get interface name for an IP from node data
function getInterfaceForIp(nodeId: string, ip?: string): string | null {
@@ -575,7 +575,7 @@
>
{runtime === "MlxRing"
? "MLX Ring"
: runtime === "MlxIbv" || runtime === "MlxJaccl"
: runtime === "MlxJaccl"
? "MLX RDMA"
: runtime}
</span>

View File

@@ -11,4 +11,5 @@ export { default as FamilySidebar } from "./FamilySidebar.svelte";
export { default as HuggingFaceResultItem } from "./HuggingFaceResultItem.svelte";
export { default as ModelFilterPopover } from "./ModelFilterPopover.svelte";
export { default as ModelPickerGroup } from "./ModelPickerGroup.svelte";
export { default as MetaInstancePanel } from "./MetaInstancePanel.svelte";
export { default as ModelPickerModal } from "./ModelPickerModal.svelte";

View File

@@ -74,6 +74,18 @@ export interface Instance {
};
}
/** A meta-instance record as carried in the `metaInstances` field of the state response. */
export interface MetaInstance {
/** Identifier of the meta-instance; used in the delete request path and shown truncated on its card. */
metaInstanceId: string;
/** Model ID; the card shows the segment after the last `/`. */
modelId: string;
/** Sharding strategy shown on the card. */
sharding: "Pipeline" | "Tensor";
/** Runtime / instance type shown on the card. */
instanceMeta: "MlxRing" | "MlxJaccl";
/** Shown on the card as "min N node(s)"; presumably the minimum node count for placement (confirm against the backend). */
minNodes: number;
/** Node IDs shown on the card as "Pinned"; `null` or empty hides that line. */
nodeIds: string[] | null;
/** Placement error message shown on the card; `null` when there is none. */
placementError: string | null;
/** Failure counter shown on the card as "Retries: N/3" when greater than zero. */
consecutiveFailures: number;
/** Most recent failure message shown on the card after "Failure:"; `null` when there is none. */
lastFailureError: string | null;
}
// Granular node state types from the new state structure
interface RawNodeIdentity {
modelId?: string;
@@ -168,7 +180,7 @@ export interface ModelDownloadStatus {
export interface PlacementPreview {
model_id: string;
sharding: "Pipeline" | "Tensor";
instance_meta: "MlxRing" | "MlxIbv" | "MlxJaccl";
instance_meta: "MlxRing" | "MlxJaccl";
instance: unknown | null;
memory_delta_by_node: Record<string, number> | null;
error: string | null;
@@ -219,10 +231,10 @@ interface RawStateResponse {
string,
{
MlxRingInstance?: Instance;
MlxIbvInstance?: Instance;
MlxJacclInstance?: Instance;
}
>;
metaInstances?: Record<string, MetaInstance>;
runners?: Record<string, unknown>;
downloads?: Record<string, unknown[]>;
// New granular node state fields
@@ -533,6 +545,7 @@ class AppStore {
// Topology state
topologyData = $state<TopologyData | null>(null);
instances = $state<Record<string, unknown>>({});
metaInstances = $state<Record<string, MetaInstance>>({});
runners = $state<Record<string, unknown>>({});
downloads = $state<Record<string, unknown[]>>({});
nodeDisk = $state<
@@ -905,11 +918,7 @@ class AppStore {
let instanceType: string | null = null;
if (instanceTag === "MlxRingInstance") instanceType = "MLX Ring";
else if (
instanceTag === "MlxIbvInstance" ||
instanceTag === "MlxJacclInstance"
)
instanceType = "MLX RDMA";
else if (instanceTag === "MlxJacclInstance") instanceType = "MLX RDMA";
let sharding: string | null = null;
const inst = instance as {
@@ -1271,6 +1280,9 @@ class AppStore {
if (data.runners) {
this.runners = data.runners;
}
if (data.metaInstances) {
this.metaInstances = data.metaInstances;
}
if (data.downloads) {
this.downloads = data.downloads;
}
@@ -3112,6 +3124,7 @@ export const totalTokens = () => appStore.totalTokens;
export const prefillProgress = () => appStore.prefillProgress;
export const topologyData = () => appStore.topologyData;
export const instances = () => appStore.instances;
export const metaInstances = () => appStore.metaInstances;
export const runners = () => appStore.runners;
export const downloads = () => appStore.downloads;
export const nodeDisk = () => appStore.nodeDisk;

View File

@@ -5,6 +5,7 @@
ChatMessages,
ChatSidebar,
ModelCard,
MetaInstancePanel,
ModelPickerModal,
} from "$lib/components";
import {
@@ -24,6 +25,7 @@
lastUpdate,
clearChat,
instances,
metaInstances,
runners,
downloads,
placementPreviews,
@@ -60,6 +62,7 @@
const data = $derived(topologyData());
const update = $derived(lastUpdate());
const instanceData = $derived(instances());
const metaInstancesData = $derived(metaInstances());
const runnersData = $derived(runners());
const downloadsData = $derived(downloads());
const previewsData = $derived(placementPreviews());
@@ -224,7 +227,7 @@
return model.tasks.includes("ImageToImage");
}
let selectedSharding = $state<"Pipeline" | "Tensor">("Pipeline");
type InstanceMeta = "MlxRing" | "MlxIbv" | "MlxJaccl";
type InstanceMeta = "MlxRing" | "MlxJaccl";
// Launch defaults persistence
const LAUNCH_DEFAULTS_KEY = "exo-launch-defaults";
@@ -481,7 +484,7 @@
const matchesSelectedRuntime = (runtime: InstanceMeta): boolean =>
selectedInstanceType === "MlxRing"
? runtime === "MlxRing"
: runtime === "MlxIbv" || runtime === "MlxJaccl";
: runtime === "MlxJaccl";
// Helper to check if a model can be launched (has valid placement with >= minNodes)
function canModelFit(modelId: string): boolean {
@@ -1248,6 +1251,44 @@
}
}
/**
 * Asks the user to confirm and then sends `DELETE /meta_instance/{id}`.
 *
 * Failures are reported on the console only: a non-OK response logs its
 * status code, and a rejected request logs the thrown error. The function
 * never throws.
 *
 * @param metaInstanceId - ID of the meta-instance to delete.
 * @returns A promise that settles once the request has finished or the user
 *   has cancelled the confirmation.
 */
async function deleteMetaInstance(metaInstanceId: string): Promise<void> {
  const shortId = metaInstanceId.slice(0, 8);
  if (!confirm(`Delete meta-instance ${shortId}...?`)) return;

  try {
    // Escape the ID so characters such as "/", "?" or "#" cannot change which
    // path is requested; UUID-style IDs are left unchanged by the encoding.
    const response = await fetch(
      `/meta_instance/${encodeURIComponent(metaInstanceId)}`,
      {
        method: "DELETE",
        headers: { "Content-Type": "application/json" },
      },
    );
    if (!response.ok) {
      console.error("Failed to delete meta-instance:", response.status);
    }
  } catch (error) {
    console.error("Error deleting meta-instance:", error);
  }
}
/**
 * Sends `POST /meta_instance` for the currently selected model.
 *
 * The request body carries the selected model, sharding, instance type and
 * minimum node count, plus the node filter as `node_ids` (`null` when the
 * filter is empty). Does nothing when no model is selected. Failures are
 * reported on the console only: a non-OK response logs its body text, and a
 * rejected request logs the thrown error.
 */
async function createMetaInstance() {
  if (!selectedModelId) {
    return;
  }

  try {
    const payload = {
      model_id: selectedModelId,
      sharding: selectedSharding,
      instance_meta: selectedInstanceType,
      min_nodes: selectedMinNodes,
      node_ids: nodeFilter.size > 0 ? Array.from(nodeFilter) : null,
    };
    const response = await fetch("/meta_instance", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
    if (response.ok) {
      return;
    }
    const details = await response.text();
    console.error("Failed to create meta-instance:", details);
  } catch (error) {
    console.error("Error creating meta-instance:", error);
  }
}
// Helper to unwrap tagged unions like { MlxRingInstance: {...} }
function getTagged(obj: unknown): [string | null, unknown] {
if (!obj || typeof obj !== "object") return [null, null];
@@ -1288,11 +1329,7 @@
// Instance type from tag
let instanceType = "Unknown";
if (instanceTag === "MlxRingInstance") instanceType = "MLX Ring";
else if (
instanceTag === "MlxIbvInstance" ||
instanceTag === "MlxJacclInstance"
)
instanceType = "MLX RDMA";
else if (instanceTag === "MlxJacclInstance") instanceType = "MLX RDMA";
const inst = instance as {
shardAssignments?: {
@@ -1641,6 +1678,7 @@
const nodeCount = $derived(data ? Object.keys(data.nodes).length : 0);
const instanceCount = $derived(Object.keys(instanceData).length);
const metaInstanceCount = $derived(Object.keys(metaInstancesData).length);
// Helper to get the number of nodes in a placement preview
function getPreviewNodeCount(preview: PlacementPreview): number {
@@ -2286,6 +2324,19 @@
<aside
class="w-80 border-l border-exo-yellow/10 bg-exo-dark-gray flex flex-col flex-shrink-0"
>
<!-- Meta-Instances Panel -->
{#if metaInstanceCount > 0}
<div class="p-4 flex-shrink-0">
<MetaInstancePanel
metaInstances={metaInstancesData}
instances={instanceData}
onDelete={deleteMetaInstance}
onHoverNodes={(nodes) => (hoveredPreviewNodes = nodes)}
onHoverEnd={() => (hoveredPreviewNodes = new Set())}
/>
</div>
{/if}
<!-- Running Instances Panel (only shown when instances exist) - Scrollable -->
{#if instanceCount > 0}
<div class="p-4 flex-shrink-0">
@@ -2877,21 +2928,21 @@
</button>
<button
onclick={() => {
selectedInstanceType = "MlxIbv";
selectedInstanceType = "MlxJaccl";
saveLaunchDefaults();
}}
class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 cursor-pointer {selectedInstanceType ===
'MlxIbv'
'MlxJaccl'
? 'bg-transparent text-exo-yellow border-exo-yellow'
: 'bg-transparent text-white/70 border-exo-medium-gray/50 hover:border-exo-yellow/50'}"
>
<span
class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedInstanceType ===
'MlxIbv'
'MlxJaccl'
? 'border-exo-yellow'
: 'border-exo-medium-gray'}"
>
{#if selectedInstanceType === "MlxIbv"}
{#if selectedInstanceType === "MlxJaccl"}
<span class="w-2 h-2 rounded-full bg-exo-yellow"></span>
{/if}
</span>
@@ -3018,6 +3069,15 @@
/>
</div>
{/each}
{#if selectedModelId}
<button
type="button"
onclick={createMetaInstance}
class="w-full mt-3 py-2 px-4 text-xs font-mono tracking-wider uppercase border border-purple-500/30 text-purple-400 hover:bg-purple-500/20 hover:border-purple-500/50 transition-all duration-200 cursor-pointer"
>
CREATE PERSISTENT
</button>
{/if}
</div>
{:else if selectedModel}
<div class="text-center py-4">
@@ -3101,6 +3161,19 @@
</div>
</button>
<!-- Meta-Instances Section -->
{#if metaInstanceCount > 0}
<div class="p-4 flex-shrink-0">
<MetaInstancePanel
metaInstances={metaInstancesData}
instances={instanceData}
onDelete={deleteMetaInstance}
onHoverNodes={(nodes) => (hoveredPreviewNodes = nodes)}
onHoverEnd={() => (hoveredPreviewNodes = new Set())}
/>
</div>
{/if}
<!-- Instances Section (only shown when instances exist) -->
{#if instanceCount > 0}
<div class="p-4 flex-1">