5 Commits

Author      SHA1        Message                                                        Date
Sami Khan   452b49bcd1  previews fix                                                   2025-12-25 03:17:04 +05:00
Evan        b2a6640d96  fix type checker - tests are fixed on new-runner-status-step  2025-12-24 21:31:55 +00:00
Sami Khan   24288bbd93  download stats fix                                             2025-12-25 01:43:01 +05:00
Evan        42f9834297  update api to match (gotta fix some tests)                     2025-12-24 19:54:40 +00:00
Evan        9df1a8e1d6  update to the latest beta_1 dashboard                          2025-12-24 19:54:40 +00:00
25 changed files with 1538 additions and 535 deletions

View File

@@ -9,6 +9,8 @@
"version": "1.0.0",
"dependencies": {
"highlight.js": "^11.11.1",
"katex": "^0.16.27",
"marked": "^17.0.1",
"mode-watcher": "^1.1.0"
},
"devDependencies": {
@@ -861,7 +863,6 @@
"integrity": "sha512-oH8tXw7EZnie8FdOWYrF7Yn4IKrqTFHhXvl8YxXxbKwTMcD/5NNCryUSEXRk2ZR4ojnub0P8rNrsVGHXWqIDtA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@standard-schema/spec": "^1.0.0",
"@sveltejs/acorn-typescript": "^1.0.5",
@@ -901,7 +902,6 @@
"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
"debug": "^4.4.1",
@@ -1518,7 +1518,6 @@
"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~6.21.0"
}
@@ -1528,7 +1527,6 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -1941,7 +1939,6 @@
"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
"dev": true,
"license": "ISC",
"peer": true,
"engines": {
"node": ">=12"
}
@@ -2254,6 +2251,31 @@
"jiti": "lib/jiti-cli.mjs"
}
},
"node_modules/katex": {
"version": "0.16.27",
"resolved": "https://registry.npmjs.org/katex/-/katex-0.16.27.tgz",
"integrity": "sha512-aeQoDkuRWSqQN6nSvVCEFvfXdqo1OQiCmmW1kc9xSdjutPv7BGO7pqY9sQRJpMOGrEdfDgF2TfRXe5eUAD2Waw==",
"funding": [
"https://opencollective.com/katex",
"https://github.com/sponsors/katex"
],
"license": "MIT",
"dependencies": {
"commander": "^8.3.0"
},
"bin": {
"katex": "cli.js"
}
},
"node_modules/katex/node_modules/commander": {
"version": "8.3.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz",
"integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==",
"license": "MIT",
"engines": {
"node": ">= 12"
}
},
"node_modules/kleur": {
"version": "4.1.5",
"resolved": "https://registry.npmjs.org/kleur/-/kleur-4.1.5.tgz",
@@ -2540,6 +2562,18 @@
"@jridgewell/sourcemap-codec": "^1.5.5"
}
},
"node_modules/marked": {
"version": "17.0.1",
"resolved": "https://registry.npmjs.org/marked/-/marked-17.0.1.tgz",
"integrity": "sha512-boeBdiS0ghpWcSwoNm/jJBwdpFaMnZWRzjA6SkUMYb40SVaN1x7mmfGKp0jvexGcx+7y2La5zRZsYFZI6Qpypg==",
"license": "MIT",
"bin": {
"marked": "bin/marked.js"
},
"engines": {
"node": ">= 20"
}
},
"node_modules/mode-watcher": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/mode-watcher/-/mode-watcher-1.1.0.tgz",
@@ -2612,7 +2646,6 @@
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -2800,7 +2833,6 @@
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.45.3.tgz",
"integrity": "sha512-ngKXNhNvwPzF43QqEhDOue7TQTrG09em1sd4HBxVF0Wr2gopAmdEWan+rgbdgK4fhBtSOTJO8bYU4chUG7VXZQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jridgewell/remapping": "^2.3.4",
"@jridgewell/sourcemap-codec": "^1.5.0",
@@ -2945,7 +2977,6 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -2967,7 +2998,6 @@
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.4",

View File

@@ -27,7 +27,8 @@
},
"dependencies": {
"highlight.js": "^11.11.1",
"katex": "^0.16.27",
"marked": "^17.0.1",
"mode-watcher": "^1.1.0"
}
}

View File

@@ -8,89 +8,80 @@
regenerateLastResponse
} from '$lib/stores/app.svelte';
import type { MessageAttachment } from '$lib/stores/app.svelte';
-import { tick, onDestroy } from 'svelte';
+import MarkdownContent from './MarkdownContent.svelte';

interface Props {
	class?: string;
	scrollParent?: HTMLElement | null;
}

let { class: className = '', scrollParent = null }: Props = $props();

const messageList = $derived(messages());
const response = $derived(currentResponse());
const loading = $derived(isLoading());

-// Ref for scroll anchor at bottom
-let scrollAnchorRef: HTMLDivElement | undefined = $state();
-
-// Scroll management
-const SCROLL_BOTTOM_THRESHOLD = 120;
-let autoScrollEnabled = true;
-let currentScrollEl: HTMLElement | null = null;
-
-function resolveScrollElement(): HTMLElement | null {
-	if (scrollParent) return scrollParent;
-	let node: HTMLElement | null = scrollAnchorRef?.parentElement as HTMLElement | null;
-	while (node) {
-		const isScrollable = node.scrollHeight > node.clientHeight + 1;
-		if (isScrollable) return node;
-		node = node.parentElement;
-	}
-	return null;
-}
-
-function handleScroll() {
-	if (!currentScrollEl) return;
-	const distanceFromBottom = currentScrollEl.scrollHeight - currentScrollEl.scrollTop - currentScrollEl.clientHeight;
-	const isNearBottom = distanceFromBottom < SCROLL_BOTTOM_THRESHOLD;
-	autoScrollEnabled = isNearBottom;
-}
-
-function attachScrollListener() {
-	const nextEl = resolveScrollElement();
-	if (currentScrollEl === nextEl) return;
-	if (currentScrollEl) {
-		currentScrollEl.removeEventListener('scroll', handleScroll);
-	}
-	currentScrollEl = nextEl;
-	if (currentScrollEl) {
-		currentScrollEl.addEventListener('scroll', handleScroll);
-		// Initialize state based on current position
-		handleScroll();
-	}
-}
-
-onDestroy(() => {
-	if (currentScrollEl) {
-		currentScrollEl.removeEventListener('scroll', handleScroll);
-	}
-});
-
-$effect(() => {
-	// Re-evaluate scroll container if prop changes or after mount
-	scrollParent;
-	attachScrollListener();
-});
-
-// Auto-scroll to bottom when messages change or response updates, but only if user is near bottom
-$effect(() => {
-	// Track these values to trigger effect
-	const _ = messageList.length;
-	const __ = response;
-	const ___ = loading;
-	tick().then(() => {
-		const el = currentScrollEl ?? resolveScrollElement();
-		if (!el || !scrollAnchorRef) return;
-		const distanceFromBottom = el.scrollHeight - el.scrollTop - el.clientHeight;
-		const isNearBottom = distanceFromBottom < SCROLL_BOTTOM_THRESHOLD;
-		if (autoScrollEnabled || isNearBottom) {
-			scrollAnchorRef.scrollIntoView({ behavior: 'smooth', block: 'end' });
-			autoScrollEnabled = true;
-		}
-	});
-});
+// Scroll management - user controls scroll, show button when not at bottom
+const SCROLL_THRESHOLD = 100;
+let showScrollButton = $state(false);
+let lastMessageCount = 0;
+let containerRef: HTMLDivElement | undefined = $state();
+
+function getScrollContainer(): HTMLElement | null {
+	if (scrollParent) return scrollParent;
+	return containerRef?.parentElement ?? null;
+}
+
+function isNearBottom(el: HTMLElement): boolean {
+	return el.scrollHeight - el.scrollTop - el.clientHeight < SCROLL_THRESHOLD;
+}
+
+function scrollToBottom() {
+	const el = getScrollContainer();
+	if (el) {
+		el.scrollTo({ top: el.scrollHeight, behavior: 'smooth' });
+	}
+}
+
+function updateScrollButtonVisibility() {
+	const el = getScrollContainer();
+	if (!el) return;
+	showScrollButton = !isNearBottom(el);
+}
+
+// Attach scroll listener
+$effect(() => {
+	const el = scrollParent ?? containerRef?.parentElement;
+	if (!el) return;
+	el.addEventListener('scroll', updateScrollButtonVisibility, { passive: true });
+	// Initial check
+	updateScrollButtonVisibility();
+	return () => el.removeEventListener('scroll', updateScrollButtonVisibility);
+});
+
+// Auto-scroll when user sends a new message
+$effect(() => {
+	const count = messageList.length;
+	if (count > lastMessageCount) {
+		const el = getScrollContainer();
+		if (el) {
+			requestAnimationFrame(() => {
+				el.scrollTo({ top: el.scrollHeight, behavior: 'smooth' });
+			});
+		}
+	}
+	lastMessageCount = count;
+});
+
+// Update scroll button visibility when content changes
+$effect(() => {
+	// Track response to trigger re-check during streaming
+	const _ = response;
+	// Small delay to let DOM update
+	requestAnimationFrame(() => updateScrollButtonVisibility());
+});
// Edit state
let editingMessageId = $state<string | null>(null);
@@ -231,7 +222,7 @@ function isThinkingExpanded(messageId: string): boolean {
<div class="flex flex-col gap-4 sm:gap-6 {className}">
{#each messageList as message (message.id)}
<div class="group flex {message.role === 'user' ? 'justify-end' : 'justify-start'}">
<div class="{message.role === 'user' ? 'max-w-[85%] sm:max-w-[70%] flex flex-col items-end' : 'max-w-[95%] sm:max-w-[85%]'}">
<div class="{message.role === 'user' ? 'max-w-[85%] sm:max-w-[70%] flex flex-col items-end' : 'w-full max-w-[98%] sm:max-w-[95%]'}">
{#if message.role === 'assistant'}
<!-- Assistant message header -->
<div class="flex items-center gap-1.5 sm:gap-2 mb-1.5 sm:mb-2">
@@ -305,7 +296,7 @@ function isThinkingExpanded(messageId: string): boolean {
{:else}
<div class="{message.role === 'user'
? 'command-panel rounded-lg rounded-tr-sm inline-block'
-: 'command-panel rounded-lg rounded-tl-sm border-l-2 border-l-exo-yellow/50 inline-block'}">
+: 'command-panel rounded-lg rounded-tl-sm border-l-2 border-l-exo-yellow/50 block w-full'}">
{#if message.role === 'user'}
<!-- User message styling -->
@@ -331,7 +322,7 @@ function isThinkingExpanded(messageId: string): boolean {
{/if}
{#if message.content}
<div class="text-sm text-foreground font-mono tracking-wide whitespace-pre-wrap break-words leading-relaxed">
<div class="text-xs text-foreground font-mono tracking-wide whitespace-pre-wrap break-words leading-relaxed">
{message.content}
</div>
{/if}
@@ -360,7 +351,7 @@ function isThinkingExpanded(messageId: string): boolean {
</svg>
<span>Thinking...</span>
</span>
<span class="text-[10px] tracking-[0.2em] text-exo-light-gray/60">
<span class="text-[10px] tracking-[0.2em] text-exo-light-gray/60 ml-4">
{isThinkingExpanded(message.id) ? 'HIDE' : 'SHOW'}
</span>
</button>
@@ -374,8 +365,8 @@ function isThinkingExpanded(messageId: string): boolean {
{/if}
</div>
{/if}
<div class="text-sm text-foreground font-mono tracking-wide whitespace-pre-wrap break-words leading-relaxed">
{message.content || (loading ? response : '')}
<div class="text-xs text-foreground">
<MarkdownContent content={message.content || (loading ? response : '')} />
{#if loading && !message.content}
<span class="inline-block w-2 h-4 bg-exo-yellow/70 ml-1 cursor-blink"></span>
{/if}
@@ -457,6 +448,20 @@ function isThinkingExpanded(messageId: string): boolean {
</div>
{/if}
<!-- Scroll anchor for auto-scroll -->
<div bind:this={scrollAnchorRef}></div>
<!-- Invisible element for container reference -->
<div bind:this={containerRef}></div>
<!-- Scroll to bottom button -->
{#if showScrollButton}
<button
type="button"
onclick={scrollToBottom}
class="sticky bottom-4 left-1/2 -translate-x-1/2 w-10 h-10 rounded-full bg-exo-dark-gray/90 border border-exo-medium-gray/50 flex items-center justify-center text-exo-light-gray hover:text-exo-yellow hover:border-exo-yellow/50 transition-all shadow-lg cursor-pointer z-10"
title="Scroll to bottom"
>
<svg class="w-5 h-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 14l-7 7m0 0l-7-7m7 7V3" />
</svg>
</button>
{/if}
</div>

View File

@@ -10,7 +10,9 @@ import {
clearChat,
instances,
debugMode,
-	toggleDebugMode
+	toggleDebugMode,
+	topologyOnlyMode,
+	toggleTopologyOnlyMode
} from '$lib/stores/app.svelte';
interface Props {
@@ -23,6 +25,7 @@ import {
const activeId = $derived(activeConversationId());
const instanceData = $derived(instances());
const debugEnabled = $derived(debugMode());
const topologyOnlyEnabled = $derived(topologyOnlyMode());
let searchQuery = $state('');
let editingId = $state<string | null>(null);
@@ -424,6 +427,19 @@ const debugEnabled = $derived(debugMode());
<div class="text-xs text-white/60 font-mono tracking-wider text-center">
{conversationList.length} CONVERSATION{conversationList.length !== 1 ? 'S' : ''}
</div>
<button
type="button"
onclick={toggleTopologyOnlyMode}
class="p-1.5 rounded border border-exo-medium-gray/40 hover:border-exo-yellow/50 transition-colors cursor-pointer"
title="Toggle topology only mode"
>
<svg class="w-4 h-4 {topologyOnlyEnabled ? 'text-exo-yellow' : 'text-exo-medium-gray'}" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="5" r="2" fill="currentColor" />
<circle cx="5" cy="19" r="2" fill="currentColor" />
<circle cx="19" cy="19" r="2" fill="currentColor" />
<path stroke-linecap="round" d="M12 7v5m0 0l-5 5m5-5l5 5" />
</svg>
</button>
</div>
</div>
</aside>

View File

@@ -3,6 +3,9 @@
export let showHome = true;
export let onHome: (() => void) | null = null;
export let showSidebarToggle = false;
export let sidebarVisible = true;
export let onToggleSidebar: (() => void) | null = null;
function handleHome(): void {
if (onHome) {
@@ -14,9 +17,34 @@
window.location.hash = '/';
}
}
function handleToggleSidebar(): void {
if (onToggleSidebar) {
onToggleSidebar();
}
}
</script>
<header class="relative z-20 flex items-center justify-center px-6 pt-8 pb-4 bg-exo-dark-gray">
<!-- Left: Sidebar Toggle -->
{#if showSidebarToggle}
<div class="absolute left-6 top-1/2 -translate-y-1/2">
<button
onclick={handleToggleSidebar}
class="p-2 rounded border border-exo-medium-gray/40 hover:border-exo-yellow/50 transition-colors cursor-pointer"
title={sidebarVisible ? 'Hide sidebar' : 'Show sidebar'}
>
<svg class="w-5 h-5 {sidebarVisible ? 'text-exo-yellow' : 'text-exo-medium-gray'}" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
{#if sidebarVisible}
<path stroke-linecap="round" stroke-linejoin="round" d="M11 19l-7-7 7-7m8 14l-7-7 7-7" />
{:else}
<path stroke-linecap="round" stroke-linejoin="round" d="M13 5l7 7-7 7M5 5l7 7-7 7" />
{/if}
</svg>
</button>
</div>
{/if}
<!-- Center: Logo (clickable to go home) -->
<button
onclick={handleHome}

View File

@@ -0,0 +1,451 @@
<script lang="ts">
import { marked } from 'marked';
import hljs from 'highlight.js';
import katex from 'katex';
import 'katex/dist/katex.min.css';
import { browser } from '$app/environment';
interface Props {
content: string;
class?: string;
}
let { content, class: className = '' }: Props = $props();
let containerRef = $state<HTMLDivElement>();
let processedHtml = $state('');
// Configure marked with syntax highlighting
marked.setOptions({
gfm: true,
breaks: true
});
// Custom renderer for code blocks
const renderer = new marked.Renderer();
renderer.code = function ({ text, lang }: { text: string; lang?: string }) {
const language = lang && hljs.getLanguage(lang) ? lang : 'plaintext';
const highlighted = hljs.highlight(text, { language }).value;
const codeId = `code-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
return `
<div class="code-block-wrapper">
<div class="code-block-header">
<span class="code-language">${language}</span>
<button type="button" class="copy-code-btn" data-code="${encodeURIComponent(text)}" title="Copy code">
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<rect width="14" height="14" x="8" y="8" rx="2" ry="2"/>
<path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/>
</svg>
</button>
</div>
<pre><code class="hljs language-${language}" data-code-id="${codeId}">${highlighted}</code></pre>
</div>
`;
};
// Inline code
renderer.codespan = function ({ text }: { text: string }) {
return `<code class="inline-code">${text}</code>`;
};
marked.use({ renderer });
/**
* Preprocess LaTeX: convert \(...\) to $...$ and \[...\] to $$...$$
* Also protect code blocks from LaTeX processing
*/
function preprocessLaTeX(text: string): string {
// Protect code blocks
const codeBlocks: string[] = [];
let processed = text.replace(/```[\s\S]*?```|`[^`]+`/g, (match) => {
codeBlocks.push(match);
return `<<CODE_${codeBlocks.length - 1}>>`;
});
// Convert \(...\) to $...$
processed = processed.replace(/\\\((.+?)\\\)/g, '$$$1$');
// Convert \[...\] to $$...$$
processed = processed.replace(/\\\[([\s\S]*?)\\\]/g, '$$$$$1$$$$');
// Restore code blocks
processed = processed.replace(/<<CODE_(\d+)>>/g, (_, index) => codeBlocks[parseInt(index)]);
return processed;
}
/**
* Render math expressions with KaTeX after HTML is generated
*/
function renderMath(html: string): string {
// Render display math ($$...$$)
html = html.replace(/\$\$([\s\S]*?)\$\$/g, (_, math) => {
try {
return katex.renderToString(math.trim(), {
displayMode: true,
throwOnError: false,
output: 'html'
});
} catch {
return `<span class="math-error">$$${math}$$</span>`;
}
});
// Render inline math ($...$) but avoid matching currency like $5
html = html.replace(/\$([^\$\n]+?)\$/g, (match, math) => {
// Skip if it looks like currency ($ followed by number)
if (/^\d/.test(math.trim())) {
return match;
}
try {
return katex.renderToString(math.trim(), {
displayMode: false,
throwOnError: false,
output: 'html'
});
} catch {
return `<span class="math-error">$${math}$</span>`;
}
});
return html;
}
function processMarkdown(text: string): string {
try {
// Preprocess LaTeX notation
const preprocessed = preprocessLaTeX(text);
// Parse markdown
let html = marked.parse(preprocessed) as string;
// Render math expressions
html = renderMath(html);
return html;
} catch (error) {
console.error('Markdown processing error:', error);
return text.replace(/\n/g, '<br>');
}
}
async function handleCopyClick(event: Event) {
const target = event.currentTarget as HTMLButtonElement;
const encodedCode = target.getAttribute('data-code');
if (!encodedCode) return;
const code = decodeURIComponent(encodedCode);
try {
await navigator.clipboard.writeText(code);
// Show copied feedback
const originalHtml = target.innerHTML;
target.innerHTML = `
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M20 6L9 17l-5-5"/>
</svg>
`;
target.classList.add('copied');
setTimeout(() => {
target.innerHTML = originalHtml;
target.classList.remove('copied');
}, 2000);
} catch (error) {
console.error('Failed to copy:', error);
}
}
function setupCopyButtons() {
if (!containerRef || !browser) return;
const buttons = containerRef.querySelectorAll<HTMLButtonElement>('.copy-code-btn');
for (const button of buttons) {
if (button.dataset.listenerBound !== 'true') {
button.dataset.listenerBound = 'true';
button.addEventListener('click', handleCopyClick);
}
}
}
$effect(() => {
if (content) {
processedHtml = processMarkdown(content);
} else {
processedHtml = '';
}
});
$effect(() => {
if (containerRef && processedHtml) {
setupCopyButtons();
}
});
</script>
<div bind:this={containerRef} class="markdown-content {className}">
{@html processedHtml}
</div>
<style>
.markdown-content {
line-height: 1.6;
}
/* Paragraphs */
.markdown-content :global(p) {
margin-bottom: 1rem;
}
.markdown-content :global(p:last-child) {
margin-bottom: 0;
}
/* Headers */
.markdown-content :global(h1) {
font-size: 1.5rem;
font-weight: 700;
margin: 1.5rem 0 0.75rem 0;
color: var(--exo-yellow, #ffd700);
}
.markdown-content :global(h2) {
font-size: 1.25rem;
font-weight: 600;
margin: 1.25rem 0 0.5rem 0;
color: var(--exo-yellow, #ffd700);
}
.markdown-content :global(h3) {
font-size: 1.125rem;
font-weight: 600;
margin: 1rem 0 0.5rem 0;
}
.markdown-content :global(h4),
.markdown-content :global(h5),
.markdown-content :global(h6) {
font-size: 1rem;
font-weight: 600;
margin: 0.75rem 0 0.25rem 0;
}
/* Bold and italic */
.markdown-content :global(strong) {
font-weight: 600;
}
.markdown-content :global(em) {
font-style: italic;
}
/* Inline code */
.markdown-content :global(.inline-code) {
background: rgba(255, 215, 0, 0.1);
color: var(--exo-yellow, #ffd700);
padding: 0.125rem 0.375rem;
border-radius: 0.25rem;
font-family: ui-monospace, SFMono-Regular, 'SF Mono', Monaco, Consolas, monospace;
font-size: 0.875em;
}
/* Links */
.markdown-content :global(a) {
color: var(--exo-yellow, #ffd700);
text-decoration: underline;
text-underline-offset: 2px;
}
.markdown-content :global(a:hover) {
opacity: 0.8;
}
/* Lists */
.markdown-content :global(ul) {
list-style-type: disc;
margin-left: 1.5rem;
margin-bottom: 1rem;
}
.markdown-content :global(ol) {
list-style-type: decimal;
margin-left: 1.5rem;
margin-bottom: 1rem;
}
.markdown-content :global(li) {
margin-bottom: 0.25rem;
}
.markdown-content :global(li::marker) {
color: var(--exo-light-gray, #9ca3af);
}
/* Blockquotes */
.markdown-content :global(blockquote) {
border-left: 3px solid var(--exo-yellow, #ffd700);
padding: 0.5rem 1rem;
margin: 1rem 0;
background: rgba(255, 215, 0, 0.05);
border-radius: 0 0.25rem 0.25rem 0;
}
/* Tables */
.markdown-content :global(table) {
width: 100%;
margin: 1rem 0;
border-collapse: collapse;
font-size: 0.875rem;
}
.markdown-content :global(th) {
background: rgba(255, 215, 0, 0.1);
border: 1px solid rgba(255, 215, 0, 0.2);
padding: 0.5rem;
text-align: left;
font-weight: 600;
}
.markdown-content :global(td) {
border: 1px solid rgba(255, 255, 255, 0.1);
padding: 0.5rem;
}
/* Horizontal rule */
.markdown-content :global(hr) {
border: none;
border-top: 1px solid rgba(255, 255, 255, 0.1);
margin: 1.5rem 0;
}
/* Code block wrapper */
.markdown-content :global(.code-block-wrapper) {
margin: 1rem 0;
border-radius: 0.5rem;
overflow: hidden;
border: 1px solid rgba(255, 215, 0, 0.2);
background: rgba(0, 0, 0, 0.4);
}
.markdown-content :global(.code-block-header) {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem 0.75rem;
background: rgba(255, 215, 0, 0.05);
border-bottom: 1px solid rgba(255, 215, 0, 0.1);
}
.markdown-content :global(.code-language) {
color: var(--exo-yellow, #ffd700);
font-size: 0.7rem;
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.1em;
font-family: ui-monospace, SFMono-Regular, 'SF Mono', Monaco, Consolas, monospace;
}
.markdown-content :global(.copy-code-btn) {
display: flex;
align-items: center;
justify-content: center;
padding: 0.25rem;
background: transparent;
border: none;
color: var(--exo-light-gray, #9ca3af);
cursor: pointer;
transition: color 0.2s;
border-radius: 0.25rem;
}
.markdown-content :global(.copy-code-btn:hover) {
color: var(--exo-yellow, #ffd700);
}
.markdown-content :global(.copy-code-btn.copied) {
color: #22c55e;
}
.markdown-content :global(.code-block-wrapper pre) {
margin: 0;
padding: 1rem;
overflow-x: auto;
background: transparent;
}
.markdown-content :global(.code-block-wrapper code) {
font-family: ui-monospace, SFMono-Regular, 'SF Mono', Monaco, Consolas, monospace;
font-size: 0.8125rem;
line-height: 1.5;
background: transparent;
}
/* Syntax highlighting - dark theme matching EXO style */
.markdown-content :global(.hljs) {
color: #e5e7eb;
}
.markdown-content :global(.hljs-keyword),
.markdown-content :global(.hljs-selector-tag),
.markdown-content :global(.hljs-literal),
.markdown-content :global(.hljs-section),
.markdown-content :global(.hljs-link) {
color: #c084fc;
}
.markdown-content :global(.hljs-string),
.markdown-content :global(.hljs-title),
.markdown-content :global(.hljs-name),
.markdown-content :global(.hljs-type),
.markdown-content :global(.hljs-attribute),
.markdown-content :global(.hljs-symbol),
.markdown-content :global(.hljs-bullet),
.markdown-content :global(.hljs-addition),
.markdown-content :global(.hljs-variable),
.markdown-content :global(.hljs-template-tag),
.markdown-content :global(.hljs-template-variable) {
color: #fbbf24;
}
.markdown-content :global(.hljs-comment),
.markdown-content :global(.hljs-quote),
.markdown-content :global(.hljs-deletion),
.markdown-content :global(.hljs-meta) {
color: #6b7280;
}
.markdown-content :global(.hljs-number),
.markdown-content :global(.hljs-regexp),
.markdown-content :global(.hljs-literal),
.markdown-content :global(.hljs-built_in) {
color: #34d399;
}
.markdown-content :global(.hljs-function),
.markdown-content :global(.hljs-class .hljs-title) {
color: #60a5fa;
}
/* KaTeX math styling */
.markdown-content :global(.katex) {
font-size: 1.1em;
}
.markdown-content :global(.katex-display) {
margin: 1rem 0;
overflow-x: auto;
overflow-y: hidden;
padding: 0.5rem 0;
}
.markdown-content :global(.katex-display > .katex) {
text-align: center;
}
.markdown-content :global(.math-error) {
color: #f87171;
font-family: ui-monospace, SFMono-Regular, 'SF Mono', Monaco, Consolas, monospace;
font-size: 0.875em;
background: rgba(248, 113, 113, 0.1);
padding: 0.125rem 0.25rem;
border-radius: 0.25rem;
}
</style>

View File

@@ -206,12 +206,8 @@ function toggleNodeDetails(nodeId: string): void {
const centerY = topoHeight / 2;
const radius = numNodes === 1 ? 0 : numNodes === 2 ? 45 : Math.min(topoWidth, topoHeight) * 0.32;
-// Use API preview data if available
+// Only use API preview data - no local estimation
const hasApiPreview = apiPreview !== null && apiPreview.error === null && apiPreview.memory_delta_by_node !== null;
-const canFit = hasApiPreview ? true : (() => {
-	const totalAvailable = nodeArray.reduce((sum, n) => sum + n.availableGB, 0);
-	return totalAvailable >= estimatedMemory;
-})();
const error = apiPreview?.error ?? null;
let placementNodes: Array<{
@@ -232,129 +228,39 @@ function toggleNodeDetails(nodeId: string): void {
modelFillHeight: number;
}> = [];
if (hasApiPreview && apiPreview.memory_delta_by_node) {
// Use API placement data
const memoryDelta = apiPreview.memory_delta_by_node;
placementNodes = nodeArray.map((n, i) => {
const deltaBytes = memoryDelta[n.id] ?? 0;
const modelUsageGB = deltaBytes / (1024 * 1024 * 1024);
const isUsed = deltaBytes > 0;
const angle = numNodes === 1 ? 0 : (i / numNodes) * Math.PI * 2 - Math.PI / 2;
const safeTotal = Math.max(n.totalGB, 0.001);
const currentPercent = clampPercent((n.usedGB / safeTotal) * 100);
const newPercent = clampPercent(((n.usedGB + modelUsageGB) / safeTotal) * 100);
const screenHeight = iconSize * 0.58;
return {
id: n.id,
deviceName: n.deviceName,
deviceType: n.deviceType,
totalGB: n.totalGB,
currentUsedGB: n.usedGB,
modelUsageGB,
currentPercent,
newPercent,
isUsed,
x: centerX + Math.cos(angle) * radius,
y: centerY + Math.sin(angle) * radius,
iconSize,
screenHeight,
currentFillHeight: screenHeight * (currentPercent / 100),
modelFillHeight: screenHeight * ((newPercent - currentPercent) / 100)
};
});
} else if (apiPreview?.error) {
// API returned an error - model can't fit, show all nodes as unused
placementNodes = nodeArray.map((n, i) => {
const angle = numNodes === 1 ? 0 : (i / numNodes) * Math.PI * 2 - Math.PI / 2;
const safeTotal = Math.max(n.totalGB, 0.001);
const currentPercent = clampPercent((n.usedGB / safeTotal) * 100);
const screenHeight = iconSize * 0.58;
return {
id: n.id,
deviceName: n.deviceName,
deviceType: n.deviceType,
totalGB: n.totalGB,
currentUsedGB: n.usedGB,
modelUsageGB: 0,
currentPercent,
newPercent: currentPercent,
isUsed: false,
x: centerX + Math.cos(angle) * radius,
y: centerY + Math.sin(angle) * radius,
iconSize,
screenHeight,
currentFillHeight: screenHeight * (currentPercent / 100),
modelFillHeight: 0
};
});
} else {
// Fallback: local estimation based on sharding strategy
const memoryNeeded = estimatedMemory;
// Use API placement data directly
const memoryDelta = apiPreview?.memory_delta_by_node ?? {};
placementNodes = nodeArray.map((n, i) => {
const deltaBytes = memoryDelta[n.id] ?? 0;
const modelUsageGB = deltaBytes / (1024 * 1024 * 1024);
const isUsed = deltaBytes > 0;
const angle = numNodes === 1 ? 0 : (i / numNodes) * Math.PI * 2 - Math.PI / 2;
const safeTotal = Math.max(n.totalGB, 0.001);
const currentPercent = clampPercent((n.usedGB / safeTotal) * 100);
const newPercent = clampPercent(((n.usedGB + modelUsageGB) / safeTotal) * 100);
const screenHeight = iconSize * 0.58;
if (sharding === 'Pipeline') {
const memoryPerNode = memoryNeeded / numNodes;
placementNodes = nodeArray.map((n, i) => {
const angle = numNodes === 1 ? 0 : (i / numNodes) * Math.PI * 2 - Math.PI / 2;
const safeTotal = Math.max(n.totalGB, 0.001);
const currentPercent = clampPercent((n.usedGB / safeTotal) * 100);
const newPercent = clampPercent(((n.usedGB + memoryPerNode) / safeTotal) * 100);
const screenHeight = iconSize * 0.58;
return {
id: n.id,
deviceName: n.deviceName,
deviceType: n.deviceType,
totalGB: n.totalGB,
currentUsedGB: n.usedGB,
modelUsageGB: memoryPerNode,
currentPercent,
newPercent,
isUsed: true,
x: centerX + Math.cos(angle) * radius,
y: centerY + Math.sin(angle) * radius,
iconSize,
screenHeight,
currentFillHeight: screenHeight * (currentPercent / 100),
modelFillHeight: screenHeight * ((newPercent - currentPercent) / 100)
};
});
} else {
let remaining = memoryNeeded;
placementNodes = nodeArray.map((n, i) => {
const allocated = Math.min(remaining, n.availableGB);
remaining -= allocated;
const isUsed = allocated > 0;
const angle = numNodes === 1 ? 0 : (i / numNodes) * Math.PI * 2 - Math.PI / 2;
const safeTotal = Math.max(n.totalGB, 0.001);
const currentPercent = clampPercent((n.usedGB / safeTotal) * 100);
const newPercent = clampPercent(((n.usedGB + allocated) / safeTotal) * 100);
const screenHeight = iconSize * 0.58;
return {
id: n.id,
deviceName: n.deviceName,
deviceType: n.deviceType,
totalGB: n.totalGB,
currentUsedGB: n.usedGB,
modelUsageGB: allocated,
currentPercent,
newPercent,
isUsed,
x: centerX + Math.cos(angle) * radius,
y: centerY + Math.sin(angle) * radius,
iconSize,
screenHeight,
currentFillHeight: screenHeight * (currentPercent / 100),
modelFillHeight: screenHeight * ((newPercent - currentPercent) / 100)
};
});
}
}
return {
id: n.id,
deviceName: n.deviceName,
deviceType: n.deviceType,
totalGB: n.totalGB,
currentUsedGB: n.usedGB,
modelUsageGB,
currentPercent,
newPercent,
isUsed,
x: centerX + Math.cos(angle) * radius,
y: centerY + Math.sin(angle) * radius,
iconSize,
screenHeight,
currentFillHeight: screenHeight * (currentPercent / 100),
modelFillHeight: screenHeight * ((newPercent - currentPercent) / 100)
};
});
const totalAvailable = nodeArray.reduce((sum, n) => sum + n.availableGB, 0);
return { nodes: placementNodes, canFit: hasApiPreview || canFit, totalAvailable, topoWidth, topoHeight, error };
return { nodes: placementNodes, canFit: hasApiPreview, totalAvailable, topoWidth, topoHeight, error };
});
const canFit = $derived(apiPreview ? apiPreview.error === null : placementPreview().canFit);
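With the local estimation removed, the preview's per-node fill levels become a pure function of the API's memory_delta_by_node payload. A trimmed TypeScript sketch of that computation; field names follow the diff, the input data is made up:

// --- sketch: node fill percentages from API byte deltas ---
interface NodeMem { id: string; usedGB: number; totalGB: number }

const clampPercent = (p: number) => Math.min(100, Math.max(0, p));

function fillPercents(nodes: NodeMem[], memoryDeltaByNode: Record<string, number>) {
	return nodes.map((n) => {
		const modelUsageGB = (memoryDeltaByNode[n.id] ?? 0) / 1024 ** 3; // bytes -> GiB
		const safeTotal = Math.max(n.totalGB, 0.001); // guard divide-by-zero
		return {
			id: n.id,
			isUsed: modelUsageGB > 0, // nodes with no delta take no part in the placement
			currentPercent: clampPercent((n.usedGB / safeTotal) * 100),
			newPercent: clampPercent(((n.usedGB + modelUsageGB) / safeTotal) * 100)
		};
	});
}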

View File

@@ -4,4 +4,5 @@ export { default as ChatMessages } from './ChatMessages.svelte';
export { default as ChatAttachments } from './ChatAttachments.svelte';
export { default as ChatSidebar } from './ChatSidebar.svelte';
export { default as ModelCard } from './ModelCard.svelte';
export { default as MarkdownContent } from './MarkdownContent.svelte';

View File

@@ -327,6 +327,8 @@ class AppStore {
isTopologyMinimized = $state(false);
isSidebarOpen = $state(false); // Hidden by default, shown when in chat mode
debugMode = $state(false);
topologyOnlyMode = $state(false);
chatSidebarVisible = $state(true); // Shown by default
private fetchInterval: ReturnType<typeof setInterval> | null = null;
private previewsInterval: ReturnType<typeof setInterval> | null = null;
@@ -337,6 +339,8 @@ class AppStore {
this.startPolling();
this.loadConversationsFromStorage();
this.loadDebugModeFromStorage();
this.loadTopologyOnlyModeFromStorage();
this.loadChatSidebarVisibleFromStorage();
}
}
@@ -394,6 +398,44 @@ class AppStore {
}
}
private loadTopologyOnlyModeFromStorage() {
try {
const stored = localStorage.getItem('exo-topology-only-mode');
if (stored !== null) {
this.topologyOnlyMode = stored === 'true';
}
} catch (error) {
console.error('Failed to load topology only mode:', error);
}
}
private saveTopologyOnlyModeToStorage() {
try {
localStorage.setItem('exo-topology-only-mode', this.topologyOnlyMode ? 'true' : 'false');
} catch (error) {
console.error('Failed to save topology only mode:', error);
}
}
private loadChatSidebarVisibleFromStorage() {
try {
const stored = localStorage.getItem('exo-chat-sidebar-visible');
if (stored !== null) {
this.chatSidebarVisible = stored === 'true';
}
} catch (error) {
console.error('Failed to load chat sidebar visibility:', error);
}
}
private saveChatSidebarVisibleToStorage() {
try {
localStorage.setItem('exo-chat-sidebar-visible', this.chatSidebarVisible ? 'true' : 'false');
} catch (error) {
console.error('Failed to save chat sidebar visibility:', error);
}
}
/**
* Create a new conversation
*/
@@ -698,6 +740,34 @@ class AppStore {
this.saveDebugModeToStorage();
}
getTopologyOnlyMode(): boolean {
return this.topologyOnlyMode;
}
setTopologyOnlyMode(enabled: boolean) {
this.topologyOnlyMode = enabled;
this.saveTopologyOnlyModeToStorage();
}
toggleTopologyOnlyMode() {
this.topologyOnlyMode = !this.topologyOnlyMode;
this.saveTopologyOnlyModeToStorage();
}
getChatSidebarVisible(): boolean {
return this.chatSidebarVisible;
}
setChatSidebarVisible(visible: boolean) {
this.chatSidebarVisible = visible;
this.saveChatSidebarVisibleToStorage();
}
toggleChatSidebarVisible() {
this.chatSidebarVisible = !this.chatSidebarVisible;
this.saveChatSidebarVisibleToStorage();
}
startPolling() {
this.fetchState();
this.fetchInterval = setInterval(() => this.fetchState(), 1000);
@@ -888,8 +958,6 @@ class AppStore {
if (lastUserIndex === -1) return;
const lastUserMessage = this.messages[lastUserIndex];
// Remove any messages after the user message
this.messages = this.messages.slice(0, lastUserIndex + 1);
@@ -930,7 +998,10 @@ class AppStore {
}
if (!modelToUse) {
-			assistantMessage.content = 'Error: No model available. Please launch an instance first.';
+			const idx = this.messages.findIndex(m => m.id === assistantMessage.id);
+			if (idx !== -1) {
+				this.messages[idx].content = 'Error: No model available. Please launch an instance first.';
+			}
this.isLoading = false;
this.updateActiveConversation();
return;
@@ -948,7 +1019,10 @@ class AppStore {
if (!response.ok) {
const errorText = await response.text();
-			assistantMessage.content = `Error: ${response.status} - ${errorText}`;
+			const idx = this.messages.findIndex(m => m.id === assistantMessage.id);
+			if (idx !== -1) {
+				this.messages[idx].content = `Error: ${response.status} - ${errorText}`;
+			}
this.isLoading = false;
this.updateActiveConversation();
return;
@@ -956,7 +1030,10 @@ class AppStore {
const reader = response.body?.getReader();
if (!reader) {
-			assistantMessage.content = 'Error: No response stream available';
+			const idx = this.messages.findIndex(m => m.id === assistantMessage.id);
+			if (idx !== -1) {
+				this.messages[idx].content = 'Error: No response stream available';
+			}
this.isLoading = false;
this.updateActiveConversation();
return;
@@ -984,9 +1061,16 @@ class AppStore {
const delta = json.choices?.[0]?.delta?.content;
if (delta) {
fullContent += delta;
-							const { displayContent } = this.stripThinkingTags(fullContent);
+							const { displayContent, thinkingContent } = this.stripThinkingTags(fullContent);
							this.currentResponse = displayContent;
-							assistantMessage.content = displayContent;
+							// Update the assistant message in place (triggers Svelte reactivity)
+							const idx = this.messages.findIndex(m => m.id === assistantMessage.id);
+							if (idx !== -1) {
+								this.messages[idx].content = displayContent;
+								this.messages[idx].thinking = thinkingContent || undefined;
+							}
+							this.persistActiveConversation();
}
} catch {
// Skip malformed JSON
@@ -995,16 +1079,25 @@ class AppStore {
}
}
-		const { displayContent } = this.stripThinkingTags(fullContent);
-		assistantMessage.content = displayContent;
-		this.currentResponse = '';
-		this.updateActiveConversation();
+		// Final cleanup of the message
+		const { displayContent, thinkingContent } = this.stripThinkingTags(fullContent);
+		const idx = this.messages.findIndex(m => m.id === assistantMessage.id);
+		if (idx !== -1) {
+			this.messages[idx].content = displayContent;
+			this.messages[idx].thinking = thinkingContent || undefined;
+		}
+		this.persistActiveConversation();
} catch (error) {
-		assistantMessage.content = `Error: ${error instanceof Error ? error.message : 'Unknown error'}`;
-		this.updateActiveConversation();
+		const idx = this.messages.findIndex(m => m.id === assistantMessage.id);
+		if (idx !== -1) {
+			this.messages[idx].content = `Error: ${error instanceof Error ? error.message : 'Unknown error'}`;
+		}
+		this.persistActiveConversation();
} finally {
this.isLoading = false;
+		this.currentResponse = '';
+		this.updateActiveConversation();
}
}
@@ -1364,6 +1457,8 @@ export const lastUpdate = () => appStore.lastUpdate;
export const isTopologyMinimized = () => appStore.isTopologyMinimized;
export const selectedChatModel = () => appStore.selectedChatModel;
export const debugMode = () => appStore.getDebugMode();
export const topologyOnlyMode = () => appStore.getTopologyOnlyMode();
export const chatSidebarVisible = () => appStore.getChatSidebarVisible();
// Actions
export const startChat = () => appStore.startChat();
@@ -1391,5 +1486,9 @@ export const isSidebarOpen = () => appStore.isSidebarOpen;
export const toggleSidebar = () => appStore.toggleSidebar();
export const toggleDebugMode = () => appStore.toggleDebugMode();
export const setDebugMode = (enabled: boolean) => appStore.setDebugMode(enabled);
export const toggleTopologyOnlyMode = () => appStore.toggleTopologyOnlyMode();
export const setTopologyOnlyMode = (enabled: boolean) => appStore.setTopologyOnlyMode(enabled);
export const toggleChatSidebarVisible = () => appStore.toggleChatSidebarVisible();
export const setChatSidebarVisible = (visible: boolean) => appStore.setChatSidebarVisible(visible);
export const refreshState = () => appStore.fetchState();
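A recurring change in this file: the streaming handlers no longer assign to the captured assistantMessage reference; they look the message up by id and write through this.messages. Writing through the $state-proxied array is what lets Svelte 5 re-render, while mutating the original object that was pushed before proxying can be missed. A trimmed sketch of the pattern (types illustrative):

// --- sketch: in-place message update that stays reactive ---
interface ChatMessage { id: string; content: string; thinking?: string }

function applyDelta(messages: ChatMessage[], id: string, displayContent: string, thinkingContent: string): void {
	// Re-find the message so the write goes through the reactive array, not a stale reference
	const idx = messages.findIndex((m) => m.id === id);
	if (idx !== -1) {
		messages[idx].content = displayContent;
		messages[idx].thinking = thinkingContent || undefined;
	}
}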

View File

@@ -18,6 +18,10 @@
selectedChatModel,
debugMode,
toggleDebugMode,
topologyOnlyMode,
toggleTopologyOnlyMode,
chatSidebarVisible,
toggleChatSidebarVisible,
type DownloadProgress,
type PlacementPreview
} from '$lib/stores/app.svelte';
@@ -37,6 +41,8 @@
const selectedModelId = $derived(selectedPreviewModelId());
const loadingPreviews = $derived(isLoadingPreviews());
const debugEnabled = $derived(debugMode());
const topologyOnlyEnabled = $derived(topologyOnlyMode());
const sidebarVisible = $derived(chatSidebarVisible());
let mounted = $state(false);
@@ -434,13 +440,14 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
return { isDownloading: false, progress: null, perNode: [] };
}
-let totalBytes = 0;
-let downloadedBytes = 0;
+let modelTotalBytes = 0;
+let modelTotalFiles = 0;
let totalSpeed = 0;
-let completedFiles = 0;
-let totalFiles = 0;
+let maxEtaMs = 0;
+let minCompletedFiles = Infinity;
let isDownloading = false;
-const allFiles: DownloadProgress['files'] = [];
+let firstNodeFiles: DownloadProgress['files'] = [];
+const nodeProgresses: number[] = []; // Track downloadedBytes per node for averaging
const perNode: Array<{ nodeId: string; nodeName: string; progress: DownloadProgress }> = [];
// Check all nodes for downloads matching this model
@@ -472,12 +479,25 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
const progress = parseDownloadProgress(downloadPayload);
if (progress) {
-	totalBytes += progress.totalBytes;
-	downloadedBytes += progress.downloadedBytes;
+	// All nodes download the same files, so totalBytes and totalFiles are the same
+	// Just capture from the first node
+	if (modelTotalBytes === 0) {
+		modelTotalBytes = progress.totalBytes;
+		modelTotalFiles = progress.totalFiles;
+		firstNodeFiles = progress.files;
+	}
+	// Track each node's downloaded bytes for averaging
+	nodeProgresses.push(progress.downloadedBytes);
+	// Sum speeds (total bandwidth across cluster)
	totalSpeed += progress.speed;
-	completedFiles += progress.completedFiles;
-	totalFiles += progress.totalFiles;
-	allFiles.push(...progress.files);
+	// Track minimum completed files (all nodes must complete a file for it to count)
+	minCompletedFiles = Math.min(minCompletedFiles, progress.completedFiles);
+	// Track maximum ETA (slowest node determines when download is done)
+	maxEtaMs = Math.max(maxEtaMs, progress.etaMs);
const nodeName = data?.nodes?.[nodeId]?.friendly_name ?? nodeId.slice(0, 8);
perNode.push({ nodeId, nodeName, progress });
@@ -485,21 +505,27 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
}
}
-if (!isDownloading) {
+if (!isDownloading || nodeProgresses.length === 0) {
	return { isDownloading: false, progress: null, perNode: [] };
}

+// Calculate average downloaded bytes across all nodes
+const avgDownloadedBytes = nodeProgresses.reduce((a, b) => a + b, 0) / nodeProgresses.length;
+// Use min completed files; if no nodes found, default to 0
+const completedFiles = minCompletedFiles === Infinity ? 0 : minCompletedFiles;
+
return {
	isDownloading: true,
	progress: {
-		totalBytes,
-		downloadedBytes,
+		totalBytes: modelTotalBytes,
+		downloadedBytes: avgDownloadedBytes,
		speed: totalSpeed,
-		etaMs: totalSpeed > 0 ? ((totalBytes - downloadedBytes) / totalSpeed) * 1000 : 0,
-		percentage: totalBytes > 0 ? (downloadedBytes / totalBytes) * 100 : 0,
+		etaMs: maxEtaMs,
+		percentage: modelTotalBytes > 0 ? (avgDownloadedBytes / modelTotalBytes) * 100 : 0,
		completedFiles,
-		totalFiles,
-		files: allFiles
+		totalFiles: modelTotalFiles,
+		files: firstNodeFiles
},
perNode
};
@@ -540,13 +566,14 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
runnerToNode[runnerId] = nodeId;
}
-let totalBytes = 0;
-let downloadedBytes = 0;
+let modelTotalBytes = 0;
+let modelTotalFiles = 0;
let totalSpeed = 0;
-let completedFiles = 0;
-let totalFiles = 0;
+let maxEtaMs = 0;
+let minCompletedFiles = Infinity;
let isDownloading = false;
-const allFiles: DownloadProgress['files'] = [];
+let firstNodeFiles: DownloadProgress['files'] = [];
+const nodeProgresses: number[] = []; // Track downloadedBytes per node for averaging
const perNode: Array<{ nodeId: string; nodeName: string; progress: DownloadProgress }> = [];
// Check downloads for nodes that are part of this instance
@@ -576,12 +603,25 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
const progress = parseDownloadProgress(downloadPayload);
if (progress) {
-	totalBytes += progress.totalBytes;
-	downloadedBytes += progress.downloadedBytes;
+	// All nodes download the same files, so totalBytes and totalFiles are the same
+	// Just capture from the first node
+	if (modelTotalBytes === 0) {
+		modelTotalBytes = progress.totalBytes;
+		modelTotalFiles = progress.totalFiles;
+		firstNodeFiles = progress.files;
+	}
+	// Track each node's downloaded bytes for averaging
+	nodeProgresses.push(progress.downloadedBytes);
+	// Sum speeds (total bandwidth across cluster)
	totalSpeed += progress.speed;
-	completedFiles += progress.completedFiles;
-	totalFiles += progress.totalFiles;
-	allFiles.push(...progress.files);
+	// Track minimum completed files (all nodes must complete a file for it to count)
+	minCompletedFiles = Math.min(minCompletedFiles, progress.completedFiles);
+	// Track maximum ETA (slowest node determines when download is done)
+	maxEtaMs = Math.max(maxEtaMs, progress.etaMs);
const nodeName = data?.nodes?.[nodeId]?.friendly_name ?? nodeId.slice(0, 8);
perNode.push({ nodeId, nodeName, progress });
@@ -590,23 +630,29 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
}
}
-if (!isDownloading) {
+if (!isDownloading || nodeProgresses.length === 0) {
	// Check runner status for other states
	const statusInfo = deriveInstanceStatus(instanceWrapped);
	return { isDownloading: false, progress: null, statusText: statusInfo.statusText, perNode: [] };
}

+// Calculate average downloaded bytes across all nodes
+const avgDownloadedBytes = nodeProgresses.reduce((a, b) => a + b, 0) / nodeProgresses.length;
+// Use min completed files; if no nodes found, default to 0
+const completedFiles = minCompletedFiles === Infinity ? 0 : minCompletedFiles;
+
return {
	isDownloading: true,
	progress: {
-		totalBytes,
-		downloadedBytes,
+		totalBytes: modelTotalBytes,
+		downloadedBytes: avgDownloadedBytes,
		speed: totalSpeed,
-		etaMs: totalSpeed > 0 ? ((totalBytes - downloadedBytes) / totalSpeed) * 1000 : 0,
-		percentage: totalBytes > 0 ? (downloadedBytes / totalBytes) * 100 : 0,
+		etaMs: maxEtaMs,
+		percentage: modelTotalBytes > 0 ? (avgDownloadedBytes / modelTotalBytes) * 100 : 0,
		completedFiles,
-		totalFiles,
-		files: allFiles
+		totalFiles: modelTotalFiles,
+		files: firstNodeFiles
},
statusText: 'DOWNLOADING',
perNode
@@ -618,10 +664,12 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
function getStatusColor(statusText: string): string {
switch (statusText) {
case 'FAILED': return 'text-red-400';
case 'SHUTDOWN': return 'text-gray-400';
case 'DOWNLOADING': return 'text-blue-400';
case 'LOADING':
case 'WARMING UP':
-		case 'WAITING': return 'text-yellow-400';
+		case 'WAITING':
+		case 'INITIALIZING': return 'text-yellow-400';
case 'RUNNING': return 'text-teal-400';
case 'READY':
case 'LOADED': return 'text-green-400';
@@ -644,12 +692,15 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
if (!r) return null;
const [kind] = getTagged(r);
const statusMap: Record<string, string> = {
RunnerWaitingForInitialization: 'WaitingForInitialization',
RunnerInitializingBackend: 'InitializingBackend',
RunnerWaitingForModel: 'WaitingForModel',
RunnerLoading: 'Loading',
RunnerLoaded: 'Loaded',
RunnerWarmingUp: 'WarmingUp',
RunnerReady: 'Ready',
RunnerRunning: 'Running',
RunnerShutdown: 'Shutdown',
RunnerFailed: 'Failed',
};
return kind ? statusMap[kind] || null : null;
@@ -660,12 +711,15 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
if (statuses.length === 0) return { statusText: 'UNKNOWN', statusClass: 'inactive' };
if (has('Failed')) return { statusText: 'FAILED', statusClass: 'failed' };
if (has('Shutdown')) return { statusText: 'SHUTDOWN', statusClass: 'inactive' };
if (has('Loading')) return { statusText: 'LOADING', statusClass: 'starting' };
if (has('WarmingUp')) return { statusText: 'WARMING UP', statusClass: 'starting' };
if (has('Running')) return { statusText: 'RUNNING', statusClass: 'running' };
if (has('Ready')) return { statusText: 'READY', statusClass: 'loaded' };
if (has('Loaded')) return { statusText: 'LOADED', statusClass: 'loaded' };
if (has('WaitingForModel')) return { statusText: 'WAITING', statusClass: 'starting' };
if (has('InitializingBackend')) return { statusText: 'INITIALIZING', statusClass: 'starting' };
if (has('WaitingForInitialization')) return { statusText: 'INITIALIZING', statusClass: 'starting' };
return { statusText: 'RUNNING', statusClass: 'active' };
}
@@ -1107,16 +1161,47 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
<div class="shooting-star" style="top: 50%; left: 40%; --duration: 45s; --delay: 30s;"></div>
</div>
<HeaderNav showHome={chatStarted} onHome={handleGoHome} />
{#if !topologyOnlyEnabled}
<HeaderNav
showHome={chatStarted}
onHome={handleGoHome}
showSidebarToggle={true}
sidebarVisible={sidebarVisible}
onToggleSidebar={toggleChatSidebarVisible}
/>
{/if}
<!-- Main Content -->
<main class="flex-1 flex overflow-hidden relative">
<!-- Left: Conversation History Sidebar (always visible) -->
<!-- Left: Conversation History Sidebar (hidden in topology-only mode or when toggled off) -->
{#if !topologyOnlyEnabled && sidebarVisible}
<div class="w-80 flex-shrink-0 border-r border-exo-yellow/10">
<ChatSidebar class="h-full" />
</div>
{/if}
{#if !chatStarted}
{#if topologyOnlyEnabled}
<!-- TOPOLOGY ONLY MODE: Full-screen topology -->
<div class="flex-1 flex flex-col min-h-0 min-w-0 p-4" in:fade={{ duration: 300 }}>
<div class="flex-1 relative bg-exo-dark-gray/40 rounded-lg overflow-hidden">
<TopologyGraph class="w-full h-full" highlightedNodes={highlightedNodes()} />
<!-- Exit topology-only mode button -->
<button
type="button"
onclick={toggleTopologyOnlyMode}
class="absolute bottom-4 right-4 p-2 rounded border border-exo-yellow/30 bg-exo-dark-gray/80 hover:border-exo-yellow/50 hover:bg-exo-dark-gray transition-colors cursor-pointer backdrop-blur-sm"
title="Exit topology only mode"
>
<svg class="w-5 h-5 text-exo-yellow" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<circle cx="12" cy="5" r="2" fill="currentColor" />
<circle cx="5" cy="19" r="2" fill="currentColor" />
<circle cx="19" cy="19" r="2" fill="currentColor" />
<path stroke-linecap="round" d="M12 7v5m0 0l-5 5m5-5l5 5" />
</svg>
</button>
</div>
</div>
{:else if !chatStarted}
<!-- WELCOME STATE: Topology + Instance Controls (no left sidebar for cleaner look) -->
<div class="flex-1 flex overflow-visible relative" in:fade={{ duration: 300 }} out:fade={{ duration: 200 }}>
@@ -1611,13 +1696,13 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
in:fade={{ duration: 300, delay: 100 }}
>
<div class="flex-1 overflow-y-auto px-8 py-6" bind:this={chatScrollRef}>
<div class="max-w-3xl mx-auto">
<div class="max-w-7xl mx-auto">
<ChatMessages scrollParent={chatScrollRef} />
</div>
</div>
<div class="flex-shrink-0 px-8 pb-6 pt-4 bg-gradient-to-t from-exo-black via-exo-black to-transparent">
<div class="max-w-3xl mx-auto">
<div class="max-w-7xl mx-auto">
<ChatForm placeholder="Ask anything" showModelSelector={true} />
</div>
</div>
@@ -1655,7 +1740,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
<!-- Panel Header -->
<div class="flex items-center gap-2 mb-4">
<div class="w-2 h-2 bg-exo-yellow rounded-full shadow-[0_0_8px_rgba(255,215,0,0.6)] animate-pulse"></div>
<h3 class="text-sm text-exo-yellow font-mono tracking-[0.2em] uppercase">Instances</h3>
<h3 class="text-xs text-exo-yellow font-mono tracking-[0.2em] uppercase">Instances</h3>
<div class="flex-1 h-px bg-gradient-to-r from-exo-yellow/30 to-transparent"></div>
</div>
<div class="space-y-3 max-h-72 overflow-y-auto pr-1">
@@ -1701,28 +1786,28 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
<div class="flex justify-between items-start mb-2 pl-2">
<div class="flex items-center gap-2">
<div class="w-1.5 h-1.5 {isDownloading ? 'bg-blue-400 animate-pulse' : isFailed ? 'bg-red-400' : isLoading ? 'bg-yellow-400 animate-pulse' : isReady ? 'bg-green-400' : 'bg-teal-400'} rounded-full shadow-[0_0_6px_currentColor]"></div>
<span class="text-exo-light-gray font-mono text-xs tracking-wider">{id.slice(0, 8).toUpperCase()}</span>
<span class="text-exo-light-gray font-mono text-sm tracking-wider">{id.slice(0, 8).toUpperCase()}</span>
</div>
<button
onclick={() => deleteInstance(id)}
class="text-xs px-2 py-1 font-mono tracking-wider uppercase border border-red-500/30 text-red-400/80 hover:bg-red-500/20 hover:text-red-400 hover:border-red-500/50 transition-all duration-200 cursor-pointer"
class="text-xs px-2 py-1 font-mono tracking-wider uppercase border border-red-500/30 text-red-400 hover:bg-red-500/20 hover:text-red-400 hover:border-red-500/50 transition-all duration-200 cursor-pointer"
>
DELETE
</button>
</div>
<div class="pl-2">
<div class="text-exo-yellow text-sm font-mono tracking-wide truncate">{getInstanceModelId(instance)}</div>
<div class="text-exo-yellow text-xs font-mono tracking-wide truncate">{getInstanceModelId(instance)}</div>
<div class="text-white/60 text-xs font-mono">Strategy: <span class="text-white/80">{instanceInfo.sharding} ({instanceInfo.instanceType})</span></div>
{#if instanceModelId && instanceModelId !== 'Unknown' && instanceModelId !== 'Unknown Model'}
<a
class="inline-flex items-center gap-1 text-[10px] text-white/60 hover:text-exo-yellow transition-colors mt-0.5"
class="inline-flex items-center gap-1 text-[11px] text-white/60 hover:text-exo-yellow transition-colors mt-1"
href={`https://huggingface.co/${instanceModelId}`}
target="_blank"
rel="noreferrer noopener"
aria-label="View model on Hugging Face"
>
<span>Hugging Face</span>
<svg class="w-3 h-3" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<svg class="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M14 3h7v7"/>
<path d="M10 14l11-11"/>
<path d="M21 14v6a1 1 0 0 1-1 1h-16a1 1 0 0 1-1-1v-16a1 1 0 0 1 1-1h6"/>
@@ -1733,68 +1818,83 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
<div class="text-white/60 text-xs font-mono">{instanceInfo.nodeNames.join(', ')}</div>
{/if}
{#if debugEnabled && instanceConnections.length > 0}
<div class="mt-1 space-y-0.5">
{#each instanceConnections as conn}
<div class="text-[10px] leading-snug font-mono text-white/70">
<span>{conn.from} -> {conn.to}: {conn.ip}</span>
<span class="{conn.missingIface ? 'text-red-400' : 'text-white/60'}"> ({conn.ifaceLabel})</span>
</div>
{/each}
<div class="mt-2 space-y-1">
{#each instanceConnections as conn}
<div class="text-[11px] leading-snug font-mono text-white/70">
<span>{conn.from} -> {conn.to}: {conn.ip}</span>
<span class="{conn.missingIface ? 'text-red-400' : 'text-white/60'}"> ({conn.ifaceLabel})</span>
</div>
{/each}
</div>
{/if}
<!-- Download Progress -->
{#if downloadInfo.isDownloading && downloadInfo.progress}
<div class="mt-2 space-y-1">
<div class="flex justify-between text-xs font-mono">
<span class="text-blue-400">{downloadInfo.progress.percentage.toFixed(1)}%</span>
<span class="text-exo-light-gray">{formatBytes(downloadInfo.progress.downloadedBytes)}/{formatBytes(downloadInfo.progress.totalBytes)}</span>
</div>
{/if}
<!-- Download Progress -->
{#if downloadInfo.isDownloading && downloadInfo.progress}
<div class="mt-2 space-y-1">
<div class="flex justify-between text-sm font-mono">
<span class="text-blue-400">{downloadInfo.progress.percentage.toFixed(1)}%</span>
<span class="text-exo-light-gray">{formatBytes(downloadInfo.progress.downloadedBytes)}/{formatBytes(downloadInfo.progress.totalBytes)}</span>
</div>
<div class="relative h-1 bg-exo-black/60 rounded-sm overflow-hidden">
<div
class="absolute inset-y-0 left-0 bg-gradient-to-r from-blue-500 to-blue-400 transition-all duration-300"
style="width: {downloadInfo.progress.percentage}%"
></div>
</div>
<div class="flex justify-between text-xs font-mono text-exo-light-gray">
<span>{formatSpeed(downloadInfo.progress.speed)}</span>
<span>ETA: {formatEta(downloadInfo.progress.etaMs)}</span>
<span>{downloadInfo.progress.completedFiles}/{downloadInfo.progress.totalFiles} files</span>
</div>
<div class="relative h-1.5 bg-exo-black/60 rounded-sm overflow-hidden">
<div
class="absolute inset-y-0 left-0 bg-gradient-to-r from-blue-500 to-blue-400 transition-all duration-300"
style="width: {downloadInfo.progress.percentage}%"
></div>
</div>
{#if downloadInfo.perNode.length > 0}
<div class="mt-2 space-y-1.5 max-h-48 overflow-y-auto pr-1">
{#each downloadInfo.perNode as nodeProg}
<div class="rounded border border-exo-medium-gray/40 bg-exo-black/30 p-2">
<div class="flex items-center justify-between text-[11px] font-mono text-exo-light-gray mb-1">
<div class="flex justify-between text-xs font-mono text-exo-light-gray">
<span>{formatSpeed(downloadInfo.progress.speed)}</span>
<span>ETA: {formatEta(downloadInfo.progress.etaMs)}</span>
<span>{downloadInfo.progress.completedFiles}/{downloadInfo.progress.totalFiles} files</span>
</div>
</div>
{#if downloadInfo.perNode.length > 0}
<div class="mt-2 space-y-2 max-h-48 overflow-y-auto pr-1">
{#each downloadInfo.perNode as nodeProg}
{@const nodePercent = Math.min(100, Math.max(0, nodeProg.progress.percentage))}
{@const isExpanded = instanceDownloadExpandedNodes.has(nodeProg.nodeId)}
<div class="rounded border border-exo-medium-gray/40 bg-exo-black/30 p-2">
<button
type="button"
class="w-full text-left space-y-1.5"
onclick={() => toggleInstanceDownloadDetails(nodeProg.nodeId)}
>
<div class="flex items-center justify-between text-[11px] font-mono text-exo-light-gray">
<span class="text-white/80 truncate pr-2">{nodeProg.nodeName}</span>
<span class="text-blue-300">{Math.min(100, Math.max(0, nodeProg.progress.percentage)).toFixed(1)}%</span>
<span class="flex items-center gap-1 text-blue-300">
{nodePercent.toFixed(1)}%
<svg class="w-3 h-3 text-exo-light-gray" viewBox="0 0 20 20" fill="none" stroke="currentColor" stroke-width="2">
<path d="M6 8l4 4 4-4" class={isExpanded ? 'transform rotate-180 origin-center transition-transform duration-150' : 'transition-transform duration-150'}></path>
</svg>
</span>
</div>
<div class="relative h-1 bg-exo-black/60 rounded-sm overflow-hidden mb-1.5">
<div class="relative h-1.5 bg-exo-black/60 rounded-sm overflow-hidden">
<div
class="absolute inset-y-0 left-0 bg-blue-500/80 transition-all duration-300"
style="width: {Math.min(100, Math.max(0, nodeProg.progress.percentage)).toFixed(1)}%"
class="absolute inset-y-0 left-0 bg-gradient-to-r from-blue-500 to-blue-400 transition-all duration-300"
style="width: {nodePercent.toFixed(1)}%"
></div>
</div>
<div class="flex items-center justify-between text-[11px] font-mono text-exo-light-gray mb-1">
<div class="flex items-center justify-between text-[11px] font-mono text-exo-light-gray">
<span>{formatBytes(nodeProg.progress.downloadedBytes)} / {formatBytes(nodeProg.progress.totalBytes)}</span>
<span>{formatSpeed(nodeProg.progress.speed)} • ETA {formatEta(nodeProg.progress.etaMs)}</span>
</div>
{#if nodeProg.progress.files.length > 0}
{@const inProgressFiles = nodeProg.progress.files.filter(f => (f.percentage ?? 0) < 100)}
{@const completedFiles = nodeProg.progress.files.filter(f => (f.percentage ?? 0) >= 100)}
{#if inProgressFiles.length > 0}
<div class="space-y-1">
{#each inProgressFiles as f}
<div class="text-[10px] font-mono text-exo-light-gray/80">
<div class="flex items-center justify-between">
</button>
{#if isExpanded}
<div class="mt-2 space-y-1.5">
{#if nodeProg.progress.files.length === 0}
<div class="text-[11px] font-mono text-exo-light-gray/70">No file details reported.</div>
{:else}
{#each nodeProg.progress.files as f}
{@const filePercent = Math.min(100, Math.max(0, f.percentage ?? 0))}
<div class="rounded border border-exo-medium-gray/30 bg-exo-black/40 p-2">
<div class="flex items-center justify-between text-[10px] font-mono text-exo-light-gray/90">
<span class="truncate pr-2">{f.name}</span>
<span class="text-white/70">{Math.min(100, Math.max(0, f.percentage)).toFixed(1)}%</span>
<span class="text-white/80">{filePercent.toFixed(1)}%</span>
</div>
<div class="relative h-1 bg-exo-black/50 rounded-sm overflow-hidden mt-0.5">
<div class="relative h-1 bg-exo-black/60 rounded-sm overflow-hidden mt-1">
<div
class="absolute inset-y-0 left-0 bg-gradient-to-r from-exo-yellow to-exo-yellow/70"
style="width: {Math.min(100, Math.max(0, f.percentage)).toFixed(1)}%"
class="absolute inset-y-0 left-0 bg-gradient-to-r from-exo-yellow to-exo-yellow/70 transition-all duration-300"
style="width: {filePercent.toFixed(1)}%"
></div>
</div>
<div class="flex items-center justify-between text-[10px] text-exo-light-gray/70 mt-0.5">
@@ -1803,27 +1903,17 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
</div>
</div>
{/each}
</div>
{/if}
{#if completedFiles.length > 0}
<div class="pt-1 space-y-0.5">
{#each completedFiles as f}
<div class="text-[10px] font-mono text-exo-light-gray/70 flex items-center justify-between">
<span class="truncate pr-2">{f.name}</span>
<span class="text-white/60">100%</span>
</div>
{/each}
</div>
{/if}
{/if}
</div>
{/if}
</div>
{/each}
</div>
{/if}
<div class="text-sm text-blue-400 font-mono tracking-wider mt-1">DOWNLOADING</div>
{:else}
<div class="text-sm {getStatusColor(downloadInfo.statusText)} font-mono tracking-wider mt-1">{downloadInfo.statusText}</div>
</div>
{/each}
</div>
{/if}
<div class="text-xs text-blue-400 font-mono tracking-wider mt-1">DOWNLOADING</div>
{:else}
<div class="text-xs {getStatusColor(downloadInfo.statusText)} font-mono tracking-wider mt-1">{downloadInfo.statusText}</div>
{/if}
</div>
</div>
</div>

View File

@@ -345,13 +345,19 @@
<div class="rounded border border-exo-medium-gray/30 bg-exo-dark-gray/60 p-3 space-y-2">
<div class="flex items-center justify-between gap-3">
<div class="min-w-0 space-y-0.5">
<div class="text-sm font-mono text-white truncate">{model.prettyName ?? model.modelId}</div>
<div class="text-[11px] text-exo-light-gray font-mono truncate">
{model.modelId}
</div>
<div class="text-[11px] text-exo-light-gray font-mono">
{formatBytes(model.downloadedBytes)} / {formatBytes(model.totalBytes)}
</div>
<div
class="text-xs font-mono text-white truncate"
title={model.prettyName ?? model.modelId}
>{model.prettyName ?? model.modelId}</div>
<div
class="text-[10px] text-exo-light-gray font-mono truncate"
title={model.modelId}
>{model.modelId}</div>
{#if model.status !== 'completed'}
<div class="text-[11px] text-exo-light-gray font-mono">
{formatBytes(model.downloadedBytes)} / {formatBytes(model.totalBytes)}
</div>
{/if}
</div>
<div class="flex items-center gap-2">
<span class="text-xs font-mono {pct >= 100 ? 'text-green-400' : pct <= 0 ? 'text-red-400' : 'text-exo-yellow'}">
@@ -426,14 +432,14 @@
<style>
.downloads-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
}
@media (min-width: 1024px) {
.downloads-grid {
grid-template-columns: repeat(3, minmax(0, 1fr));
}
}
@media (min-width: 1440px) {
@media (min-width: 1600px) {
.downloads-grid {
grid-template-columns: repeat(4, minmax(0, 1fr));
}

View File

@@ -33,6 +33,7 @@ dependencies = [
"mlx-lm>=0.28.3",
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
]
[project.scripts]

View File

@@ -13,6 +13,12 @@ from hypercorn.asyncio import serve # pyright: ignore[reportUnknownVariableType
from hypercorn.config import Config
from hypercorn.typing import ASGIFramework
from loguru import logger
from openai_harmony import ( # pyright: ignore[reportMissingTypeStubs]
HarmonyEncodingName,
Role,
StreamableParser,
load_harmony_encoding,
)
from exo.master.placement import place_instance as get_instance_placements
from exo.shared.apply import apply
@@ -21,11 +27,13 @@ from exo.shared.logging import InterceptLogger
from exo.shared.models.model_cards import MODEL_CARDS
from exo.shared.models.model_meta import get_model_meta
from exo.shared.types.api import (
ChatCompletionChoice,
ChatCompletionMessage,
ChatCompletionResponse,
CreateInstanceParams,
CreateInstanceResponse,
DeleteInstanceResponse,
FinishReason,
ModelList,
ModelListModel,
PlaceInstanceParams,
@@ -56,7 +64,7 @@ from exo.utils.channels import Receiver, Sender, channel
from exo.utils.dashboard_path import find_dashboard
from exo.utils.event_buffer import OrderedBuffer
HIDE_THINKING = False
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
def chunk_to_response(
@@ -161,7 +169,9 @@ class API:
self.app.delete("/instance/{instance_id}")(self.delete_instance)
self.app.get("/models")(self.get_models)
self.app.get("/v1/models")(self.get_models)
self.app.post("/v1/chat/completions")(self.chat_completions)
self.app.post("/v1/chat/completions", response_model=None)(
self.chat_completions
)
self.app.get("/state")(lambda: self.state)
self.app.get("/events")(lambda: self._event_log)
@@ -177,17 +187,32 @@ class API:
return CreateInstanceResponse(
message="Command received.",
command_id=command.command_id,
model_meta=command.model_meta,
)
async def create_instance(
self, payload: CreateInstanceParams
) -> CreateInstanceResponse:
command = CreateInstance(instance=payload.instance)
instance = payload.instance
model_meta = await resolve_model_meta(instance.shard_assignments.model_id)
required_memory = model_meta.storage_size
available_memory = self._calculate_total_available_memory()
if required_memory > available_memory:
raise HTTPException(
status_code=400,
detail=f"Insufficient memory to create instance. Required: {required_memory.in_gb:.1f}GB, Available: {available_memory.in_gb:.1f}GB",
)
command = CreateInstance(
instance=instance,
)
await self._send(command)
return CreateInstanceResponse(
message="Command received.",
command_id=command.command_id,
model_meta=model_meta,
)
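
Note on the new guard: an oversized model now fails fast with HTTP 400 instead of reaching placement. A hedged client-side sketch of what a caller sees; the POST /instance route and the localhost:52415 address are assumptions, since only DELETE /instance/{instance_id} is visible in this diff:

```python
import requests

instance_payload: dict = {}  # a full Instance body, elided here

resp = requests.post(
    "http://localhost:52415/instance",  # assumed route and address
    json={"instance": instance_payload},
)
if resp.status_code == 400:
    # e.g. "Insufficient memory to create instance. Required: 378.0GB, Available: 64.0GB"
    print(resp.json()["detail"])
```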
async def get_placement(
@@ -352,32 +377,52 @@ class API:
instance_id=instance_id,
)
async def _generate_chat_stream(
self, command_id: CommandId
) -> AsyncGenerator[str, None]:
"""Generate chat completion stream as JSON strings."""
async def _process_gpt_oss(self, token_chunks: Receiver[TokenChunk]):
stream = StreamableParser(encoding, role=Role.ASSISTANT)
thinking = False
async for chunk in token_chunks:
stream.process(chunk.token_id)
delta = stream.last_content_delta
ch = stream.current_channel
if ch == "analysis" and not thinking:
thinking = True
yield chunk.model_copy(update={"text": "<think>"})
if ch != "analysis" and thinking:
thinking = False
yield chunk.model_copy(update={"text": "</think>"})
if delta:
yield chunk.model_copy(update={"text": delta})
if chunk.finish_reason is not None:
if thinking:
yield chunk.model_copy(update={"text": "</think>"})
yield chunk
break
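
The channel-to-tag mapping in `_process_gpt_oss` can be exercised outside the server. A minimal synchronous sketch using only the `openai_harmony` calls imported above, assuming you already have raw gpt-oss token ids from a completion:

```python
from typing import Iterable, Iterator

from openai_harmony import (
    HarmonyEncodingName,
    Role,
    StreamableParser,
    load_harmony_encoding,
)

encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

def replay(token_ids: Iterable[int]) -> Iterator[str]:
    """Yield text deltas, wrapping the 'analysis' channel in <think> tags."""
    stream = StreamableParser(encoding, role=Role.ASSISTANT)
    thinking = False
    for token_id in token_ids:
        stream.process(token_id)
        if stream.current_channel == "analysis" and not thinking:
            thinking = True
            yield "<think>"
        if stream.current_channel != "analysis" and thinking:
            thinking = False
            yield "</think>"
        if stream.last_content_delta:
            yield stream.last_content_delta
    if thinking:  # close the tag if the stream ends mid-analysis
        yield "</think>"
```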
async def _chat_chunk_stream(
self, command_id: CommandId, parse_gpt_oss: bool
) -> AsyncGenerator[TokenChunk, None]:
"""Yield `TokenChunk`s for a given command until completion."""
try:
self._chat_completion_queues[command_id], recv = channel[TokenChunk]()
is_thinking = False
with recv as token_chunks:
async for chunk in token_chunks:
if HIDE_THINKING:
if chunk.text == "<think>":
is_thinking = True
if chunk.text == "</think>":
is_thinking = False
chunk_response: ChatCompletionResponse = chunk_to_response(
chunk, command_id
)
if not (is_thinking and HIDE_THINKING):
logger.debug(f"chunk_response: {chunk_response}")
yield f"data: {chunk_response.model_dump_json()}\n\n"
if chunk.finish_reason is not None:
yield "data: [DONE]\n\n"
break
if parse_gpt_oss:
async for chunk in self._process_gpt_oss(token_chunks):
yield chunk
if chunk.finish_reason is not None:
break
else:
async for chunk in token_chunks:
yield chunk
if chunk.finish_reason is not None:
break
except anyio.get_cancelled_exc_class():
# TODO: TaskCancelled
@@ -392,6 +437,59 @@ class API:
await self._send(command)
del self._chat_completion_queues[command_id]
async def _generate_chat_stream(
self, command_id: CommandId, parse_gpt_oss: bool
) -> AsyncGenerator[str, None]:
"""Generate chat completion stream as JSON strings."""
async for chunk in self._chat_chunk_stream(command_id, parse_gpt_oss):
chunk_response: ChatCompletionResponse = chunk_to_response(
chunk, command_id
)
logger.debug(f"chunk_response: {chunk_response}")
yield f"data: {chunk_response.model_dump_json()}\n\n"
if chunk.finish_reason is not None:
yield "data: [DONE]\n\n"
async def _collect_chat_completion(
self, command_id: CommandId, parse_gpt_oss: bool
) -> ChatCompletionResponse:
"""Collect all token chunks for a chat completion and return a single response."""
text_parts: list[str] = []
model: str | None = None
finish_reason: FinishReason | None = None
async for chunk in self._chat_chunk_stream(command_id, parse_gpt_oss):
if model is None:
model = chunk.model
text_parts.append(chunk.text)
if chunk.finish_reason is not None:
finish_reason = chunk.finish_reason
combined_text = "".join(text_parts)
assert model is not None
return ChatCompletionResponse(
id=command_id,
created=int(time.time()),
model=model,
choices=[
ChatCompletionChoice(
index=0,
message=ChatCompletionMessage(
role="assistant",
content=combined_text,
),
finish_reason=finish_reason,
)
],
)
async def _trigger_notify_user_to_download_model(self, model_id: str) -> None:
logger.warning(
"TODO: we should send a notification to the user to download the model"
@@ -399,10 +497,12 @@ class API:
async def chat_completions(
self, payload: ChatCompletionTaskParams
) -> StreamingResponse:
"""Handle chat completions with proper streaming response."""
) -> ChatCompletionResponse | StreamingResponse:
"""Handle chat completions, supporting both streaming and non-streaming responses."""
model_meta = await resolve_model_meta(payload.model)
payload.model = model_meta.model_id
parse_gpt_oss = "gpt-oss" in model_meta.model_id.lower()
logger.info(f"{parse_gpt_oss=}")
if not any(
instance.shard_assignments.model_id == payload.model
@@ -417,10 +517,13 @@ class API:
request_params=payload,
)
await self._send(command)
return StreamingResponse(
self._generate_chat_stream(command.command_id),
media_type="text/event-stream",
)
if payload.stream:
return StreamingResponse(
self._generate_chat_stream(command.command_id, parse_gpt_oss),
media_type="text/event-stream",
)
return await self._collect_chat_completion(command.command_id, parse_gpt_oss)
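
With the `payload.stream` branch in place, both response modes are reachable from a stock HTTP client. A hedged sketch; the address, port, and model id are assumptions, and the per-chunk JSON schema is whatever `chunk_to_response` emits:

```python
import requests

BASE = "http://localhost:52415"  # assumed API address
body = {
    "model": "llama-3.2-1b",  # assumed short id from MODEL_CARDS
    "messages": [{"role": "user", "content": "Hi"}],
}

# Non-streaming: the server collects every TokenChunk into one ChatCompletionResponse.
resp = requests.post(f"{BASE}/v1/chat/completions", json={**body, "stream": False}, timeout=120)
print(resp.json()["choices"][0]["message"]["content"])

# Streaming: SSE lines of the form "data: {...}", terminated by "data: [DONE]".
with requests.post(
    f"{BASE}/v1/chat/completions", json={**body, "stream": True}, stream=True, timeout=120
) as r:
    for line in r.iter_lines():
        if not line or not line.startswith(b"data: "):
            continue
        payload = line[len(b"data: "):]
        if payload == b"[DONE]":
            break
        print(payload.decode())  # one ChatCompletionResponse JSON per chunk
```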
def _calculate_total_available_memory(self) -> Memory:
"""Calculate total available memory across all nodes in bytes."""
@@ -442,6 +545,8 @@ class API:
name=card.name,
description=card.description,
tags=card.tags,
storage_size_megabytes=int(card.metadata.storage_size.in_mb),
supports_tensor=card.metadata.supports_tensor,
)
for card in MODEL_CARDS.values()
]
@@ -458,7 +563,7 @@ class API:
async with create_task_group() as tg:
self._tg = tg
logger.info("Starting API")
tg.start_soon(self._applystate)
tg.start_soon(self._apply_state)
tg.start_soon(self._pause_on_new_election)
print_startup_banner(self.port)
await serve(
@@ -470,7 +575,7 @@ class API:
self.command_sender.close()
self.global_event_receiver.close()
async def _applystate(self):
async def _apply_state(self):
with self.global_event_receiver as events:
async for f_event in events:
if f_event.origin != self.session_id.master_node_id:

View File

@@ -123,6 +123,8 @@ async def test_master():
pretty_name="Llama 3.2 1B",
n_layers=16,
storage_size=Memory.from_bytes(678948),
hidden_size=7168,
supports_tensor=True,
),
sharding=Sharding.Pipeline,
instance_meta=InstanceMeta.MlxRing,
@@ -180,6 +182,8 @@ async def test_master():
pretty_name="Llama 3.2 1B",
n_layers=16,
storage_size=Memory.from_bytes(678948),
hidden_size=7168,
supports_tensor=True,
),
device_rank=0,
world_size=1,

View File

@@ -49,6 +49,8 @@ def model_meta() -> ModelMetadata:
storage_size=Memory.from_kb(1000),
pretty_name="Test Model",
n_layers=10,
hidden_size=10,
supports_tensor=True,
)
@@ -135,6 +137,8 @@ def test_get_instance_placements_one_node_exact_fit(
storage_size=Memory.from_kb(1000),
pretty_name="Test Model",
n_layers=10,
hidden_size=1000,
supports_tensor=True,
),
)
placements = place_instance(cic, topology, {})
@@ -160,6 +164,8 @@ def test_get_instance_placements_one_node_fits_with_extra_memory(
storage_size=Memory.from_kb(1000),
pretty_name="Test Model",
n_layers=10,
hidden_size=1000,
supports_tensor=True,
),
)
placements = place_instance(cic, topology, {})
@@ -185,6 +191,8 @@ def test_get_instance_placements_one_node_not_fit(
storage_size=Memory.from_kb(1001),
pretty_name="Test Model",
n_layers=10,
hidden_size=1000,
supports_tensor=True,
),
)

View File

@@ -198,6 +198,8 @@ def test_get_shard_assignments(
pretty_name="Test Model",
n_layers=total_layers,
storage_size=Memory.from_kb(1000),
hidden_size=1000,
supports_tensor=True,
)
cycles = topology.get_cycles()
selected_cycle = cycles[0]

View File

@@ -51,6 +51,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="DeepSeek V3.1 (4-bit)",
storage_size=Memory.from_gb(378),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
),
"deepseek-v3.1-8bit": ModelCard(
@@ -64,6 +66,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="DeepSeek V3.1 (8-bit)",
storage_size=Memory.from_gb(713),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
),
# "deepseek-v3.2": ModelCard(
@@ -135,6 +139,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Kimi K2 Instruct (4-bit)",
storage_size=Memory.from_gb(578),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
),
"kimi-k2-thinking": ModelCard(
@@ -148,6 +154,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Kimi K2 Thinking (4-bit)",
storage_size=Memory.from_gb(658),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
),
# llama-3.1
@@ -162,6 +170,38 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.1 8B (4-bit)",
storage_size=Memory.from_mb(4423),
n_layers=32,
hidden_size=4096,
supports_tensor=True,
),
),
"llama-3.1-8b-8bit": ModelCard(
short_id="llama-3.1-8b-8bit",
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"),
name="Llama 3.1 8B (8-bit)",
description="""Llama 3.1 is a large language model trained on the Llama 3.1 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"),
pretty_name="Llama 3.1 8B (8-bit)",
storage_size=Memory.from_mb(8540),
n_layers=32,
hidden_size=4096,
supports_tensor=True,
),
),
"llama-3.1-8b-bf16": ModelCard(
short_id="llama-3.1-8b-bf16",
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"),
name="Llama 3.1 8B (BF16)",
description="""Llama 3.1 is a large language model trained on the Llama 3.1 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"),
pretty_name="Llama 3.1 8B (BF16)",
storage_size=Memory.from_mb(16100),
n_layers=32,
hidden_size=4096,
supports_tensor=True,
),
),
"llama-3.1-70b": ModelCard(
@@ -175,6 +215,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.1 70B (4-bit)",
storage_size=Memory.from_mb(38769),
n_layers=80,
hidden_size=8192,
supports_tensor=True,
),
),
# llama-3.2
@@ -189,6 +231,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.2 1B (4-bit)",
storage_size=Memory.from_mb(696),
n_layers=16,
hidden_size=2048,
supports_tensor=True,
),
),
"llama-3.2-3b": ModelCard(
@@ -202,6 +246,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.2 3B (4-bit)",
storage_size=Memory.from_mb(1777),
n_layers=28,
hidden_size=3072,
supports_tensor=True,
),
),
"llama-3.2-3b-8bit": ModelCard(
@@ -215,6 +261,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.2 3B (8-bit)",
storage_size=Memory.from_mb(3339),
n_layers=28,
hidden_size=3072,
supports_tensor=True,
),
),
# llama-3.3
@@ -229,6 +277,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.3 70B",
storage_size=Memory.from_mb(38769),
n_layers=80,
hidden_size=8192,
supports_tensor=True,
),
),
"llama-3.3-70b-8bit": ModelCard(
@@ -242,6 +292,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.3 70B (8-bit)",
storage_size=Memory.from_mb(73242),
n_layers=80,
hidden_size=8192,
supports_tensor=True,
),
),
"llama-3.3-70b-fp16": ModelCard(
@@ -255,20 +307,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Llama 3.3 70B (FP16)",
storage_size=Memory.from_mb(137695),
n_layers=80,
),
),
# phi-3
"phi-3-mini": ModelCard(
short_id="phi-3-mini",
model_id=ModelId("mlx-community/Phi-3-mini-128k-instruct-4bit"),
name="Phi 3 Mini 128k (4-bit)",
description="""Phi 3 Mini is a large language model trained on the Phi 3 Mini dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Phi-3-mini-128k-instruct-4bit"),
pretty_name="Phi 3 Mini 128k (4-bit)",
storage_size=Memory.from_mb(2099),
n_layers=32,
hidden_size=8192,
supports_tensor=True,
),
),
# qwen3
@@ -283,6 +323,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 0.6B (4-bit)",
storage_size=Memory.from_mb(327),
n_layers=28,
hidden_size=1024,
supports_tensor=False,
),
),
"qwen3-0.6b-8bit": ModelCard(
@@ -296,6 +338,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 0.6B (8-bit)",
storage_size=Memory.from_mb(666),
n_layers=28,
hidden_size=1024,
supports_tensor=False,
),
),
"qwen3-30b": ModelCard(
@@ -309,6 +353,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 30B A3B (4-bit)",
storage_size=Memory.from_mb(16797),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
),
"qwen3-30b-8bit": ModelCard(
@@ -322,6 +368,68 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 30B A3B (8-bit)",
storage_size=Memory.from_mb(31738),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
),
"qwen3-80b-a3B-4bit": ModelCard(
short_id="qwen3-80b-a3B-4bit",
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"),
name="Qwen3 80B A3B (4-bit)",
description="""Qwen3 80B""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"),
pretty_name="Qwen3 80B A3B (4-bit)",
storage_size=Memory.from_mb(44800),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
),
"qwen3-80b-a3B-8bit": ModelCard(
short_id="qwen3-80b-a3B-8bit",
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"),
name="Qwen3 80B A3B (8-bit)",
description="""Qwen3 80B""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"),
pretty_name="Qwen3 80B A3B (8-bit)",
storage_size=Memory.from_mb(84700),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
),
"qwen3-80b-a3B-thinking-4bit": ModelCard(
short_id="qwen3-80b-a3B-thinking-4bit",
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"),
name="Qwen3 80B A3B Thinking (4-bit)",
description="""Qwen3 80B Reasoning model""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"),
pretty_name="Qwen3 80B A3B (4-bit)",
storage_size=Memory.from_mb(84700),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
),
"qwen3-80b-a3B-thinking-8bit": ModelCard(
short_id="qwen3-80b-a3B-thinking-8bit",
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"),
name="Qwen3 80B A3B Thinking (8-bit)",
description="""Qwen3 80B Reasoning model""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"),
pretty_name="Qwen3 80B A3B (8-bit)",
storage_size=Memory.from_mb(84700),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
),
"qwen3-235b-a22b-4bit": ModelCard(
@@ -335,6 +443,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 235B A22B (4-bit)",
storage_size=Memory.from_gb(132),
n_layers=94,
hidden_size=4096,
supports_tensor=True,
),
),
"qwen3-235b-a22b-8bit": ModelCard(
@@ -348,6 +458,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 235B A22B (8-bit)",
storage_size=Memory.from_gb(250),
n_layers=94,
hidden_size=4096,
supports_tensor=True,
),
),
"qwen3-coder-480b-a35b-4bit": ModelCard(
@@ -361,6 +473,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 Coder 480B A35B (4-bit)",
storage_size=Memory.from_gb(270),
n_layers=62,
hidden_size=6144,
supports_tensor=True,
),
),
"qwen3-coder-480b-a35b-8bit": ModelCard(
@@ -374,77 +488,84 @@ MODEL_CARDS: dict[str, ModelCard] = {
pretty_name="Qwen3 Coder 480B A35B (8-bit)",
storage_size=Memory.from_gb(540),
n_layers=62,
hidden_size=6144,
supports_tensor=True,
),
),
# granite
"granite-3.3-2b": ModelCard(
short_id="granite-3.3-2b",
model_id=ModelId("mlx-community/granite-3.3-2b-instruct-fp16"),
name="Granite 3.3 2B (FP16)",
description="""Granite-3.3-2B-Instruct is a 2-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.""",
# gpt-oss
"gpt-oss-120b-MXFP4-Q8": ModelCard(
short_id="gpt-oss-120b-MXFP4-Q8",
model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
name="GPT-OSS 120B (MXFP4-Q8, MLX)",
description="""OpenAI's GPT-OSS 120B is a 117B-parameter Mixture-of-Experts model designed for high-reasoning and general-purpose use; this variant is a 4-bit MLX conversion for Apple Silicon.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/granite-3.3-2b-instruct-fp16"),
pretty_name="Granite 3.3 2B (FP16)",
storage_size=Memory.from_mb(4951),
n_layers=40,
model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
pretty_name="GPT-OSS 120B (MXFP4-Q8, MLX)",
storage_size=Memory.from_kb(68_996_301),
n_layers=36,
hidden_size=2880,
supports_tensor=True,
),
),
# "granite-3.3-8b": ModelCard(
# short_id="granite-3.3-8b",
# model_id=ModelId("mlx-community/granite-3.3-8b-instruct-fp16"),
# name="Granite 3.3 8B",
# description="""Granite-3.3-8B-Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.""",
"gpt-oss-20b-4bit": ModelCard(
short_id="gpt-oss-20b-4bit",
model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q4"),
name="GPT-OSS 20B (MXFP4-Q4, MLX)",
description="""OpenAI's GPT-OSS 20B is a medium-sized MoE model for lower-latency and local or specialized use cases; this MLX variant uses MXFP4 4-bit quantization.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q4"),
pretty_name="GPT-OSS 20B (MXFP4-Q4, MLX)",
storage_size=Memory.from_kb(11_744_051),
n_layers=24,
hidden_size=2880,
supports_tensor=True,
),
),
# Needs to be quantized g32 or g16.
"glm-4.5-air-8bit": ModelCard(
short_id="glm-4.5-air-8bit",
model_id=ModelId("mlx-community/GLM-4.5-Air-8bit"),
name="GLM 4.5 Air 8bit",
description="""GLM 4.5 Air 8bit""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/GLM-4.5-Air-8bit"),
pretty_name="GLM 4.5 Air 8bit",
storage_size=Memory.from_gb(114),
n_layers=46,
hidden_size=4096,
supports_tensor=False,
),
),
"glm-4.5-air-bf16": ModelCard(
short_id="glm-4.5-air-bf16",
model_id=ModelId("mlx-community/GLM-4.5-Air-bf16"),
name="GLM 4.5 Air bf16",
description="""GLM 4.5 Air bf16""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/GLM-4.5-Air-bf16"),
pretty_name="GLM 4.5 Air bf16",
storage_size=Memory.from_gb(214),
n_layers=46,
hidden_size=4096,
supports_tensor=True,
),
),
# "devstral-2-123b-instruct-2512-8bit": ModelCard(
# short_id="devstral-2-123b-instruct-2512-8bit",
# model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
# name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
# description="""Mistral AI's Devstral 2 123B Instruct (2512) is an agentic coding model.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/granite-3.3-8b-instruct-fp16"),
# pretty_name="Granite 3.3 8B",
# storage_size=Memory.from_kb(15958720),
# n_layers=40,
# ),
# ),
# smol-lm
# "smol-lm-135m": ModelCard(
# short_id="smol-lm-135m",
# model_id="mlx-community/SmolLM-135M-4bit",
# name="Smol LM 135M",
# description="""SmolLM is a series of state-of-the-art small language models available in three sizes: 135M, 360M, and 1.7B parameters. """,
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/SmolLM-135M-4bit"),
# pretty_name="Smol LM 135M",
# storage_size=Memory.from_kb(73940),
# n_layers=30,
# ),
# ),
# gpt-oss
# "gpt-oss-120b-MXFP4-Q8": ModelCard(
# short_id="gpt-oss-120b-MXFP4-Q8",
# model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
# name="GPT-OSS 120B (MXFP4-Q8, MLX)",
# description="""OpenAI's GPT-OSS 120B is a 117B-parameter Mixture-of-Experts model designed for high-reasoning and general-purpose use; this variant is a 4-bit MLX conversion for Apple Silicon.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
# pretty_name="GPT-OSS 120B (MXFP4-Q8, MLX)",
# storage_size=Memory.from_kb(68_996_301),
# n_layers=36,
# hidden_size=2880,
# supports_tensor=True,
# ),
# ),
# "gpt-oss-20b-4bit": ModelCard(
# short_id="gpt-oss-20b-4bit",
# model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q4"),
# name="GPT-OSS 20B (MXFP4-Q4, MLX)",
# description="""OpenAI's GPT-OSS 20B is a medium-sized MoE model for lower-latency and local or specialized use cases; this MLX variant uses MXFP4 4-bit quantization.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q4"),
# pretty_name="GPT-OSS 20B (MXFP4-Q4, MLX)",
# storage_size=Memory.from_kb(11_744_051),
# n_layers=24,
# hidden_size=2880,
# model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
# pretty_name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
# storage_size=Memory.from_kb(133_000_000),
# n_layers=88,
# hidden_size=12288,
# supports_tensor=True,
# ),
# ),

View File

@@ -6,6 +6,7 @@ from huggingface_hub import model_info
from loguru import logger
from pydantic import BaseModel, Field
from exo.shared.models.model_cards import MODEL_CARDS
from exo.shared.types.memory import Memory
from exo.shared.types.models import ModelId, ModelMetadata
from exo.worker.download.download_utils import (
@@ -25,6 +26,7 @@ class ConfigData(BaseModel):
n_layers: Annotated[int, Field(ge=0)] | None = None # Sometimes used
num_decoder_layers: Annotated[int, Field(ge=0)] | None = None # Transformer models
decoder_layers: Annotated[int, Field(ge=0)] | None = None # Some architectures
hidden_size: Annotated[int, Field(ge=0)] | None = None
@property
def layer_count(self) -> int:
@@ -106,10 +108,19 @@ async def _get_model_meta(model_id: str) -> ModelMetadata:
config_data = await get_config_data(model_id)
num_layers = config_data.layer_count
mem_size_bytes = await get_safetensors_size(model_id)
model_card = next(
(card for card in MODEL_CARDS.values() if card.model_id == ModelId(model_id)),
None,
)
return ModelMetadata(
model_id=ModelId(model_id),
pretty_name=model_id,
pretty_name=model_card.name if model_card is not None else model_id,
storage_size=mem_size_bytes,
n_layers=num_layers,
hidden_size=config_data.hidden_size or 0,
# TODO: all custom models currently do not support tensor. We could add a dynamic test for this?
supports_tensor=model_card.metadata.supports_tensor
if model_card is not None
else False,
)
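
The card lookup changes what custom (non-carded) repos report. A hedged usage sketch, assuming `resolve_model_meta` is the public wrapper around `_get_model_meta` (its import path is not visible in this diff) and that network access to the HF Hub is available:

```python
import asyncio

from exo.shared.models.model_meta import resolve_model_meta  # assumed import path

async def main() -> None:
    # Repo with a ModelCard: pretty_name and supports_tensor come from the card.
    known = await resolve_model_meta("mlx-community/Llama-3.2-1B-Instruct-4bit")  # id assumed
    # Repo without a card: pretty_name falls back to the raw id,
    # supports_tensor to False, hidden_size to 0 if absent from config.json.
    custom = await resolve_model_meta("someorg/some-custom-model")  # hypothetical id
    print(known.pretty_name, known.supports_tensor)
    print(custom.pretty_name, custom.supports_tensor)

asyncio.run(main())
```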

View File

@@ -5,7 +5,7 @@ from pydantic import BaseModel, Field, field_validator
from pydantic_core import PydanticUseDefault
from exo.shared.types.common import CommandId
from exo.shared.types.models import ModelId
from exo.shared.types.models import ModelId, ModelMetadata
from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
from exo.shared.types.worker.shards import Sharding
@@ -174,6 +174,7 @@ class DeleteInstanceTaskParams(BaseModel):
class CreateInstanceResponse(BaseModel):
message: str
command_id: CommandId
model_meta: ModelMetadata
class DeleteInstanceResponse(BaseModel):

View File

@@ -14,3 +14,5 @@ class ModelMetadata(CamelCaseModel):
pretty_name: str
storage_size: Memory
n_layers: PositiveInt
hidden_size: PositiveInt
supports_tensor: bool
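
Since `hidden_size` and `supports_tensor` are required (no defaults), every construction site has to supply them, which is what the bulk of this diff does. A minimal valid instance after the change, with values taken from the llama-3.2-1b card above:

```python
from exo.shared.types.memory import Memory
from exo.shared.types.models import ModelId, ModelMetadata

meta = ModelMetadata(
    model_id=ModelId("mlx-community/Llama-3.2-1B-Instruct-4bit"),  # id assumed
    pretty_name="Llama 3.2 1B (4-bit)",
    storage_size=Memory.from_mb(696),
    n_layers=16,
    hidden_size=2048,      # new required field
    supports_tensor=True,  # new required field
)
```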

View File

@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod
from copy import copy
from datetime import timedelta
from pathlib import Path
from typing import AsyncIterator, Callable
@@ -12,7 +13,7 @@ from exo.shared.types.worker.shards import (
from exo.worker.download.download_utils import RepoDownloadProgress
# TODO: the PipelineShardMetadata getting reinstantiated is a bit messy. Shoudl this be a classmethod?
# TODO: the PipelineShardMetadata getting reinstantiated is a bit messy. Should this be a classmethod?
class ShardDownloader(ABC):
@abstractmethod
async def ensure_shard(
@@ -43,34 +44,7 @@ class ShardDownloader(ABC):
Yields:
tuple[Path, RepoDownloadProgress]: The path and progress of a shard download.
"""
yield (
Path("/tmp/noop_shard"),
RepoDownloadProgress(
repo_id="noop",
repo_revision="noop",
shard=PipelineShardMetadata(
model_meta=ModelMetadata(
model_id=ModelId("noop"),
pretty_name="noope",
storage_size=Memory.from_bytes(0),
n_layers=1,
),
device_rank=0,
world_size=1,
start_layer=0,
end_layer=1,
n_layers=1,
),
completed_files=0,
total_files=0,
downloaded_bytes=Memory.from_bytes(0),
downloaded_bytes_this_session=Memory.from_bytes(0),
total_bytes=Memory.from_bytes(0),
overall_speed=0,
overall_eta=timedelta(seconds=0),
status="complete",
),
)
yield (Path("/tmp/noop_shard"), NOOP_DOWNLOAD_PROGRESS)
@abstractmethod
async def get_shard_download_status_for_shard(
@@ -94,46 +68,41 @@ class NoopShardDownloader(ShardDownloader):
) -> AsyncIterator[tuple[Path, RepoDownloadProgress]]:
yield (
Path("/tmp/noop_shard"),
RepoDownloadProgress(
repo_id="noop",
repo_revision="noop",
shard=PipelineShardMetadata(
model_meta=ModelMetadata(
model_id=ModelId("noop"),
pretty_name="noope",
storage_size=Memory.from_bytes(0),
n_layers=1,
),
device_rank=0,
world_size=1,
start_layer=0,
end_layer=1,
n_layers=1,
),
completed_files=0,
total_files=0,
downloaded_bytes=Memory.from_bytes(0),
downloaded_bytes_this_session=Memory.from_bytes(0),
total_bytes=Memory.from_bytes(0),
overall_speed=0,
overall_eta=timedelta(seconds=0),
status="complete",
),
NOOP_DOWNLOAD_PROGRESS,
)
async def get_shard_download_status_for_shard(
self, shard: ShardMetadata
) -> RepoDownloadProgress:
return RepoDownloadProgress(
repo_id="noop",
repo_revision="noop",
shard=shard,
completed_files=0,
total_files=0,
downloaded_bytes=Memory.from_bytes(0),
downloaded_bytes_this_session=Memory.from_bytes(0),
total_bytes=Memory.from_bytes(0),
overall_speed=0,
overall_eta=timedelta(seconds=0),
status="complete",
)
dp = copy(NOOP_DOWNLOAD_PROGRESS)
dp.shard = shard
return dp
NOOP_DOWNLOAD_PROGRESS = RepoDownloadProgress(
repo_id="noop",
repo_revision="noop",
shard=PipelineShardMetadata(
model_meta=ModelMetadata(
model_id=ModelId("noop"),
pretty_name="noope",
storage_size=Memory.from_bytes(0),
n_layers=1,
hidden_size=1,
supports_tensor=False,
),
device_rank=0,
world_size=1,
start_layer=0,
end_layer=1,
n_layers=1,
),
completed_files=0,
total_files=0,
downloaded_bytes=Memory.from_bytes(0),
downloaded_bytes_this_session=Memory.from_bytes(0),
total_bytes=Memory.from_bytes(0),
overall_speed=0,
overall_eta=timedelta(seconds=0),
status="complete",
)
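
One caveat with the deduplicated noop progress: `copy()` is shallow, so the object returned by `get_shard_download_status_for_shard` shares every field with the module-level `NOOP_DOWNLOAD_PROGRESS` except the reassigned `shard`. If `RepoDownloadProgress` is a pydantic model (an assumption here; `model_copy` is used on `TokenChunk` elsewhere in this diff), the intent reads more directly as:

```python
# Hedged alternative, assuming RepoDownloadProgress is a pydantic BaseModel:
async def get_shard_download_status_for_shard(
    self, shard: ShardMetadata
) -> RepoDownloadProgress:
    return NOOP_DOWNLOAD_PROGRESS.model_copy(update={"shard": shard})
```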

View File

@@ -2,16 +2,13 @@ import os
import loguru
from exo.shared.types.events import Event
from exo.shared.types.events import Event, RunnerStatusUpdated
from exo.shared.types.tasks import Task
from exo.shared.types.worker.instances import BoundInstance, MlxJacclInstance
from exo.shared.types.worker.runners import RunnerFailed
from exo.utils.channels import MpReceiver, MpSender
logger: "loguru.Logger"
if os.getenv("EXO_TESTS") == "1":
logger = loguru.logger
logger: "loguru.Logger" = loguru.logger
def entrypoint(
@@ -30,6 +27,23 @@ def entrypoint(
logger = _logger
# Import main after setting global logger - this lets us just import logger from this module
from exo.worker.runner.runner import main
try:
from exo.worker.runner.runner import main
main(bound_instance, event_sender, task_receiver)
main(bound_instance, event_sender, task_receiver)
except Exception as e:
logger.opt(exception=e).warning(
f"Runner {bound_instance.bound_runner_id} crashed with critical exception {e}"
)
event_sender.send(
RunnerStatusUpdated(
runner_id=bound_instance.bound_runner_id,
runner_status=RunnerFailed(error_message=str(e)),
)
)
finally:
event_sender.close()
task_receiver.close()
event_sender.join()
task_receiver.join()
logger.info("bye from the runner")

View File

@@ -1,5 +1,10 @@
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from anyio import ClosedResourceError, WouldBlock
from exo.shared.types.common import NodeId
from exo.shared.types.memory import Memory
from exo.shared.types.models import ModelId, ModelMetadata
@@ -14,6 +19,92 @@ from exo.shared.types.worker.runners import RunnerId, RunnerStatus, ShardAssignm
from exo.shared.types.worker.shards import PipelineShardMetadata, ShardMetadata
# Synchronous trivial sender and receiver.
@dataclass
class _State[T]:
buffer: list[T]
closed: bool = False
class MockSender[T]:
def __init__(self, _state: _State[T] | None = None):
self._state = _state or _State(buffer=[])
self._closed = False
def send(self, item: T):
if self._closed:
raise ClosedResourceError
self._state.buffer.append(item)
def close(self):
self._closed = True
self._state.closed = True
def join(self):
pass
def clone(self) -> MockSender[T]:
if self._closed:
raise ClosedResourceError
return MockSender(_state=self._state)
def clone_receiver(self) -> MockReceiver[T]:
if self._closed:
raise ClosedResourceError
return MockReceiver(_state=self._state)
class MockReceiver[T]:
def __init__(self, _state: _State[T] | None = None):
self._state = _state or _State(buffer=[])
self._closed = False
def close(self):
self._closed = True
self._state.closed = True
def join(self):
pass
def clone(self) -> MockReceiver[T]:
if self._closed:
raise ClosedResourceError
return MockReceiver(_state=self._state)
def clone_sender(self) -> MockSender[T]:
if self._closed:
raise ClosedResourceError
return MockSender(_state=self._state)
def receive_nowait(self) -> T:
if self._state.buffer:
return self._state.buffer.pop(0)
raise WouldBlock
def collect(self) -> list[T]:
out: list[T] = []
while True:
try:
out.append(self.receive_nowait())
except WouldBlock:
break
return out
async def receive_at_least(self, n: int) -> list[T]:
raise NotImplementedError
def __enter__(self):
return self
def __iter__(self) -> Iterator[T]:
while True:
try:
yield self.receive_nowait()
except WouldBlock:
break
# Runner supervisor without multiprocessing logic.
@dataclass(frozen=True)
class FakeRunnerSupervisor:
bound_instance: BoundInstance
@@ -35,6 +126,8 @@ def get_pipeline_shard_metadata(
pretty_name=str(model_id),
storage_size=Memory.from_mb(100000),
n_layers=32,
hidden_size=2048,
supports_tensor=False,
),
device_rank=device_rank,
world_size=world_size,
@@ -69,3 +162,18 @@ def get_mlx_ring_instance(
),
hosts=[],
)
def get_bound_mlx_ring_instance(
instance_id: InstanceId, model_id: ModelId, runner_id: RunnerId, node_id: NodeId
) -> BoundInstance:
shard = get_pipeline_shard_metadata(model_id=model_id, device_rank=0, world_size=1)
instance = get_mlx_ring_instance(
instance_id=instance_id,
model_id=model_id,
node_to_runner={node_id: runner_id},
runner_to_shard={runner_id: shard},
)
return BoundInstance(
instance=instance, bound_runner_id=runner_id, bound_node_id=node_id
)
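
The mock channel pair is synchronous and backed by a shared `_State` buffer, so tests can assert on traffic without an event loop. A small usage sketch built only from the methods defined above; the import path is hypothetical since the file's location is not visible in the diff:

```python
from anyio import ClosedResourceError

from tests.mocks import MockSender  # hypothetical import path

sender: MockSender[int] = MockSender()
receiver = sender.clone_receiver()  # shares the same _State buffer

sender.send(1)
sender.send(2)
assert receiver.collect() == [1, 2]  # drains via receive_nowait until WouldBlock
assert list(receiver) == []          # __iter__ stops once the buffer is empty

sender.close()
try:
    sender.send(3)
except ClosedResourceError:
    print("sender closed, as expected")
```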

View File

@@ -0,0 +1 @@
# TODO:

uv.lock generated
View File

@@ -336,6 +336,7 @@ dependencies = [
{ name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "networkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -377,6 +378,7 @@ requires-dist = [
{ name = "mlx", specifier = ">=0.30.1" },
{ name = "mlx-lm", specifier = ">=0.28.3" },
{ name = "networkx", specifier = ">=3.5" },
{ name = "openai-harmony", specifier = ">=0.0.8" },
{ name = "protobuf", specifier = ">=6.32.0" },
{ name = "psutil", specifier = ">=7.0.0" },
{ name = "pydantic", specifier = ">=2.11.7" },
@@ -946,6 +948,27 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f", size = 18652691, upload-time = "2025-10-15T16:17:46.247Z" },
]
[[package]]
name = "openai-harmony"
version = "0.0.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3e/92/2d038d096f29179c7c9571b431f9e739f87a487121901725e23fe338dd9d/openai_harmony-0.0.8.tar.gz", hash = "sha256:6e43f98e6c242fa2de6f8ea12eab24af63fa2ed3e89c06341fb9d92632c5cbdf", size = 284777, upload-time = "2025-11-05T19:07:06.727Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/45/c6/2502f416d46be3ec08bb66d696cccffb57781a499e3ff2e4d7c174af4e8f/openai_harmony-0.0.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:029ec25ca74abe48fdb58eb9fdd2a8c1618581fc33ce8e5653f8a1ffbfbd9326", size = 2627806, upload-time = "2025-11-05T19:06:57.063Z" },
{ url = "https://files.pythonhosted.org/packages/d3/d2/ce6953ca87db9cae3e775024184da7d1c5cb88cead19a2d75b42f00a959c/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4f709815924ec325b9a890e6ab2bbb0ceec8e319a4e257328eb752cf36b2efc", size = 2948463, upload-time = "2025-11-05T19:06:48.17Z" },
{ url = "https://files.pythonhosted.org/packages/fa/4c/b553c9651662d6ce102ca7f3629d268b23df1abe5841e24bed81e8a8e949/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5cfcfd963b50a41fc656c84d3440ca6eecdccd6c552158ce790b8f2e33dfb5a9", size = 2704083, upload-time = "2025-11-05T19:06:50.205Z" },
{ url = "https://files.pythonhosted.org/packages/9b/af/4eec8f9ab9c27bcdb444460c72cf43011d176fc44c79d6e113094ca1e152/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a3a16972aa1cee38ea958470cd04ac9a2d5ac38fdcf77ab686611246220c158", size = 2959765, upload-time = "2025-11-05T19:06:53.62Z" },
{ url = "https://files.pythonhosted.org/packages/11/3c/33f3374e4624e0e776f6b13b73c45a7ead7f9c4529f8369ed5bfcaa30cac/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b4d5cfa168e74d08f8ba6d58a7e49bc7daef4d58951ec69b66b0d56f4927a68d", size = 3427031, upload-time = "2025-11-05T19:06:51.829Z" },
{ url = "https://files.pythonhosted.org/packages/25/3f/1a192b93bb47c6b44cd98ba8cc1d3d2a9308f1bb700c3017e6352da11bda/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c007d277218a50db8839e599ed78e0fffe5130f614c3f6d93ae257f282071a29", size = 2953260, upload-time = "2025-11-05T19:06:55.406Z" },
{ url = "https://files.pythonhosted.org/packages/5b/f8/93b582cad3531797c3db7c2db5400fd841538ccddfd9f5e3df61be99a630/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8565d4f5a0638da1bffde29832ed63c9e695c558611053add3b2dc0b56c92dbc", size = 3127044, upload-time = "2025-11-05T19:06:59.553Z" },
{ url = "https://files.pythonhosted.org/packages/1d/10/4327dbf87f75ae813405fd9a9b4a5cde63d506ffed0a096a440a4cabd89c/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cbaa3bda75ef0d8836e1f8cc84af62f971b1d756d740efc95c38c3e04c0bfde2", size = 2932931, upload-time = "2025-11-05T19:07:01.437Z" },
{ url = "https://files.pythonhosted.org/packages/8a/c8/1774eec4f6f360ef57618fb8f52e3d3af245b2491bd0297513aa09eec04b/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:772922a9bd24e133950fad71eb1550836f415a88e8c77870e12d0c3bd688ddc2", size = 2996140, upload-time = "2025-11-05T19:07:03.438Z" },
{ url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" },
]
[[package]]
name = "packaging"
version = "25.0"