backport the dashboard to staging

This commit is contained in:
Evan Quiney
2025-12-17 12:22:22 +00:00
committed by GitHub
parent 880a18d205
commit 09593c5e85
37 changed files with 10984 additions and 3544 deletions

43
.gitignore vendored
View File

@@ -1,31 +1,24 @@
*/__pycache__ # gitingest
__pycache__
*.so
hosts.json
hosts*.json
nodes.json
# hide direnv stuff
.direnv/
build/
dist/
*.xcuserstate
.DS_Store
*/.DS_Store
# for the gitingest enthusiasts
digest.txt digest.txt
# Rust # python
**/__pycache__
# nix
.direnv/
# xcode / macos
*.xcuserstate
**/.DS_Store
# rust
target/ target/
## These are backup files generated by rustfmt
**/*.rs.bk **/*.rs.bk
## MSVC Windows builds of rustc generate these, which store debugging information
*.pdb *.pdb
## Generated by cargo mutants # svelte
## Contains mutation testing data dashboard/build/
**/mutants.out*/ dashboard/node_modules/
dashboard/.svelte-kit/

View File

File diff suppressed because it is too large Load Diff

3058
dashboard/package-lock.json generated Normal file
View File

File diff suppressed because it is too large Load Diff

33
dashboard/package.json Normal file
View File

@@ -0,0 +1,33 @@
{
"name": "exo-dashboard",
"private": true,
"version": "1.0.0",
"type": "module",
"scripts": {
"dev": "vite dev",
"build": "vite build",
"preview": "vite preview",
"prepare": "svelte-kit sync || echo ''",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json"
},
"devDependencies": {
"@sveltejs/adapter-static": "^3.0.10",
"@sveltejs/kit": "^2.48.4",
"@sveltejs/vite-plugin-svelte": "^5.0.0",
"@tailwindcss/vite": "^4.0.0",
"@types/d3": "^7.4.3",
"@types/node": "^22",
"d3": "^7.9.0",
"svelte": "^5.0.0",
"svelte-check": "^4.0.0",
"tailwindcss": "^4.0.0",
"tw-animate-css": "^1.3.5",
"typescript": "^5.0.0",
"vite": "^6.0.0"
},
"dependencies": {
"highlight.js": "^11.11.1",
"mode-watcher": "^1.1.0"
}
}

322
dashboard/src/app.css Normal file
View File

@@ -0,0 +1,322 @@
@import 'tailwindcss';
@import 'tw-animate-css';
@custom-variant dark (&:is(.dark *));
:root {
/* EXO Brand Colors - Command Center Theme (neutral dark greys) */
--exo-black: oklch(0.12 0 0);
--exo-dark-gray: oklch(0.16 0 0);
--exo-medium-gray: oklch(0.22 0 0);
--exo-light-gray: oklch(0.6 0 0);
--exo-yellow: oklch(0.85 0.18 85);
--exo-yellow-darker: oklch(0.7 0.16 85);
--exo-yellow-glow: oklch(0.9 0.2 85);
/* Gotham-inspired accent colors */
--exo-grid: oklch(0.25 0 0);
--exo-scanline: oklch(0.15 0 0);
--exo-glow-yellow: 0 0 20px oklch(0.85 0.18 85 / 0.3);
--exo-glow-yellow-strong: 0 0 40px oklch(0.85 0.18 85 / 0.5);
/* Theme Variables */
--radius: 0.375rem;
--background: var(--exo-black);
--foreground: oklch(0.9 0 0);
--card: var(--exo-dark-gray);
--card-foreground: oklch(0.9 0 0);
--popover: var(--exo-dark-gray);
--popover-foreground: oklch(0.9 0 0);
--primary: var(--exo-yellow);
--primary-foreground: var(--exo-black);
--secondary: var(--exo-medium-gray);
--secondary-foreground: oklch(0.9 0 0);
--muted: var(--exo-medium-gray);
--muted-foreground: var(--exo-light-gray);
--accent: var(--exo-medium-gray);
--accent-foreground: oklch(0.9 0 0);
--destructive: oklch(0.6 0.25 25);
--border: oklch(0.22 0 0);
--input: oklch(0.22 0 0);
--ring: var(--exo-yellow);
}
@theme inline {
--radius-sm: calc(var(--radius) - 2px);
--radius-md: var(--radius);
--radius-lg: calc(var(--radius) + 2px);
--radius-xl: calc(var(--radius) + 4px);
--color-background: var(--background);
--color-foreground: var(--foreground);
--color-card: var(--card);
--color-card-foreground: var(--card-foreground);
--color-popover: var(--popover);
--color-popover-foreground: var(--popover-foreground);
--color-primary: var(--primary);
--color-primary-foreground: var(--primary-foreground);
--color-secondary: var(--secondary);
--color-secondary-foreground: var(--secondary-foreground);
--color-muted: var(--muted);
--color-muted-foreground: var(--muted-foreground);
--color-accent: var(--accent);
--color-accent-foreground: var(--accent-foreground);
--color-destructive: var(--destructive);
--color-border: var(--border);
--color-input: var(--input);
--color-ring: var(--ring);
/* Custom EXO colors */
--color-exo-yellow: var(--exo-yellow);
--color-exo-yellow-darker: var(--exo-yellow-darker);
--color-exo-black: var(--exo-black);
--color-exo-dark-gray: var(--exo-dark-gray);
--color-exo-medium-gray: var(--exo-medium-gray);
--color-exo-light-gray: var(--exo-light-gray);
}
@layer base {
* {
@apply border-border outline-ring/50;
}
html, body {
@apply bg-background text-foreground;
font-family: 'SF Mono', 'Fira Code', 'Monaco', 'Consolas', 'Liberation Mono', monospace;
letter-spacing: 0.02em;
}
}
@layer utilities {
.scrollbar-hide {
&::-webkit-scrollbar {
display: none;
}
-ms-overflow-style: none;
scrollbar-width: none;
}
/* CRT Scanline effect */
.scanlines {
position: relative;
&::before {
content: '';
position: absolute;
inset: 0;
background: repeating-linear-gradient(
0deg,
transparent,
transparent 2px,
oklch(0 0 0 / 0.03) 2px,
oklch(0 0 0 / 0.03) 4px
);
pointer-events: none;
z-index: 100;
}
}
/* Command panel styling */
.command-panel {
background: linear-gradient(
180deg,
oklch(0.16 0 0 / 0.95) 0%,
oklch(0.12 0 0 / 0.98) 100%
);
border: 1px solid oklch(0.25 0 0);
box-shadow:
inset 0 1px 0 oklch(1 0 0 / 0.03),
0 4px 20px oklch(0 0 0 / 0.5);
}
/* Glow text */
.glow-text {
text-shadow:
0 0 10px oklch(0.85 0.18 85 / 0.5),
0 0 20px oklch(0.85 0.18 85 / 0.3),
0 0 40px oklch(0.85 0.18 85 / 0.1);
}
/* Status indicator pulse */
.status-pulse {
animation: statusPulse 2s ease-in-out infinite;
}
/* Grid background */
.grid-bg {
background-image:
linear-gradient(oklch(0.2 0 0 / 0.3) 1px, transparent 1px),
linear-gradient(90deg, oklch(0.2 0 0 / 0.3) 1px, transparent 1px);
background-size: 40px 40px;
}
}
/* Animations */
@keyframes flowAnimation {
from {
stroke-dashoffset: 0;
}
to {
stroke-dashoffset: -16;
}
}
@keyframes statusPulse {
0%, 100% {
opacity: 1;
}
50% {
opacity: 0.5;
}
}
@keyframes radarSweep {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
@keyframes glowPulse {
0%, 100% {
box-shadow: 0 0 5px oklch(0.85 0.18 85 / 0.3), 0 0 10px oklch(0.85 0.18 85 / 0.1);
}
50% {
box-shadow: 0 0 15px oklch(0.85 0.18 85 / 0.5), 0 0 30px oklch(0.85 0.18 85 / 0.2);
}
}
@keyframes dataPulse {
0%, 100% {
opacity: 0.6;
}
50% {
opacity: 1;
}
}
.graph-link {
stroke: oklch(0.85 0.18 85 / 0.4);
stroke-width: 1.5px;
stroke-dasharray: 8, 8;
animation: flowAnimation 1s linear infinite;
filter: drop-shadow(0 0 3px oklch(0.85 0.18 85 / 0.5));
}
.graph-link-active {
stroke: oklch(0.85 0.18 85 / 0.8);
stroke-width: 2px;
filter: drop-shadow(0 0 6px oklch(0.85 0.18 85 / 0.8));
}
/* CRT Screen effect for topology */
.crt-screen {
position: relative;
border-radius: 50%;
background: radial-gradient(
ellipse at center,
oklch(0.16 0 0) 0%,
oklch(0.12 0 0) 50%,
oklch(0.09 0 0) 100%
);
box-shadow:
inset 0 0 100px oklch(0 0 0 / 0.5),
0 0 50px oklch(0.85 0.18 85 / 0.1);
}
/* Data readout styling */
.data-readout {
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 11px;
letter-spacing: 0.05em;
text-transform: uppercase;
}
/* Terminal cursor blink */
.cursor-blink {
animation: cursorBlink 1s step-end infinite;
}
@keyframes cursorBlink {
0%, 100% { opacity: 1; }
50% { opacity: 0; }
}
/* Custom scrollbar for command center */
::-webkit-scrollbar {
width: 6px;
height: 6px;
}
::-webkit-scrollbar-track {
background: oklch(0.1 0 0);
}
::-webkit-scrollbar-thumb {
background: oklch(0.3 0 0);
border-radius: 3px;
}
::-webkit-scrollbar-thumb:hover {
background: oklch(0.85 0.18 85 / 0.5);
}
/* Remove focus outline/border for inputs */
input:focus, textarea:focus {
outline: none;
box-shadow: none;
}
/* Shooting Stars Animation */
.shooting-stars {
position: fixed;
inset: 0;
overflow: hidden;
pointer-events: none;
z-index: 0;
}
.shooting-star {
position: absolute;
width: 3px;
height: 3px;
background: oklch(0.85 0.18 85 / 1);
border-radius: 50%;
box-shadow: 0 0 6px oklch(0.85 0.18 85 / 0.8);
animation: shootingStar var(--duration, 3s) linear infinite;
animation-delay: var(--delay, 0s);
opacity: 0;
}
.shooting-star::before {
content: '';
position: absolute;
width: 80px;
height: 2px;
background: linear-gradient(90deg, oklch(0.85 0.18 85 / 0), oklch(0.85 0.18 85 / 0.6));
transform: rotate(45deg);
transform-origin: right center;
top: 0;
right: 2px;
}
@keyframes shootingStar {
0% {
opacity: 0;
transform: translate(0, 0);
}
0.5% {
opacity: 1;
}
2.5% {
opacity: 0.8;
transform: translate(300px, 300px);
}
3.5% {
opacity: 0;
transform: translate(400px, 400px);
}
100% {
opacity: 0;
transform: translate(400px, 400px);
}
}

14
dashboard/src/app.d.ts vendored Normal file
View File

@@ -0,0 +1,14 @@
// See https://svelte.dev/docs/kit/types#app.d.ts
// for information about these interfaces
declare global {
namespace App {
// interface Error {}
// interface Locals {}
// interface PageData {}
// interface PageState {}
// interface Platform {}
}
}
export {};

14
dashboard/src/app.html Normal file
View File

@@ -0,0 +1,14 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%sveltekit.assets%/favicon.ico" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>EXO</title>
%sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<div style="display: contents">%sveltekit.body%</div>
</body>
</html>

View File

@@ -0,0 +1,75 @@
<script lang="ts">
import type { ChatUploadedFile } from '$lib/types/files';
import { formatFileSize, getFileCategory } from '$lib/types/files';
interface Props {
files: ChatUploadedFile[];
readonly?: boolean;
onRemove?: (fileId: string) => void;
}
let { files, readonly = false, onRemove }: Props = $props();
/** Pick the emoji shown next to an attachment, based on its detected file category. */
function getFileIcon(file: ChatUploadedFile): string {
  const kind = getFileCategory(file.type, file.name);
  if (kind === 'image') return '🖼';
  if (kind === 'text') return '📄';
  if (kind === 'pdf') return '📑';
  if (kind === 'audio') return '🎵';
  // Every other category gets the generic paperclip.
  return '📎';
}
/**
 * Shorten a file name to at most `maxLen` characters, keeping the extension visible.
 *
 * Names that already fit are returned unchanged. Otherwise the base name is cut and
 * joined to the extension with "...". When there is no usable extension (no dot, a
 * leading-dot name, or an extension too long to leave room for the base), the name is
 * simply cut at the end and "..." is appended.
 */
function truncateName(name: string, maxLen: number = 20): string {
  if (name.length <= maxLen) return name;
  const dotIndex = name.lastIndexOf('.');
  // dotIndex <= 0 means "no dot" or a dotfile: there is no extension to preserve.
  const hasExt = dotIndex > 0;
  const ext = hasExt ? name.slice(dotIndex) : '';
  const base = hasExt ? name.slice(0, dotIndex) : name;
  const available = maxLen - ext.length - 3;
  if (available < 1) {
    // The extension alone would exceed the budget (a negative slice end would
    // otherwise keep almost the whole base), so fall back to a plain tail cut.
    return name.slice(0, Math.max(maxLen - 3, 0)) + '...';
  }
  return base.slice(0, available) + '...' + ext;
}
</script>
{#if files.length > 0}
<div class="flex flex-wrap gap-2 mb-3 px-1">
{#each files as file (file.id)}
<div class="group relative flex items-center gap-2 bg-exo-dark-gray/80 border border-exo-yellow/30 rounded px-2.5 py-1.5 text-xs font-mono transition-all hover:border-exo-yellow/50 hover:shadow-[0_0_10px_rgba(255,215,0,0.1)]">
<!-- File preview or icon -->
{#if file.preview && getFileCategory(file.type, file.name) === 'image'}
<img
src={file.preview}
alt={file.name}
class="w-8 h-8 object-cover rounded border border-exo-yellow/20"
/>
{:else}
<span class="text-base">{getFileIcon(file)}</span>
{/if}
<!-- File info -->
<div class="flex flex-col min-w-0">
<span class="text-exo-yellow truncate max-w-[120px]" title={file.name}>
{truncateName(file.name)}
</span>
<span class="text-exo-light-gray text-xs">
{formatFileSize(file.size)}
</span>
</div>
<!-- Remove button -->
{#if !readonly && onRemove}
<button
type="button"
onclick={() => onRemove?.(file.id)}
class="ml-1 w-4 h-4 flex items-center justify-center text-exo-light-gray hover:text-red-400 transition-colors cursor-pointer"
title="Remove file"
>
<svg class="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
</svg>
</button>
{/if}
</div>
{/each}
</div>
{/if}

View File

@@ -0,0 +1,398 @@
<script lang="ts">
import { isLoading, sendMessage, selectedChatModel, setSelectedChatModel, instances, ttftMs, tps, totalTokens } from '$lib/stores/app.svelte';
import ChatAttachments from './ChatAttachments.svelte';
import type { ChatUploadedFile } from '$lib/types/files';
import { processUploadedFiles, getAcceptString } from '$lib/types/files';
interface Props {
class?: string;
placeholder?: string;
showHelperText?: boolean;
autofocus?: boolean;
showModelSelector?: boolean;
}
let {
class: className = '',
placeholder = 'Ask anything',
showHelperText = false,
autofocus = true,
showModelSelector = false
}: Props = $props();
let message = $state('');
let textareaRef: HTMLTextAreaElement | undefined = $state();
let fileInputRef: HTMLInputElement | undefined = $state();
let uploadedFiles = $state<ChatUploadedFile[]>([]);
let isDragOver = $state(false);
let loading = $derived(isLoading());
const currentModel = $derived(selectedChatModel());
const instanceData = $derived(instances());
const currentTtft = $derived(ttftMs());
const currentTps = $derived(tps());
const currentTokens = $derived(totalTokens());
// Custom dropdown state
let isModelDropdownOpen = $state(false);
let dropdownButtonRef: HTMLButtonElement | undefined = $state();
// Viewport rectangle of the model dropdown button, used to place the fixed-position panel.
// NOTE: `$derived` is handed an arrow function, so the derived value is the function
// itself; the template reads it by calling `dropdownPosition()`.
let dropdownPosition = $derived(() => {
// Nothing to measure while the button is not bound or the dropdown is closed.
if (!dropdownButtonRef || !isModelDropdownOpen) return { top: 0, left: 0, width: 0 };
const rect = dropdownButtonRef.getBoundingClientRect();
return {
top: rect.top,
left: rect.left,
width: rect.width
};
});
// Accept all supported file types
const acceptString = getAcceptString(['image', 'text', 'pdf']);
// Distinct model ids found in the running instances, in first-seen order.
// The derived value is a function, so it is read as `availableModels()`.
const availableModels = $derived(() => {
  const seenIds = new Set<string>();
  const result: Array<{id: string, label: string}> = [];
  for (const wrapped of Object.values(instanceData)) {
    const id = getInstanceModelId(wrapped);
    // Skip instances without a usable model id and ids already listed.
    if (!id || id === 'Unknown' || seenIds.has(id)) continue;
    seenIds.add(id);
    // The label is the last path segment of the id (e.g. "org/model" -> "model").
    result.push({ id, label: id.split('/').pop() || id });
  }
  return result;
});
// When no chat model is selected yet, fall back to the first available one.
$effect(() => {
  const [firstModel] = availableModels();
  if (firstModel !== undefined && !currentModel) {
    setSelectedChatModel(firstModel.id);
  }
});
/**
 * Read the model id out of a wrapped instance object.
 *
 * The wrapper is expected to have exactly one key whose value carries
 * `shardAssignments.modelId`; anything else yields an empty string.
 */
function getInstanceModelId(instanceWrapped: unknown): string {
  if (typeof instanceWrapped !== 'object' || !instanceWrapped) return '';
  const inner = Object.values(instanceWrapped as Record<string, unknown>);
  if (inner.length !== 1) return '';
  const instance = inner[0] as { shardAssignments?: { modelId?: string } };
  return instance?.shardAssignments?.modelId || '';
}
/**
 * Process newly provided files and append them to the pending attachments.
 *
 * Callers invoke this without awaiting it, so a processing failure is caught and
 * logged here instead of surfacing as an unhandled promise rejection; the current
 * attachment list is left untouched in that case.
 */
async function handleFiles(files: File[]) {
  if (files.length === 0) return;
  try {
    const processed = await processUploadedFiles(files);
    uploadedFiles = [...uploadedFiles, ...processed];
  } catch (error) {
    console.error('Failed to process uploaded files:', error);
  }
}
/** Forward files chosen through the hidden file input, then clear the input. */
function handleFileInputChange(event: Event) {
  const picker = event.target as HTMLInputElement;
  const picked = picker.files;
  if (!picked || picked.length === 0) return;
  handleFiles(Array.from(picked));
  // Reset so that picking the same file again still fires a change event.
  picker.value = '';
}
/** Drop the attachment with the given id from the pending list. */
function handleFileRemove(fileId: string) {
  const remaining = uploadedFiles.filter((entry) => entry.id !== fileId);
  uploadedFiles = remaining;
}
/**
 * Intercept paste events that should become attachments.
 *
 * Pasted files are attached directly. Plain text longer than 2500 characters is
 * attached as `pasted-text.txt`. Anything else is left to the default paste.
 */
function handlePaste(event: ClipboardEvent) {
  const clipboard = event.clipboardData;
  if (!clipboard) return;

  const pastedFiles: File[] = [];
  for (const item of Array.from(clipboard.items)) {
    if (item.kind !== 'file') continue;
    const asFile = item.getAsFile();
    if (asFile !== null) pastedFiles.push(asFile);
  }
  if (pastedFiles.length > 0) {
    event.preventDefault();
    handleFiles(pastedFiles);
    return;
  }

  // Handle long text paste as file
  const pastedText = clipboard.getData('text/plain');
  if (pastedText.length <= 2500) return;
  event.preventDefault();
  handleFiles([new File([pastedText], 'pasted-text.txt', { type: 'text/plain' })]);
}
// Mark the form as a drop target while something is dragged over it.
function handleDragOver(event: DragEvent) {
// Cancelling dragover is what allows the later drop event to fire on this element.
event.preventDefault();
isDragOver = true;
}
/**
 * Clear the drop-target state when the drag leaves the form.
 *
 * `dragleave` bubbles from descendants, so it also fires when the pointer moves
 * between children of the form (including the drop overlay that appears while
 * dragging). Those internal moves are ignored so the overlay does not flicker.
 * When the browser gives no `relatedTarget`, the state is cleared as before.
 */
function handleDragLeave(event: DragEvent) {
  event.preventDefault();
  const container = event.currentTarget as Node | null;
  const enteredNode = event.relatedTarget as Node | null;
  if (container && enteredNode && container.contains(enteredNode)) return;
  isDragOver = false;
}
/** Accept files dropped onto the form and clear the drop-target state. */
function handleDrop(event: DragEvent) {
  event.preventDefault();
  isDragOver = false;
  const dropped = event.dataTransfer?.files;
  if (!dropped) return;
  handleFiles(Array.from(dropped));
}
/**
 * Submit on Enter; Shift+Enter keeps the default behaviour (new line).
 *
 * Enter pressed while an IME composition is active only confirms the composed
 * text, so it must not send the message.
 */
function handleKeydown(event: KeyboardEvent) {
  if (event.isComposing) return;
  if (event.key === 'Enter' && !event.shiftKey) {
    event.preventDefault();
    handleSubmit();
  }
}
/**
 * Send the current draft (text and/or attachments) and reset the form.
 * Does nothing while a response is loading or when there is nothing to send.
 */
function handleSubmit() {
  const content = message.trim();
  const nothingToSend = !content && uploadedFiles.length === 0;
  if (nothingToSend || loading) return;

  // Snapshot the attachments before the form state is cleared.
  const files = [...uploadedFiles];
  message = '';
  uploadedFiles = [];
  resetTextareaHeight();
  sendMessage(content, files);

  // Refocus the textarea after sending
  setTimeout(() => textareaRef?.focus(), 10);
}
/** Grow the textarea with its content, capped at 150px. */
function handleInput() {
  const area = textareaRef;
  if (!area) return;
  // Collapse first so scrollHeight reflects the content, not the previous height.
  area.style.height = 'auto';
  const capped = Math.min(area.scrollHeight, 150);
  area.style.height = capped + 'px';
}
/** Return the textarea to its natural height (used after a message is sent). */
function resetTextareaHeight() {
  const area = textareaRef;
  if (!area) return;
  area.style.height = 'auto';
}
// Open the native file chooser by clicking the hidden file input, if it is bound.
function openFilePicker() {
fileInputRef?.click();
}
// Previous value of `loading`, used to detect the loading -> idle transition.
// Deliberately a plain variable rather than `$state`: the refocus effect below both
// reads and writes it, and a reactive value would make that effect depend on its own write.
let wasLoading = false;

// Focus the textarea once it is bound when `autofocus` is requested.
$effect(() => {
  if (autofocus && textareaRef) {
    setTimeout(() => textareaRef?.focus(), 10);
  }
});

// Refocus after loading completes (AI response finished)
$effect(() => {
  if (wasLoading && !loading && textareaRef) {
    setTimeout(() => textareaRef?.focus(), 50);
  }
  // Reading `loading` here keeps the effect subscribed even when the check above short-circuits.
  wasLoading = loading;
});
const canSend = $derived(message.trim().length > 0 || uploadedFiles.length > 0);
</script>
<!-- Hidden file input -->
<input
bind:this={fileInputRef}
type="file"
accept={acceptString}
multiple
class="hidden"
onchange={handleFileInputChange}
/>
<form
onsubmit={(e) => { e.preventDefault(); handleSubmit(); }}
class="w-full {className}"
ondragover={handleDragOver}
ondragleave={handleDragLeave}
ondrop={handleDrop}
>
<!-- Tailwind v4 removed the ring-opacity-* utilities; the ring opacity is set with the /50 colour modifier instead. -->
<div
class="relative command-panel rounded overflow-hidden transition-all duration-200 {isDragOver ? 'ring-2 ring-exo-yellow/50' : ''}"
>
<!-- Top accent line -->
<div class="absolute top-0 left-0 right-0 h-px bg-gradient-to-r from-transparent via-exo-yellow/50 to-transparent"></div>
<!-- Drag overlay -->
{#if isDragOver}
<div class="absolute inset-0 bg-exo-dark-gray/80 z-10 flex items-center justify-center">
<div class="text-exo-yellow text-sm font-mono tracking-wider uppercase">
DROP FILES HERE
</div>
</div>
{/if}
<!-- Model selector (when enabled) -->
{#if showModelSelector && availableModels().length > 0}
<div class="flex items-center justify-between gap-2 px-3 py-2 border-b border-exo-medium-gray/30">
<div class="flex items-center gap-2 flex-1">
<span class="text-xs text-exo-light-gray uppercase tracking-wider flex-shrink-0">MODEL:</span>
<!-- Custom dropdown -->
<div class="relative flex-1 max-w-xs">
<button
bind:this={dropdownButtonRef}
type="button"
onclick={() => isModelDropdownOpen = !isModelDropdownOpen}
class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-1.5 text-xs font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 {isModelDropdownOpen ? 'border-exo-yellow/70' : ''}"
>
{#if availableModels().find(m => m.id === currentModel)}
<span class="text-exo-yellow truncate">{availableModels().find(m => m.id === currentModel)?.label}</span>
{:else if availableModels().length > 0}
<span class="text-exo-yellow truncate">{availableModels()[0].label}</span>
{:else}
<span class="text-exo-light-gray/50">— SELECT MODEL —</span>
{/if}
</button>
<div class="absolute right-2 top-1/2 -translate-y-1/2 pointer-events-none transition-transform duration-200 {isModelDropdownOpen ? 'rotate-180' : ''}">
<svg class="w-3 h-3 text-exo-yellow/60" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 9l-7 7-7-7" />
</svg>
</div>
</div>
{#if isModelDropdownOpen}
<!-- Backdrop to close dropdown -->
<button
type="button"
class="fixed inset-0 z-[9998] cursor-default"
onclick={() => isModelDropdownOpen = false}
aria-label="Close dropdown"
></button>
<!-- Dropdown Panel - fixed positioning to escape overflow:hidden -->
<div
class="fixed bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-[9999] max-h-48 overflow-y-auto"
style="bottom: calc(100vh - {dropdownPosition().top}px + 4px); left: {dropdownPosition().left}px; width: {dropdownPosition().width}px;"
>
<div class="py-1">
{#each availableModels() as model}
<button
type="button"
onclick={() => {
setSelectedChatModel(model.id);
isModelDropdownOpen = false;
}}
class="w-full px-3 py-2 text-left text-xs font-mono tracking-wide transition-colors duration-100 flex items-center gap-2 {
currentModel === model.id
? 'bg-transparent text-exo-yellow'
: 'text-exo-light-gray hover:text-exo-yellow'
}"
>
{#if currentModel === model.id}
<svg class="w-3 h-3 flex-shrink-0" fill="currentColor" viewBox="0 0 20 20">
<path fill-rule="evenodd" d="M16.707 5.293a1 1 0 010 1.414l-8 8a1 1 0 01-1.414 0l-4-4a1 1 0 011.414-1.414L8 12.586l7.293-7.293a1 1 0 011.414 0z" clip-rule="evenodd" />
</svg>
{:else}
<span class="w-3"></span>
{/if}
<span class="truncate">{model.label}</span>
</button>
{/each}
</div>
</div>
{/if}
</div>
<!-- Performance stats -->
{#if currentTtft !== null || currentTps !== null}
<div class="flex items-center gap-4 text-xs font-mono flex-shrink-0">
{#if currentTtft !== null}
<span class="text-exo-light-gray">
<span class="text-white/70">TTFT</span> <span class="text-exo-yellow">{currentTtft.toFixed(1)}ms</span>
</span>
{/if}
{#if currentTps !== null}
<span class="text-exo-light-gray">
<span class="text-white/70">TPS</span> <span class="text-exo-yellow">{currentTps.toFixed(1)}</span> <span class="text-white/60">tok/s</span>
<!-- Per-token latency is only meaningful for a positive rate; 1000 / 0 would render "Infinity". -->
{#if currentTps > 0}
<span class="text-white/50">({(1000 / currentTps).toFixed(1)} ms/tok)</span>
{/if}
</span>
{/if}
</div>
{/if}
</div>
{/if}
<!-- Attached files preview -->
{#if uploadedFiles.length > 0}
<div class="px-3 pt-3">
<ChatAttachments
files={uploadedFiles}
onRemove={handleFileRemove}
/>
</div>
{/if}
<!-- Input area -->
<div class="flex items-start gap-2 sm:gap-3 py-3 px-3 sm:px-4">
<!-- Attach file button -->
<button
type="button"
onclick={openFilePicker}
disabled={loading}
class="flex items-center justify-center w-7 h-7 rounded text-exo-light-gray hover:text-exo-yellow transition-all disabled:opacity-50 disabled:cursor-not-allowed flex-shrink-0 cursor-pointer"
title="Attach file"
>
<svg class="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15.172 7l-6.586 6.586a2 2 0 102.828 2.828l6.414-6.586a4 4 0 00-5.656-5.656l-6.415 6.585a6 6 0 108.486 8.486L20.5 13" />
</svg>
</button>
<!-- Terminal prompt -->
<span class="text-exo-yellow text-sm font-bold flex-shrink-0 leading-7"></span>
<textarea
bind:this={textareaRef}
bind:value={message}
onkeydown={handleKeydown}
oninput={handleInput}
onpaste={handlePaste}
{placeholder}
disabled={loading}
rows={1}
class="flex-1 resize-none bg-transparent text-foreground placeholder:text-exo-light-gray/60 placeholder:text-sm placeholder:tracking-[0.15em] placeholder:leading-7 focus:outline-none focus:ring-0 focus:border-none disabled:opacity-50 text-sm leading-7 font-mono"
style="min-height: 28px; max-height: 150px;"
></textarea>
<button
type="submit"
disabled={!canSend || loading}
class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap
{!canSend || loading
? 'bg-exo-medium-gray/50 text-exo-light-gray cursor-not-allowed'
: 'bg-exo-yellow text-exo-black hover:bg-exo-yellow-darker hover:shadow-[0_0_20px_rgba(255,215,0,0.3)]'}"
aria-label="Send message"
>
{#if loading}
<span class="inline-flex items-center gap-1 sm:gap-2">
<span class="w-2.5 h-2.5 sm:w-3 sm:h-3 border-2 border-current border-t-transparent rounded-full animate-spin"></span>
<span class="hidden sm:inline">PROCESSING</span>
<span class="sm:hidden">...</span>
</span>
{:else}
SEND
{/if}
</button>
</div>
<!-- Bottom accent line -->
<div class="absolute bottom-0 left-0 right-0 h-px bg-gradient-to-r from-transparent via-exo-yellow/30 to-transparent"></div>
</div>
{#if showHelperText}
<p class="mt-2 sm:mt-3 text-center text-xs sm:text-xs text-exo-light-gray tracking-[0.1em] sm:tracking-[0.15em] uppercase">
<kbd class="px-1 sm:px-1.5 py-0.5 rounded bg-exo-medium-gray/30 text-exo-light-gray border border-exo-medium-gray/50">ENTER</kbd>
<span class="mx-0.5 sm:mx-1">TO SEND</span>
<span class="text-exo-medium-gray mx-1 sm:mx-2">|</span>
<kbd class="px-1 sm:px-1.5 py-0.5 rounded bg-exo-medium-gray/30 text-exo-light-gray border border-exo-medium-gray/50">SHIFT+ENTER</kbd>
<span class="mx-0.5 sm:mx-1">NEW LINE</span>
<span class="text-exo-medium-gray mx-1 sm:mx-2">|</span>
<span class="text-exo-light-gray">DRAG & DROP OR PASTE FILES</span>
</p>
{/if}
</form>

View File

@@ -0,0 +1,462 @@
<script lang="ts">
import {
messages,
currentResponse,
isLoading,
deleteMessage,
editAndRegenerate,
regenerateLastResponse
} from '$lib/stores/app.svelte';
import type { MessageAttachment } from '$lib/stores/app.svelte';
import { tick, onDestroy } from 'svelte';
interface Props {
class?: string;
scrollParent?: HTMLElement | null;
}
let { class: className = '', scrollParent = null }: Props = $props();
const messageList = $derived(messages());
const response = $derived(currentResponse());
const loading = $derived(isLoading());
// Ref for scroll anchor at bottom
let scrollAnchorRef: HTMLDivElement | undefined = $state();
// Scroll management
const SCROLL_BOTTOM_THRESHOLD = 120;
let autoScrollEnabled = true;
let currentScrollEl: HTMLElement | null = null;
/**
 * Find the element whose scrolling should drive auto-scroll.
 *
 * An explicit `scrollParent` prop wins. Otherwise walk up from the scroll anchor and
 * return the first ancestor whose content is taller than its box; null if none is found.
 */
function resolveScrollElement(): HTMLElement | null {
  if (scrollParent) return scrollParent;
  for (
    let candidate = (scrollAnchorRef?.parentElement ?? null) as HTMLElement | null;
    candidate;
    candidate = candidate.parentElement
  ) {
    // The +1 tolerates sub-pixel rounding between scrollHeight and clientHeight.
    if (candidate.scrollHeight > candidate.clientHeight + 1) return candidate;
  }
  return null;
}
/** Enable auto-scroll only while the user is within the threshold of the bottom. */
function handleScroll() {
  const el = currentScrollEl;
  if (!el) return;
  const gapToBottom = el.scrollHeight - el.scrollTop - el.clientHeight;
  autoScrollEnabled = gapToBottom < SCROLL_BOTTOM_THRESHOLD;
}
/**
 * Point the scroll listener at the currently resolved scroll container.
 * No-op when the container has not changed; otherwise the old listener is removed first.
 */
function attachScrollListener() {
  const resolved = resolveScrollElement();
  if (resolved === currentScrollEl) return;

  currentScrollEl?.removeEventListener('scroll', handleScroll);
  currentScrollEl = resolved;
  if (!resolved) return;

  resolved.addEventListener('scroll', handleScroll);
  // Initialize state based on current position
  handleScroll();
}
// Detach the scroll listener when the component is destroyed.
onDestroy(() => {
  currentScrollEl?.removeEventListener('scroll', handleScroll);
});
// Keep the scroll listener attached to the right container.
$effect(() => {
// Re-evaluate scroll container if prop changes or after mount
// (the bare read of `scrollParent` exists only to register it as a dependency)
scrollParent;
attachScrollListener();
});
// Auto-scroll to bottom when messages change or response updates, but only if user is near bottom
$effect(() => {
// Track these values to trigger effect
// (the bindings are otherwise unused; reading them registers the dependencies)
const _ = messageList.length;
const __ = response;
const ___ = loading;
// Wait for pending DOM updates so the measurements below include the new content.
tick().then(() => {
const el = currentScrollEl ?? resolveScrollElement();
if (!el || !scrollAnchorRef) return;
const distanceFromBottom = el.scrollHeight - el.scrollTop - el.clientHeight;
const isNearBottom = distanceFromBottom < SCROLL_BOTTOM_THRESHOLD;
// Scroll when auto-scroll is already on, or when the user is back near the bottom.
if (autoScrollEnabled || isNearBottom) {
scrollAnchorRef.scrollIntoView({ behavior: 'smooth', block: 'end' });
autoScrollEnabled = true;
}
});
});
// Edit state
let editingMessageId = $state<string | null>(null);
let editContent = $state('');
let editTextareaRef: HTMLTextAreaElement | undefined = $state();
// Delete confirmation state
let deleteConfirmId = $state<string | null>(null);
// Copied state for feedback
let copiedMessageId = $state<string | null>(null);
let expandedThinkingMessageIds = $state<Set<string>>(new Set());
/**
 * Format an epoch-millisecond timestamp as local 24-hour `HH:MM:SS`.
 *
 * `hourCycle: 'h23'` is used instead of `hour12: false` because the latter can
 * render midnight as "24:xx:xx" for the en-US locale.
 */
function formatTimestamp(timestamp: number): string {
  return new Date(timestamp).toLocaleTimeString('en-US', {
    hourCycle: 'h23',
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit'
  });
}
/** Pick the emoji shown for a message attachment, based on its `type`. */
function getAttachmentIcon(attachment: MessageAttachment): string {
  const kind = attachment.type;
  if (kind === 'image') return '🖼';
  if (kind === 'text') return '📄';
  // Every other attachment type gets the generic paperclip.
  return '📎';
}
/**
 * Shorten a file name to at most `maxLen` characters, keeping the extension visible.
 *
 * Names that already fit are returned unchanged. Otherwise the base name is cut and
 * joined to the extension with "...". When there is no usable extension (no dot, a
 * leading-dot name, or an extension too long to leave room for the base), the name is
 * simply cut at the end and "..." is appended.
 */
function truncateName(name: string, maxLen: number = 25): string {
  if (name.length <= maxLen) return name;
  const dotIndex = name.lastIndexOf('.');
  // dotIndex <= 0 means "no dot" or a dotfile: there is no extension to preserve.
  const hasExt = dotIndex > 0;
  const ext = hasExt ? name.slice(dotIndex) : '';
  const base = hasExt ? name.slice(0, dotIndex) : name;
  const available = maxLen - ext.length - 3;
  if (available < 1) {
    // The extension alone would exceed the budget (a negative slice end would
    // otherwise keep almost the whole base), so fall back to a plain tail cut.
    return name.slice(0, Math.max(maxLen - 3, 0)) + '...';
  }
  return base.slice(0, available) + '...' + ext;
}
/**
 * Copy message content to the clipboard and flag that message as "copied" for 2 seconds.
 *
 * The reset only clears the flag if it still belongs to this message, so the timer
 * of an earlier copy cannot cut short the feedback of a later copy on another message.
 * Clipboard failures are logged and otherwise ignored.
 */
async function handleCopy(content: string, messageId: string) {
  try {
    await navigator.clipboard.writeText(content);
    copiedMessageId = messageId;
    setTimeout(() => {
      if (copiedMessageId === messageId) {
        copiedMessageId = null;
      }
    }, 2000);
  } catch (error) {
    console.error('Failed to copy:', error);
  }
}
/** Flip whether the thinking section of the given message is expanded. */
function toggleThinkingVisibility(messageId: string) {
  // Work on a copy and reassign so the state change is picked up.
  const updated = new Set(expandedThinkingMessageIds);
  // Set.delete reports whether the id was present; add it only when it was not.
  if (!updated.delete(messageId)) {
    updated.add(messageId);
  }
  expandedThinkingMessageIds = updated;
}
// Whether the thinking section of the given message is currently expanded.
function isThinkingExpanded(messageId: string): boolean {
return expandedThinkingMessageIds.has(messageId);
}
/**
 * Enter edit mode for a message, pre-filling the editor with its content.
 * After a short delay the editor is focused, the caret is moved to the end and the
 * textarea is sized to its content (capped at 200px).
 */
function handleStartEdit(messageId: string, content: string) {
  editingMessageId = messageId;
  editContent = content;
  setTimeout(() => {
    const area = editTextareaRef;
    if (!area) return;
    area.focus();
    const end = area.value.length;
    area.setSelectionRange(end, end);
    // Auto-resize
    area.style.height = 'auto';
    area.style.height = Math.min(area.scrollHeight, 200) + 'px';
  }, 10);
}
// Leave edit mode and discard the draft text.
function handleCancelEdit() {
editingMessageId = null;
editContent = '';
}
/**
 * Commit the edit: a non-empty trimmed draft is passed to `editAndRegenerate`.
 * Edit mode is left either way.
 */
function handleSaveEdit() {
  const targetId = editingMessageId;
  const trimmed = editContent.trim();
  if (targetId && trimmed) {
    editAndRegenerate(targetId, trimmed);
  }
  editingMessageId = null;
  editContent = '';
}
/**
 * Keyboard shortcuts for the edit textarea: Enter saves, Shift+Enter keeps the
 * default (new line), Escape cancels.
 *
 * Keys pressed while an IME composition is active belong to the composition
 * (Enter confirms the text), so they are ignored here.
 */
function handleEditKeydown(event: KeyboardEvent) {
  if (event.isComposing) return;
  if (event.key === 'Enter' && !event.shiftKey) {
    event.preventDefault();
    handleSaveEdit();
  } else if (event.key === 'Escape') {
    handleCancelEdit();
  }
}
/** Grow the edit textarea with its content, capped at 200px. */
function handleEditInput() {
  const area = editTextareaRef;
  if (!area) return;
  // Collapse first so scrollHeight reflects the content, not the previous height.
  area.style.height = 'auto';
  area.style.height = Math.min(area.scrollHeight, 200) + 'px';
}
// Ask for confirmation before deleting: remember which message is pending deletion.
function handleDeleteClick(messageId: string) {
deleteConfirmId = messageId;
}
/** Delete the message that is pending confirmation, if any, and clear the prompt. */
function handleConfirmDelete() {
  const pendingId = deleteConfirmId;
  if (!pendingId) return;
  deleteMessage(pendingId);
  deleteConfirmId = null;
}
// Dismiss the delete confirmation without deleting anything.
function handleCancelDelete() {
deleteConfirmId = null;
}
// Thin wrapper that delegates to the store's `regenerateLastResponse`.
function handleRegenerate() {
regenerateLastResponse();
}
// True when `messageId` is the id of the most recent assistant message in the list;
// false when it is not, or when the list has no assistant message.
function isLastAssistantMessage(messageId: string): boolean {
  const lastAssistantIndex = messageList
    .map((entry) => entry.role)
    .lastIndexOf('assistant');
  if (lastAssistantIndex < 0) return false;
  return messageList[lastAssistantIndex].id === messageId;
}
</script>
<div class="flex flex-col gap-4 sm:gap-6 {className}">
{#each messageList as message (message.id)}
<div class="group flex {message.role === 'user' ? 'justify-end' : 'justify-start'}">
<div class="{message.role === 'user' ? 'max-w-[85%] sm:max-w-[70%] flex flex-col items-end' : 'max-w-[95%] sm:max-w-[85%]'}">
{#if message.role === 'assistant'}
<!-- Assistant message header -->
<div class="flex items-center gap-1.5 sm:gap-2 mb-1.5 sm:mb-2">
<div class="w-1.5 h-1.5 sm:w-2 sm:h-2 bg-exo-yellow rounded-full shadow-[0_0_10px_rgba(255,215,0,0.5)]"></div>
<span class="text-sm sm:text-xs text-exo-yellow tracking-[0.15em] sm:tracking-[0.2em] uppercase font-medium">EXO</span>
<span class="text-xs sm:text-sm text-exo-light-gray tracking-wider tabular-nums">{formatTimestamp(message.timestamp)}</span>
{#if message.ttftMs || message.tps}
<span class="text-xs text-exo-light-gray/80 font-mono ml-2">
{#if message.ttftMs}<span class="text-exo-light-gray/50">TTFT</span> {message.ttftMs.toFixed(0)}ms{/if}{#if message.ttftMs && message.tps}<span class="text-exo-light-gray/30 mx-1"></span>{/if}{#if message.tps}{message.tps.toFixed(1)} <span class="text-exo-light-gray/50">tok/s</span>{/if}
</span>
{/if}
</div>
{:else}
<!-- User message header -->
<div class="flex items-center justify-end gap-1.5 sm:gap-2 mb-1.5 sm:mb-2">
<span class="text-xs sm:text-sm text-exo-light-gray tracking-wider tabular-nums">{formatTimestamp(message.timestamp)}</span>
<span class="text-sm sm:text-xs text-exo-light-gray tracking-[0.1em] sm:tracking-[0.15em] uppercase">QUERY</span>
<div class="w-1.5 h-1.5 sm:w-2 sm:h-2 bg-exo-light-gray/50 rounded-full"></div>
</div>
{/if}
{#if deleteConfirmId === message.id}
<!-- Delete confirmation -->
<div class="bg-red-500/10 border border-red-500/30 rounded-lg p-3">
<p class="text-xs text-red-400 mb-3">Delete this message{message.role === 'user' ? ' and all responses after it' : ''}?</p>
<div class="flex gap-2 justify-end">
<button
onclick={handleCancelDelete}
class="px-3 py-1.5 text-sm font-mono tracking-wider uppercase bg-exo-medium-gray/20 text-exo-light-gray border border-exo-medium-gray/30 rounded hover:bg-exo-medium-gray/30 transition-colors cursor-pointer"
>
CANCEL
</button>
<button
onclick={handleConfirmDelete}
class="px-3 py-1.5 text-sm font-mono tracking-wider uppercase bg-red-500/20 text-red-400 border border-red-500/30 rounded hover:bg-red-500/30 transition-colors cursor-pointer"
>
DELETE
</button>
</div>
</div>
{:else if editingMessageId === message.id}
<!-- Edit mode -->
<div class="command-panel rounded-lg p-3">
<textarea
bind:this={editTextareaRef}
bind:value={editContent}
onkeydown={handleEditKeydown}
oninput={handleEditInput}
class="w-full bg-exo-black/60 border border-exo-yellow/30 rounded px-3 py-2 text-sm text-foreground font-mono focus:outline-none focus:border-exo-yellow/50 resize-none"
style="min-height: 60px; max-height: 200px;"
></textarea>
<div class="flex gap-2 justify-end mt-2">
<button
onclick={handleCancelEdit}
class="px-3 py-1.5 text-sm font-mono tracking-wider uppercase bg-exo-medium-gray/20 text-exo-light-gray border border-exo-medium-gray/30 rounded hover:bg-exo-medium-gray/30 transition-colors cursor-pointer"
>
CANCEL
</button>
<button
onclick={handleSaveEdit}
disabled={!editContent.trim()}
class="px-3 py-1.5 text-sm font-mono tracking-wider uppercase bg-transparent text-exo-yellow border border-exo-yellow/30 rounded hover:border-exo-yellow/50 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5 cursor-pointer"
>
<svg class="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 19l9 2-9-18-9 18 9-2zm0 0v-8" />
</svg>
SEND
</button>
</div>
</div>
{:else}
<div class="{message.role === 'user'
? 'command-panel rounded-lg rounded-tr-sm inline-block'
: 'command-panel rounded-lg rounded-tl-sm border-l-2 border-l-exo-yellow/50 inline-block'}">
{#if message.role === 'user'}
<!-- User message styling -->
<div class="px-4 py-3">
<!-- Attachments -->
{#if message.attachments && message.attachments.length > 0}
<div class="flex flex-wrap gap-2 mb-3">
{#each message.attachments as attachment}
<div class="flex items-center gap-2 bg-exo-dark-gray/60 border border-exo-yellow/20 rounded px-2 py-1 text-xs font-mono">
{#if attachment.type === 'image' && attachment.preview}
<img
src={attachment.preview}
alt={attachment.name}
class="w-12 h-12 object-cover rounded border border-exo-yellow/20"
/>
{:else}
<span>{getAttachmentIcon(attachment)}</span>
{/if}
<span class="text-exo-yellow" title={attachment.name}>{truncateName(attachment.name)}</span>
</div>
{/each}
</div>
{/if}
{#if message.content}
<div class="text-sm text-foreground font-mono tracking-wide whitespace-pre-wrap break-words leading-relaxed">
{message.content}
</div>
{/if}
</div>
{:else}
<!-- Assistant message styling -->
<div class="p-3 sm:p-4">
{#if message.thinking && message.thinking.trim().length > 0}
<div class="mb-3 rounded border border-exo-yellow/20 bg-exo-black/40">
<button
type="button"
class="w-full flex items-center justify-between px-3 py-2 text-xs font-mono uppercase tracking-[0.2em] text-exo-light-gray/80 hover:text-exo-yellow transition-colors cursor-pointer"
onclick={() => toggleThinkingVisibility(message.id)}
aria-expanded={isThinkingExpanded(message.id)}
aria-controls={`thinking-panel-${message.id}`}
>
<span class="flex items-center gap-2 tracking-[0.25em]">
<svg
class={`w-3.5 h-3.5 text-current transition-transform duration-200 ${isThinkingExpanded(message.id) ? 'rotate-90' : ''}`}
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
aria-hidden="true"
>
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
</svg>
<span>Thinking...</span>
</span>
<span class="text-[10px] tracking-[0.2em] text-exo-light-gray/60">
{isThinkingExpanded(message.id) ? 'HIDE' : 'SHOW'}
</span>
</button>
{#if isThinkingExpanded(message.id)}
<div
id={`thinking-panel-${message.id}`}
class="px-3 pb-3 text-xs text-exo-light-gray/90 font-mono whitespace-pre-wrap break-words leading-relaxed"
>
{message.thinking.trim()}
</div>
{/if}
</div>
{/if}
<div class="text-sm text-foreground font-mono tracking-wide whitespace-pre-wrap break-words leading-relaxed">
{message.content || (loading ? response : '')}
{#if loading && !message.content}
<span class="inline-block w-2 h-4 bg-exo-yellow/70 ml-1 cursor-blink"></span>
{/if}
</div>
</div>
{/if}
</div>
<!-- Action buttons -->
<div class="flex items-center gap-1 mt-1.5 opacity-0 group-hover:opacity-100 transition-opacity {message.role === 'user' ? 'justify-end' : 'justify-start'}">
<!-- Copy button -->
<button
onclick={() => handleCopy(message.content, message.id)}
class="p-1.5 text-exo-light-gray hover:text-exo-yellow transition-colors rounded cursor-pointer"
title="Copy message"
>
{#if copiedMessageId === message.id}
<svg class="w-3.5 h-3.5 text-green-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7" />
</svg>
{:else}
<svg class="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z" />
</svg>
{/if}
</button>
<!-- Edit button (user messages only) -->
{#if message.role === 'user'}
<button
onclick={() => handleStartEdit(message.id, message.content)}
class="p-1.5 text-exo-light-gray hover:text-exo-yellow transition-colors rounded cursor-pointer"
title="Edit message"
>
<svg class="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
</svg>
</button>
{/if}
<!-- Regenerate button (last assistant message only) -->
{#if message.role === 'assistant' && isLastAssistantMessage(message.id) && !loading}
<button
onclick={handleRegenerate}
class="p-1.5 text-exo-light-gray hover:text-exo-yellow transition-colors rounded cursor-pointer"
title="Regenerate response"
>
<svg class="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
</svg>
</button>
{/if}
<!-- Delete button -->
<button
onclick={() => handleDeleteClick(message.id)}
class="p-1.5 text-exo-light-gray hover:text-red-400 transition-colors rounded hover:bg-red-500/10 cursor-pointer"
title="Delete message"
>
<svg class="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
{/if}
</div>
</div>
{/each}
{#if messageList.length === 0}
<div class="flex-1 flex flex-col items-center justify-center text-center pt-[20vh]">
<div class="w-12 h-12 sm:w-16 sm:h-16 border border-exo-yellow/20 rounded-full flex items-center justify-center mb-3 sm:mb-4">
<div class="w-6 h-6 sm:w-8 sm:h-8 border border-exo-yellow/40 rounded-full flex items-center justify-center">
<div class="w-1.5 h-1.5 sm:w-2 sm:h-2 bg-exo-yellow/60 rounded-full"></div>
</div>
</div>
<p class="text-xs sm:text-sm text-exo-light-gray tracking-[0.15em] sm:tracking-[0.2em] uppercase">AWAITING INPUT</p>
<p class="text-sm sm:text-xs text-exo-light-gray tracking-wider mt-1">ENTER A QUERY TO BEGIN</p>
</div>
{/if}
<!-- Scroll anchor for auto-scroll -->
<div bind:this={scrollAnchorRef}></div>
</div>

View File

@@ -0,0 +1,430 @@
<script lang="ts">
import {
conversations,
activeConversationId,
createConversation,
loadConversation,
deleteConversation,
deleteAllConversations,
renameConversation,
clearChat,
instances,
debugMode,
toggleDebugMode
} from '$lib/stores/app.svelte';
// Component props: only an optional extra CSS class string.
interface Props {
class?: string;
}
// `class` is renamed to `className` on destructure; it is appended to the <aside> classes.
let { class: className = '' }: Props = $props();
// Reactive values read from the app store accessors.
const conversationList = $derived(conversations());
const activeId = $derived(activeConversationId());
const instanceData = $derived(instances());
const debugEnabled = $derived(debugMode());
// Text bound to the search input.
let searchQuery = $state('');
// Id and draft name of the conversation being renamed (null when none).
let editingId = $state<string | null>(null);
let editingName = $state('');
// Id of the conversation awaiting delete confirmation (null when none).
let deleteConfirmId = $state<string | null>(null);
// Whether the "delete all" confirmation panel is shown.
let showDeleteAllConfirm = $state(false);
// Search text normalised once: surrounding whitespace removed, lower-cased.
const normalizedSearchQuery = $derived(searchQuery.trim().toLowerCase());

// Conversations whose name contains the search text (case-insensitive).
// A blank query yields the full list. The same trimmed query is used for the
// emptiness check and for matching, so stray leading/trailing spaces in the
// search box no longer prevent matches.
const filteredConversations = $derived(
  normalizedSearchQuery
    ? conversationList.filter((c) => c.name.toLowerCase().includes(normalizedSearchQuery))
    : conversationList
);
/** Click handler for the NEW CHAT button; delegates to `createConversation`. */
function handleNewChat() {
createConversation();
}
/** Load the conversation with the given id via `loadConversation`. */
function handleSelectConversation(id: string) {
loadConversation(id);
}
/**
 * Enter rename mode for a conversation, seeding the input with its current name.
 * Propagation is stopped so the click does not also reach the enclosing row's
 * click handler.
 */
function handleStartEdit(id: string, name: string, event: MouseEvent) {
event.stopPropagation();
editingId = id;
editingName = name;
}
/**
 * Apply the pending rename and leave rename mode.
 *
 * `renameConversation` is only called when a conversation is being edited and
 * the trimmed name is non-empty; the edit state is reset either way.
 */
function handleSaveEdit() {
  const newName = editingName.trim();
  if (editingId && newName) {
    renameConversation(editingId, newName);
  }
  editingName = '';
  editingId = null;
}
/** Leave rename mode and discard the draft name. */
function handleCancelEdit() {
editingId = null;
editingName = '';
}
/**
 * Keyboard shortcuts for the rename input: Enter saves, Escape cancels.
 */
function handleEditKeydown(event: KeyboardEvent) {
  // Keys pressed while an IME composition is active (e.g. Enter to confirm a
  // CJK candidate) must not save or cancel the rename. keyCode 229 covers
  // browsers that report the confirming keydown with isComposing === false.
  if (event.isComposing || event.keyCode === 229) {
    return;
  }
  if (event.key === 'Enter') {
    handleSaveEdit();
  } else if (event.key === 'Escape') {
    handleCancelEdit();
  }
}
/**
 * Mark a conversation as awaiting delete confirmation (nothing is deleted yet).
 * Propagation is stopped so the click does not also reach the enclosing row's
 * click handler.
 */
function handleDeleteClick(id: string, event: MouseEvent) {
event.stopPropagation();
deleteConfirmId = id;
}
/** Delete the conversation awaiting confirmation, then clear the pending id. */
function handleConfirmDelete() {
  if (!deleteConfirmId) return;
  deleteConversation(deleteConfirmId);
  deleteConfirmId = null;
}
/** Dismiss the per-conversation delete confirmation. */
function handleCancelDelete() {
deleteConfirmId = null;
}
/** Show the "delete all conversations" confirmation panel. */
function handleDeleteAllClick() {
showDeleteAllConfirm = true;
}
/** Call `deleteAllConversations` and hide the confirmation panel. */
function handleConfirmDeleteAll() {
deleteAllConversations();
showDeleteAllConfirm = false;
}
/** Hide the "delete all" confirmation panel without deleting anything. */
function handleCancelDeleteAll() {
showDeleteAllConfirm = false;
}
/**
 * Format a timestamp as a short label relative to today.
 *
 * - same calendar day: time of day (e.g. "02:15 PM")
 * - previous calendar day: "Yesterday"
 * - less than 7 calendar days apart: short weekday (e.g. "Tue")
 * - otherwise: short month and day (e.g. "Mar 4")
 *
 * @param timestamp Value accepted by `new Date(timestamp)`.
 */
function formatDate(timestamp: number): string {
  const date = new Date(timestamp);
  const now = new Date();
  // Compare local calendar days, not elapsed 24h periods: something from
  // 23:00 last night viewed at 01:00 must read "Yesterday", not a bare time.
  const startOfDay = (d: Date): number =>
    new Date(d.getFullYear(), d.getMonth(), d.getDate()).getTime();
  // Math.round absorbs the 23h/25h days around daylight-saving changes.
  const diffDays = Math.round((startOfDay(now) - startOfDay(date)) / (1000 * 60 * 60 * 24));
  if (diffDays === 0) {
    return date.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit' });
  } else if (diffDays === 1) {
    return 'Yesterday';
  } else if (diffDays < 7) {
    return date.toLocaleDateString('en-US', { weekday: 'short' });
  } else {
    return date.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
  }
}
/**
 * Return the `ttftMs`/`tps` stats of the most recent assistant message that
 * has at least one of them set, or null when no such message exists.
 */
function getLastAssistantStats(conversation: typeof conversationList[0]): { ttftMs?: number; tps?: number } | null {
  let idx = conversation.messages.length;
  // Scan newest-to-oldest so the first hit is the latest message with stats.
  while (idx-- > 0) {
    const { role, ttftMs, tps } = conversation.messages[idx];
    if (role !== 'assistant') continue;
    if (ttftMs || tps) {
      return { ttftMs, tps };
    }
  }
  return null;
}
/**
 * Return the display name for a model id: the text after the last '/'.
 *
 * Falls back to the full id when that segment is empty (id ends with '/'),
 * and to 'Unknown Model' when the id is null, undefined or empty.
 */
function formatModelName(modelId: string | null | undefined): string {
  if (!modelId) return 'Unknown Model';
  // lastIndexOf yields -1 when there is no '/', making the slice the whole id.
  const lastSegment = modelId.slice(modelId.lastIndexOf('/') + 1);
  return lastSegment || modelId;
}
/**
 * Build the strategy label: "<sharding> (<instanceType>)", or just the
 * sharding when no instance type is given. A null/undefined sharding is shown
 * as 'Unknown'.
 */
function formatStrategy(sharding: string | null | undefined, instanceType: string | null | undefined): string {
  const shardLabel = sharding ?? 'Unknown';
  if (!instanceType) {
    return shardLabel;
  }
  return `${shardLabel} (${instanceType})`;
}
/**
 * Unwrap a single-key object of the form `{ Tag: value }`.
 *
 * @returns `[tag, value]` when `obj` is an object with exactly one own
 *          enumerable key, otherwise `[null, null]`.
 */
function getTaggedValue(obj: unknown): [string | null, unknown] {
  if (obj && typeof obj === 'object') {
    const entries = Object.entries(obj as Record<string, unknown>);
    if (entries.length === 1) {
      const [[tag, value]] = entries;
      return [tag, value];
    }
  }
  return [null, null];
}
/**
 * Read `shardAssignments.modelId` from a tagged instance wrapper.
 *
 * @returns The model id, or null when the wrapper cannot be unwrapped to an
 *          object or the field is absent.
 */
function extractInstanceModelId(instanceWrapped: unknown): string | null {
  const payload = getTaggedValue(instanceWrapped)[1];
  if (!payload || typeof payload !== 'object') {
    return null;
  }
  const { shardAssignments } = payload as { shardAssignments?: { modelId?: string } };
  return shardAssignments?.modelId ?? null;
}
/**
 * Derive display labels for a tagged instance wrapper.
 *
 * The instance type comes from the wrapper's tag; the sharding comes from the
 * tag of the first entry in `shardAssignments.runnerToShard`. Unrecognised or
 * missing tags yield null for the corresponding field.
 */
function describeInstance(instanceWrapped: unknown): { sharding: string | null; instanceType: string | null } {
  // Maps (rather than object literals) so inherited keys can never match a tag.
  const instanceTypeLabels = new Map<string | null, string>([
    ['MlxRingInstance', 'MLX Ring'],
    ['MlxIbvInstance', 'MLX RDMA'],
    ['MlxJacclInstance', 'MLX RDMA']
  ]);
  const shardingLabels = new Map<string | null, string>([
    ['PipelineShardMetadata', 'Pipeline'],
    ['TensorShardMetadata', 'Tensor'],
    ['PrefillDecodeShardMetadata', 'Prefill/Decode']
  ]);

  const [instanceTag, payload] = getTaggedValue(instanceWrapped);
  if (!payload || typeof payload !== 'object') {
    return { sharding: null, instanceType: null };
  }

  const instanceType = instanceTypeLabels.get(instanceTag) ?? null;

  const typed = payload as { shardAssignments?: { runnerToShard?: Record<string, unknown> } };
  const shards = Object.values(typed.shardAssignments?.runnerToShard || {});
  const firstShard = shards[0];
  let sharding: string | null = null;
  if (firstShard) {
    const [shardTag] = getTaggedValue(firstShard);
    sharding = shardingLabels.get(shardTag) ?? null;
  }

  return { sharding, instanceType };
}
/**
 * Work out the model and strategy labels shown for a conversation row.
 *
 * The conversation's model id is matched against the known instances; when
 * nothing matches, the first available instance is used instead (and supplies
 * the model id if the conversation has none). Values stored on the
 * conversation take precedence over those derived from the instance.
 */
function resolveConversationInfo(conversation: typeof conversationList[0]): { modelLabel: string; strategyLabel: string } {
  const wrappers: unknown[] = Object.values(instanceData);
  let modelId = conversation.modelId ?? null;

  // Attempt to match conversation model to an instance
  let matched: unknown = null;
  if (modelId) {
    matched = wrappers.find((wrapper) => extractInstanceModelId(wrapper) === modelId) ?? null;
  }

  // Fallback: use the first available instance if no explicit match
  if (!matched && wrappers[0]) {
    matched = wrappers[0];
    modelId = modelId ?? extractInstanceModelId(matched);
  }

  const derived = matched
    ? describeInstance(matched)
    : { sharding: null, instanceType: null };

  return {
    modelLabel: formatModelName(modelId),
    strategyLabel: formatStrategy(
      conversation.sharding ?? derived.sharding ?? 'Unknown',
      conversation.instanceType ?? derived.instanceType
    )
  };
}
</script>
<aside class="flex flex-col h-full bg-exo-dark-gray border-r border-exo-yellow/10 {className}">
<!-- Header -->
<div class="p-4">
<button
onclick={handleNewChat}
class="w-full flex items-center justify-center gap-2 py-2.5 px-4 bg-transparent border border-exo-yellow/30 text-exo-yellow text-xs font-mono tracking-wider uppercase hover:border-exo-yellow/50 transition-all cursor-pointer"
>
<svg class="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 4v16m8-8H4" />
</svg>
NEW CHAT
</button>
</div>
<!-- Search -->
<div class="px-4 py-3">
<div class="relative">
<svg class="absolute left-3 top-1/2 -translate-y-1/2 w-3.5 h-3.5 text-white/50" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
<input
type="text"
bind:value={searchQuery}
placeholder="Search conversations..."
class="w-full bg-exo-black/40 border border-exo-medium-gray/30 rounded px-3 py-2 pl-9 text-xs text-white/90 placeholder:text-white/40 focus:outline-none focus:border-exo-yellow/30"
/>
</div>
</div>
<!-- Conversation List -->
<div class="flex-1 overflow-y-auto">
{#if filteredConversations.length > 0}
<div class="py-2">
<div class="px-4 py-2">
<span class="text-sm text-white/70 font-mono tracking-wider uppercase">
{searchQuery ? 'SEARCH RESULTS' : 'CONVERSATIONS'}
</span>
</div>
{#each filteredConversations as conversation (conversation.id)}
{@const info = resolveConversationInfo(conversation)}
<div class="px-2">
{#if editingId === conversation.id}
<!-- Edit mode -->
<div class="p-2 bg-transparent border border-exo-yellow/20 rounded mb-1">
<input
type="text"
bind:value={editingName}
onkeydown={handleEditKeydown}
class="w-full bg-exo-black/60 border border-exo-yellow/30 rounded px-2 py-1.5 text-xs text-exo-light-gray focus:outline-none focus:border-exo-yellow/50 mb-2"
autofocus
/>
<div class="flex gap-2">
<button
onclick={handleSaveEdit}
class="flex-1 py-1.5 text-xs font-mono tracking-wider uppercase bg-transparent text-exo-yellow border border-exo-yellow/30 rounded hover:border-exo-yellow/50 cursor-pointer"
>
SAVE
</button>
<button
onclick={handleCancelEdit}
class="flex-1 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-medium-gray/20 text-exo-light-gray border border-exo-medium-gray/30 rounded hover:bg-exo-medium-gray/30 cursor-pointer"
>
CANCEL
</button>
</div>
</div>
{:else if deleteConfirmId === conversation.id}
<!-- Delete confirmation -->
<div class="p-2 bg-red-500/10 border border-red-500/30 rounded mb-1">
<p class="text-xs text-red-400 mb-2">Delete "{conversation.name}"?</p>
<div class="flex gap-2">
<button
onclick={handleConfirmDelete}
class="flex-1 py-1.5 text-xs font-mono tracking-wider uppercase bg-red-500/20 text-red-400 border border-red-500/30 rounded hover:bg-red-500/30 cursor-pointer"
>
DELETE
</button>
<button
onclick={handleCancelDelete}
class="flex-1 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-medium-gray/20 text-exo-light-gray border border-exo-medium-gray/30 rounded hover:bg-exo-medium-gray/30 cursor-pointer"
>
CANCEL
</button>
</div>
</div>
{:else}
<!-- Normal view -->
{@const stats = getLastAssistantStats(conversation)}
<div
role="button"
tabindex="0"
onclick={() => handleSelectConversation(conversation.id)}
onkeydown={(e) => e.key === 'Enter' && handleSelectConversation(conversation.id)}
class="group w-full flex items-center justify-between p-2 rounded mb-1 transition-all text-left cursor-pointer
{activeId === conversation.id
? 'bg-transparent border border-exo-yellow/30'
: 'hover:border-exo-yellow/20 border border-transparent'}"
>
<div class="flex-1 min-w-0 pr-2">
<div class="text-sm truncate {activeId === conversation.id ? 'text-exo-yellow' : 'text-white/90'}">
{conversation.name}
</div>
<div class="text-sm text-white/50 mt-0.5">
{formatDate(conversation.updatedAt)}
</div>
<div class="text-sm text-white/70 truncate">
{info.modelLabel}
</div>
<div class="text-xs text-white/60 font-mono">
Strategy: <span class="text-white/80">{info.strategyLabel}</span>
</div>
{#if stats}
<div class="text-xs text-white/60 font-mono mt-1">
{#if stats.ttftMs}<span class="text-white/40">TTFT</span> {stats.ttftMs.toFixed(0)}ms{/if}{#if stats.ttftMs && stats.tps}<span class="text-white/30 mx-1.5"></span>{/if}{#if stats.tps}{stats.tps.toFixed(1)} <span class="text-white/40">tok/s</span>{/if}
</div>
{/if}
</div>
<div class="flex items-center gap-1 opacity-0 group-hover:opacity-100 transition-opacity">
<button
type="button"
onclick={(e) => handleStartEdit(conversation.id, conversation.name, e)}
class="p-1 text-exo-light-gray hover:text-exo-yellow transition-colors cursor-pointer"
title="Rename"
>
<svg class="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
</svg>
</button>
<button
type="button"
onclick={(e) => handleDeleteClick(conversation.id, e)}
class="p-1 text-exo-light-gray hover:text-red-400 transition-colors cursor-pointer"
title="Delete"
>
<svg class="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
</div>
{/if}
</div>
{/each}
</div>
{:else}
<div class="flex flex-col items-center justify-center h-full p-4 text-center">
<div class="w-12 h-12 border border-exo-yellow/20 rounded-full flex items-center justify-center mb-3">
<svg class="w-6 h-6 text-exo-yellow/40" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M8 12h.01M12 12h.01M16 12h.01M21 12c0 4.418-4.03 8-9 8a9.863 9.863 0 01-4.255-.949L3 20l1.395-3.72C3.512 15.042 3 13.574 3 12c0-4.418 4.03-8 9-8s9 3.582 9 8z" />
</svg>
</div>
<p class="text-xs text-white/70 font-mono tracking-wider uppercase mb-1">
{searchQuery ? 'NO RESULTS' : 'NO CONVERSATIONS'}
</p>
<p class="text-sm text-white/50">
{searchQuery ? 'Try a different search' : 'Start a new chat to begin'}
</p>
</div>
{/if}
</div>
<!-- Footer -->
<div class="p-3 border-t border-exo-yellow/10">
{#if showDeleteAllConfirm}
<div class="bg-red-500/10 border border-red-500/30 rounded p-2 mb-2">
<p class="text-xs text-red-400 text-center mb-2">Delete all {conversationList.length} conversations?</p>
<div class="flex gap-2">
<button
onclick={handleConfirmDeleteAll}
class="flex-1 py-1.5 text-xs font-mono tracking-wider uppercase bg-red-500/20 text-red-400 border border-red-500/30 rounded hover:bg-red-500/30 transition-colors cursor-pointer"
>
DELETE ALL
</button>
<button
onclick={handleCancelDeleteAll}
class="flex-1 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-medium-gray/20 text-exo-light-gray border border-exo-medium-gray/30 rounded hover:bg-exo-medium-gray/30 transition-colors cursor-pointer"
>
CANCEL
</button>
</div>
</div>
{:else if conversationList.length > 0}
<button
onclick={handleDeleteAllClick}
class="w-full flex items-center justify-center gap-2 py-1.5 text-sm font-mono tracking-wider uppercase text-white/70 hover:text-red-400 hover:bg-red-500/10 border border-transparent hover:border-red-500/20 rounded transition-all cursor-pointer"
>
<svg class="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
DELETE ALL CHATS
</button>
{/if}
<div class="flex items-center justify-center gap-3 {conversationList.length > 0 && !showDeleteAllConfirm ? 'mt-2' : ''}">
<button
type="button"
onclick={toggleDebugMode}
class="p-1.5 rounded border border-exo-medium-gray/40 hover:border-exo-yellow/50 transition-colors cursor-pointer"
title="Toggle debug mode"
>
<svg class="w-4 h-4 {debugEnabled ? 'text-exo-yellow' : 'text-exo-medium-gray'}" fill="currentColor" viewBox="0 0 24 24">
<path d="M19 8h-1.81A6.002 6.002 0 0 0 12 2a6.002 6.002 0 0 0-5.19 3H5a1 1 0 0 0 0 2h1v2H5a1 1 0 0 0 0 2h1v2H5a1 1 0 0 0 0 2h1.81A6.002 6.002 0 0 0 12 22a6.002 6.002 0 0 0 5.19-3H19a1 1 0 0 0 0-2h-1v-2h1a1 1 0 0 0 0-2h-1v-2h1a1 1 0 1 0 0-2Zm-5 10.32V19a1 1 0 1 1-2 0v-.68a3.999 3.999 0 0 1-3-3.83V9.32a3.999 3.999 0 0 1 3-3.83V5a1 1 0 0 1 2 0v.49a3.999 3.999 0 0 1 3 3.83v5.17a3.999 3.999 0 0 1-3 3.83Z"/>
</svg>
</button>
<div class="text-xs text-white/60 font-mono tracking-wider text-center">
{conversationList.length} CONVERSATION{conversationList.length !== 1 ? 'S' : ''}
</div>
</div>
</div>
</aside>

View File

@@ -0,0 +1,57 @@
<script lang="ts">
import { browser } from '$app/environment';
// When true the Home button is rendered and the logo button is enabled.
export let showHome = true;
// Optional callback invoked by handleHome instead of the default hash navigation.
export let onHome: (() => void) | null = null;
/**
 * Navigate home: call the `onHome` callback when one is supplied, otherwise
 * (in the browser only) set the URL hash to the root route.
 */
function handleHome(): void {
  if (onHome) {
    onHome();
  } else if (browser) {
    // Hash router: send to root
    window.location.hash = '/';
  }
}
</script>
<header class="relative z-20 flex items-center justify-center px-6 pt-8 pb-4 bg-exo-dark-gray">
<!-- Center: Logo (clickable to go home) -->
<button
onclick={handleHome}
class="hover:opacity-80 transition-opacity {showHome ? 'cursor-pointer' : 'cursor-default'}"
title={showHome ? 'Go to home' : ''}
disabled={!showHome}
>
<img src="/exo-logo.png" alt="EXO" class="h-18 drop-shadow-[0_0_20px_rgba(255,215,0,0.5)]" />
</button>
<!-- Right: Home + Downloads -->
<div class="absolute right-6 top-1/2 -translate-y-1/2 flex items-center gap-4">
{#if showHome}
<button
onclick={handleHome}
class="text-sm text-exo-light-gray hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
title="Back to topology view"
>
<svg class="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M3 12l2-2m0 0l7-7 7 7M5 10v10a1 1 0 001 1h3m10-11l2 2m-2-2v10a1 1 0 01-1 1h-3m-6 0a1 1 0 001-1v-4a1 1 0 011-1h2a1 1 0 011 1v4a1 1 0 001 1m-6 0h6" />
</svg>
Home
</button>
{/if}
<a
href="/#/downloads"
class="text-sm text-exo-light-gray hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
title="View downloads overview"
>
<svg class="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 3v12" />
<path d="M7 12l5 5 5-5" />
<path d="M5 21h14" />
</svg>
Downloads
</a>
</div>
</header>

View File

@@ -0,0 +1,660 @@
<script lang="ts">
import type { DownloadProgress, NodeInfo, PlacementPreview } from '$lib/stores/app.svelte';
// Props accepted by this model card component.
interface Props {
// Model being shown; storage_size_megabytes, when present, is preferred over the name-based memory estimate.
model: { id: string; name?: string; storage_size_megabytes?: number };
isLaunching?: boolean;
// Aggregate download state plus an optional per-node breakdown.
downloadStatus?: {
isDownloading: boolean;
progress: DownloadProgress | null;
perNode?: Array<{
nodeId: string;
nodeName: string;
progress: DownloadProgress;
}>;
} | null;
// Node info keyed by node id.
nodes?: Record<string, NodeInfo>;
sharding?: 'Pipeline' | 'Tensor';
runtime?: 'MlxRing' | 'MlxIbv' | 'MlxJaccl';
onLaunch?: () => void;
tags?: string[];
// Placement preview; its memory_delta_by_node entries are read when building the node list.
apiPreview?: PlacementPreview | null;
// When non-null, used in place of model.id for huggingFaceModelId.
modelIdOverride?: string | null;
}
// Destructure props, applying defaults for everything except `model` and `onLaunch`.
let {
model,
isLaunching = false,
downloadStatus = null,
nodes = {},
sharding = 'Pipeline',
runtime = 'MlxRing',
onLaunch,
tags = [],
apiPreview = null,
modelIdOverride = null
}: Props = $props();
/**
 * Estimate a model's memory requirement in GB from its id and name.
 *
 * The quantisation level (4-bit / 8-bit / otherwise fp16) is detected from
 * the id and name together; the parameter count is taken from the id only.
 *
 * @param modelId   Model identifier, e.g. "org/llama-3-70b-4bit".
 * @param modelName Optional display name, searched only for quantisation markers.
 * @returns Estimated size in GB; 16 when nothing in the id is recognised.
 */
function estimateMemoryGB(modelId: string, modelName?: string): number {
  // Check both ID and name for quantization info
  const haystack = `${modelId} ${modelName || ''}`.toLowerCase();
  const mentions = (...needles: string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  // 4-bit = 0.5 bytes/param, 8-bit = 1 byte/param, fp16 = 2 bytes/param
  let quantMultiplier = 2;
  if (mentions('4bit', '4-bit', ':4bit')) {
    quantMultiplier = 0.5;
  } else if (mentions('8bit', '8-bit', ':8bit')) {
    quantMultiplier = 1;
  }

  const id = modelId.toLowerCase();
  const scaled = (billions: number): number => Math.round(billions * quantMultiplier);

  // Known large models that don't follow the standard naming pattern
  // (marker, parameter count in billions), checked in order.
  const knownFamilies: Array<[string, number]> = [
    ['deepseek-v3', 685],
    ['deepseek-v2', 236],
    ['llama-4', 400]
  ];
  for (const [marker, billions] of knownFamilies) {
    if (id.includes(marker)) {
      return scaled(billions);
    }
  }

  // A number followed by "b" that is not followed by another letter
  // (so "70b" matches but the "4b" inside "4bit" does not).
  const paramMatch = id.match(/(\d+(?:\.\d+)?)\s*b(?![a-z])/i);
  if (paramMatch) {
    return Math.max(4, scaled(parseFloat(paramMatch[1])));
  }

  // Fallback patterns for explicit size markers, checked in order; the first
  // group with any marker present in the id wins.
  const sizeMarkers: Array<[string[], number]> = [
    [['405b', '400b'], 405],
    [['180b'], 180],
    [['141b', '140b'], 140],
    [['123b', '120b'], 123],
    [['72b', '70b'], 70],
    [['67b', '65b'], 65],
    [['35b', '34b', '32b', '30b'], 32],
    [['27b', '26b', '22b'], 24],
    [['14b', '13b', '15b'], 14],
    [['8b', '9b', '7b'], 8],
    [['3b', '3.8b'], 4],
    [['2b', '1b', '1.5b', '0.5b'], 2]
  ];
  for (const [markers, billions] of sizeMarkers) {
    if (markers.some((marker) => id.includes(marker))) {
      return scaled(billions);
    }
  }

  return 16; // Default fallback
}
/**
 * Format a byte count as a human-readable string using 1024-based units.
 *
 * @param bytes    Number of bytes. Zero, negative, NaN and infinite values
 *                 are rendered as '0 B'.
 * @param decimals Maximum number of decimal places (trailing zeros dropped).
 * @returns e.g. "1.5 KB". Values beyond the largest unit stay in TB.
 */
function formatBytes(bytes: number, decimals = 1): string {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp the unit index: values below 1 give a negative index and values of
  // 1024^5 or more run past the table; both used to print "undefined".
  const i = Math.min(sizes.length - 1, Math.max(0, rawIndex));
  return parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i];
}
/** Format a transfer rate given in bytes per second; non-positive or NaN rates render as '0 B/s'. */
function formatSpeed(bps: number): string {
  return bps > 0 ? formatBytes(bps) + '/s' : '0 B/s';
}
/**
 * Format a remaining-time estimate given in milliseconds.
 *
 * @returns "Xh Ym", "Xm Ys" or "Xs" depending on magnitude; '--' for
 *          zero, negative or NaN input.
 */
function formatEta(ms: number): string {
  if (!(ms > 0)) return '--';
  const totalSeconds = Math.round(ms / 1000);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor(totalSeconds / 60) % 60;
  const seconds = totalSeconds % 60;
  if (hours > 0) {
    return `${hours}h ${minutes}m`;
  }
  return minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`;
}
// Values derived from the downloadStatus prop, with defaults for a missing status.
const isDownloading = $derived(downloadStatus?.isDownloading ?? false);
const progress = $derived(downloadStatus?.progress);
const percentage = $derived(progress?.percentage ?? 0);
// Ids of nodes whose detail section is currently expanded.
let expandedNodes = $state<Set<string>>(new Set());
/**
 * Flip the expanded/collapsed state of a node's details.
 *
 * A fresh Set is assigned rather than mutating the existing one.
 */
function toggleNodeDetails(nodeId: string): void {
  const updated = new Set(expandedNodes);
  // Set.delete returns false when the id was not present; in that case add it.
  if (!updated.delete(nodeId)) {
    updated.add(nodeId);
  }
  expandedNodes = updated;
}
// Use actual storage_size_megabytes from API if available, otherwise fall back to estimate.
// The API value is divided by 1024 (MB -> GB) and rounded to a whole number; a missing or
// zero value is falsy and therefore also takes the estimate path.
const estimatedMemory = $derived(
model.storage_size_megabytes
? Math.round(model.storage_size_megabytes / 1024)
: estimateMemoryGB(model.id, model.name)
);
/**
 * Classify a device by looking for a known keyword in its name
 * (case-insensitive). Keywords are checked in priority order:
 * "macbook", then "studio", then "mini"; anything else is "unknown".
 */
function getDeviceType(name: string): 'macbook' | 'studio' | 'mini' | 'unknown' {
  const haystack = name.toLowerCase();
  const knownTypes = ['macbook', 'studio', 'mini'] as const;
  for (const candidate of knownTypes) {
    if (haystack.includes(candidate)) return candidate;
  }
  return 'unknown';
}
// Restrict a percentage to the inclusive range [0, 100].
const clampPercent = (value: number): number => {
  const cappedAtHundred = Math.min(100, value);
  return Math.max(0, cappedAtHundred);
};
// Model id used to build the Hugging Face link in the template:
// modelIdOverride when it is not null/undefined, otherwise model.id.
const huggingFaceModelId = $derived(modelIdOverride ?? model.id);
// Node list for the mini preview. Entries follow the key order of `nodes`
// (the same insertion order the topology graph uses) so the preview layout
// matches the main TopologyGraph. Nodes that appear only in the API preview's
// memory_delta_by_node are appended afterwards so they still get rendered.
// Note: this derived value is a function; read it as nodeList().
const nodeList = $derived(() => {
  const BYTES_PER_GB = 1024 * 1024 * 1024;

  // One entry per topology node, memory figures converted from bytes to GB.
  const topologyEntries = Object.entries(nodes).map(([id, info]) => {
    const memory = info.macmon_info?.memory;
    const totalBytes = memory?.ram_total ?? info.system_info?.memory ?? 0;
    const usedBytes = memory?.ram_usage ?? 0;
    const freeBytes = Math.max(totalBytes - usedBytes, 0);
    const totalGB = totalBytes / BYTES_PER_GB;
    const availableGB = freeBytes / BYTES_PER_GB;
    // Derived from the clamped free amount, so it never exceeds totalGB.
    const usedGB = Math.max(totalGB - availableGB, 0);
    const deviceName = info.system_info?.model_id ?? 'Unknown';
    return {
      id,
      totalGB,
      availableGB,
      usedGB,
      deviceName,
      deviceType: getDeviceType(deviceName),
      usedBytes,
      totalBytes
    };
  });

  const deltaByNode = apiPreview?.memory_delta_by_node ?? null;
  if (!deltaByNode) return topologyEntries;
  const previewIds = Object.keys(deltaByNode);
  if (previewIds.length === 0) return topologyEntries;

  // Preview-only nodes have no real memory stats: synthesize a capacity of
  // 1.2x the model delta (at least 1 GB) so the delta has somewhere to be drawn.
  const knownIds = new Set(topologyEntries.map((entry) => entry.id));
  const previewOnlyEntries = previewIds
    .filter((id) => !knownIds.has(id))
    .map((id) => {
      const deltaGB = (deltaByNode[id] ?? 0) / BYTES_PER_GB;
      const totalGB = Math.max(deltaGB * 1.2, 1);
      const usedGB = Math.max(totalGB - deltaGB, 0);
      const availableGB = Math.max(totalGB - usedGB, 0);
      return {
        id,
        totalGB,
        availableGB,
        usedGB,
        deviceName: 'Unknown',
        deviceType: 'unknown' as const,
        usedBytes: usedGB * BYTES_PER_GB,
        totalBytes: totalGB * BYTES_PER_GB
      };
    });

  return [...topologyEntries, ...previewOnlyEntries];
});
// Calculate placement preview with all SVG metrics pre-computed.
// Uses API preview data when available, falls back to local estimation.
//
// Result shape: { nodes, canFit, totalAvailable, topoWidth, topoHeight, error }.
// Each entry of `nodes` carries its position on a circle, the icon size and the
// current / projected memory fill so the template only has to draw.
// Note: this derived value is a function; read it as placementPreview().
const placementPreview = $derived(() => {
  const nodeArray = nodeList();
  const topoWidth = 260;
  if (nodeArray.length === 0) {
    // Same keys as the populated result so consumers never see a missing field.
    return { nodes: [], canFit: false, totalAvailable: 0, topoWidth, topoHeight: 0, error: null };
  }

  // Layout: nodes are placed on a circle starting at the top (-90 degrees).
  const numNodes = nodeArray.length;
  const iconSize = numNodes === 1 ? 50 : 36;
  const topoHeight = numNodes === 1 ? 90 : numNodes === 2 ? 140 : numNodes * 50 + 20;
  const centerX = topoWidth / 2;
  const centerY = topoHeight / 2;
  const radius = numNodes === 1 ? 0 : numNodes === 2 ? 45 : Math.min(topoWidth, topoHeight) * 0.32;
  const screenHeight = iconSize * 0.58;
  const bytesPerGB = 1024 * 1024 * 1024;

  const totalAvailable = nodeArray.reduce((sum, n) => sum + n.availableGB, 0);
  // The API preview is authoritative when it succeeded and carries per-node deltas.
  const hasApiPreview = apiPreview !== null && apiPreview.error === null && apiPreview.memory_delta_by_node !== null;
  const error = apiPreview?.error ?? null;

  // Build the render data for one node given how much of the model it would hold.
  const buildNode = (
    n: (typeof nodeArray)[number],
    index: number,
    modelUsageGB: number,
    isUsed: boolean
  ) => {
    const angle = numNodes === 1 ? 0 : (index / numNodes) * Math.PI * 2 - Math.PI / 2;
    // Guard against division by zero for nodes that report no memory.
    const safeTotal = Math.max(n.totalGB, 0.001);
    const currentPercent = clampPercent((n.usedGB / safeTotal) * 100);
    const newPercent = clampPercent(((n.usedGB + modelUsageGB) / safeTotal) * 100);
    return {
      id: n.id,
      deviceName: n.deviceName,
      deviceType: n.deviceType,
      totalGB: n.totalGB,
      currentUsedGB: n.usedGB,
      modelUsageGB,
      currentPercent,
      newPercent,
      isUsed,
      x: centerX + Math.cos(angle) * radius,
      y: centerY + Math.sin(angle) * radius,
      iconSize,
      screenHeight,
      currentFillHeight: screenHeight * (currentPercent / 100),
      // Only the part added by the model; zero when modelUsageGB is zero.
      modelFillHeight: screenHeight * ((newPercent - currentPercent) / 100)
    };
  };

  let placementNodes: Array<ReturnType<typeof buildNode>>;
  if (hasApiPreview && apiPreview.memory_delta_by_node) {
    // Use API placement data: a node is used when it receives a positive delta.
    const memoryDelta = apiPreview.memory_delta_by_node;
    placementNodes = nodeArray.map((n, i) => {
      const deltaBytes = memoryDelta[n.id] ?? 0;
      return buildNode(n, i, deltaBytes / bytesPerGB, deltaBytes > 0);
    });
  } else if (apiPreview?.error) {
    // API returned an error - model can't fit, show all nodes as unused.
    placementNodes = nodeArray.map((n, i) => buildNode(n, i, 0, false));
  } else if (sharding === 'Pipeline') {
    // Fallback estimate for Pipeline sharding: split the model evenly across all nodes.
    const memoryPerNode = estimatedMemory / numNodes;
    placementNodes = nodeArray.map((n, i) => buildNode(n, i, memoryPerNode, true));
  } else {
    // Fallback estimate for other strategies: fill nodes in order until the model is placed.
    let remaining = estimatedMemory;
    placementNodes = nodeArray.map((n, i) => {
      const allocated = Math.min(remaining, n.availableGB);
      remaining -= allocated;
      return buildNode(n, i, allocated, allocated > 0);
    });
  }

  return {
    nodes: placementNodes,
    // Without a successful API preview, fit is judged on total free memory.
    canFit: hasApiPreview || totalAvailable >= estimatedMemory,
    totalAvailable,
    topoWidth,
    topoHeight,
    error
  };
});
// Whether the model can be launched: when an API preview exists it decides
// (fits only if it reported no error); otherwise the local estimate is used.
const canFit = $derived(apiPreview ? apiPreview.error === null : placementPreview().canFit);
// Error reported by the API preview, or null when there is no preview or no error.
const placementError = $derived(apiPreview?.error ?? null);
// Number of nodes in the preview list.
const nodeCount = $derived(nodeList().length);
// model.id reduced to alphanumerics; used as the suffix of the SVG filter ids
// (nodeGlow-… / memGlow-…) in the template.
const filterId = $derived(model.id.replace(/[^a-zA-Z0-9]/g, ''));
</script>
<div class="relative group">
<!-- Corner accents -->
<div class="absolute -top-px -left-px w-2 h-2 border-l border-t {canFit ? 'border-exo-yellow/30 group-hover:border-exo-yellow/60' : 'border-red-500/30'} transition-colors"></div>
<div class="absolute -top-px -right-px w-2 h-2 border-r border-t {canFit ? 'border-exo-yellow/30 group-hover:border-exo-yellow/60' : 'border-red-500/30'} transition-colors"></div>
<div class="absolute -bottom-px -left-px w-2 h-2 border-l border-b {canFit ? 'border-exo-yellow/30 group-hover:border-exo-yellow/60' : 'border-red-500/30'} transition-colors"></div>
<div class="absolute -bottom-px -right-px w-2 h-2 border-r border-b {canFit ? 'border-exo-yellow/30 group-hover:border-exo-yellow/60' : 'border-red-500/30'} transition-colors"></div>
<div class="bg-exo-dark-gray/60 border {canFit ? 'border-exo-yellow/20 group-hover:border-exo-yellow/40' : 'border-red-500/20'} p-3 transition-all duration-200 group-hover:shadow-[0_0_15px_rgba(255,215,0,0.1)]">
<!-- Model Name & Memory Required -->
<div class="flex items-start justify-between gap-2 mb-2">
<div class="flex-1 min-w-0">
<div class="flex items-center gap-2">
<div class="text-exo-yellow text-xs font-mono tracking-wide truncate" title={model.name || model.id}>
{model.name || model.id}
</div>
{#if huggingFaceModelId}
<a
class="shrink-0 text-white/60 hover:text-exo-yellow transition-colors"
href={`https://huggingface.co/${huggingFaceModelId}`}
target="_blank"
rel="noreferrer noopener"
aria-label="View model on Hugging Face"
>
<svg class="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M14 3h7v7"/>
<path d="M10 14l11-11"/>
<path d="M21 14v6a1 1 0 0 1-1 1h-16a1 1 0 0 1-1-1v-16a1 1 0 0 1 1-1h6"/>
</svg>
</a>
{/if}
{#if tags.length > 0}
<div class="flex gap-1 flex-shrink-0">
{#each tags as tag}
<span class="px-1.5 py-0.5 text-xs font-mono tracking-wider uppercase rounded {tag === 'FASTEST' ? 'bg-green-500/20 text-green-400 border border-green-500/30' : 'bg-purple-500/20 text-purple-400 border border-purple-500/30'}">
{tag}
</span>
{/each}
</div>
{/if}
</div>
{#if model.name && model.name !== model.id}
<div class="text-xs text-exo-light-gray font-mono truncate mt-0.5" title={model.id}>
{model.id}
</div>
{/if}
</div>
<div class="flex-shrink-0 text-right">
<div class="text-xs font-mono {canFit ? 'text-exo-yellow' : 'text-red-400'}">
{estimatedMemory}GB
</div>
</div>
</div>
<!-- Configuration Badge -->
<div class="flex items-center gap-1.5 mb-2">
<span class="px-1.5 py-0.5 text-xs font-mono tracking-wider uppercase bg-exo-medium-gray/30 text-exo-light-gray border border-exo-medium-gray/40">
{sharding}
</span>
<span class="px-1.5 py-0.5 text-xs font-mono tracking-wider uppercase bg-exo-medium-gray/30 text-exo-light-gray border border-exo-medium-gray/40">
{runtime === 'MlxRing' ? 'MLX Ring' : runtime === 'MlxIbv' || runtime === 'MlxJaccl' ? 'MLX RDMA' : runtime}
</span>
</div>
<!-- Mini Topology Preview -->
{#if placementPreview().nodes.length > 0}
{@const preview = placementPreview()}
<div class="mb-3 bg-exo-black/60 rounded border border-exo-medium-gray/20 p-2 relative overflow-hidden">
<!-- Scanline effect -->
<div class="absolute inset-0 bg-[repeating-linear-gradient(0deg,transparent,transparent_2px,rgba(255,215,0,0.02)_2px,rgba(255,215,0,0.02)_4px)] pointer-events-none"></div>
<svg width="100%" height={preview.topoHeight} viewBox="0 0 {preview.topoWidth} {preview.topoHeight}" class="overflow-visible">
<defs>
<!-- Glow filter for active nodes -->
<filter id="nodeGlow-{filterId}" x="-50%" y="-50%" width="200%" height="200%">
<feGaussianBlur stdDeviation="2" result="blur"/>
<feMerge>
<feMergeNode in="blur"/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<!-- Strong glow for new memory -->
<filter id="memGlow-{filterId}" x="-100%" y="-100%" width="300%" height="300%">
<feGaussianBlur stdDeviation="3" result="blur"/>
<feComposite in="SourceGraphic" in2="blur" operator="over"/>
</filter>
</defs>
<!-- Connection lines between nodes (if multiple) -->
{#if preview.nodes.length > 1}
{#each preview.nodes as node, i}
{#each preview.nodes.slice(i + 1) as node2}
<line
x1={node.x} y1={node.y} x2={node2.x} y2={node2.y}
stroke={node.isUsed && node2.isUsed ? '#FFD700' : '#374151'}
stroke-width="1"
stroke-dasharray={node.isUsed && node2.isUsed ? '4,2' : '2,4'}
opacity={node.isUsed && node2.isUsed ? 0.4 : 0.15}
/>
{/each}
{/each}
{/if}
{#each preview.nodes as node}
<g
transform="translate({node.x}, {node.y})"
opacity={node.isUsed ? 1 : 0.25}
filter={node.isUsed ? `url(#nodeGlow-${filterId})` : 'none'}
>
<!-- Device icon based on type -->
{#if node.deviceType === 'macbook'}
<!-- MacBook Pro icon with memory fill -->
<g transform="translate({-node.iconSize/2}, {-node.iconSize/2})">
<!-- Screen bezel -->
<rect
x="2" y="0"
width={node.iconSize - 4} height={node.iconSize * 0.65}
rx="2"
fill="none"
stroke={node.isUsed ? '#FFD700' : '#4B5563'}
stroke-width="1.5"
/>
<!-- Screen area (memory fill container) -->
<rect
x="4" y="2"
width={node.iconSize - 8} height={node.screenHeight}
fill="#0a0a0a"
/>
<!-- Current memory fill (gray) -->
<rect
x="4"
y={2 + node.screenHeight - node.currentFillHeight}
width={node.iconSize - 8}
height={node.currentFillHeight}
fill="#374151"
/>
<!-- New model memory fill (glowing yellow) -->
{#if node.modelUsageGB > 0 && node.isUsed}
<rect
x="4"
y={2 + node.screenHeight - node.currentFillHeight - node.modelFillHeight}
width={node.iconSize - 8}
height={node.modelFillHeight}
fill="#FFD700"
filter="url(#memGlow-{filterId})"
class="animate-pulse-slow"
/>
{/if}
<!-- Base/keyboard -->
<path
d="M 0 {node.iconSize * 0.68} L {node.iconSize} {node.iconSize * 0.68} L {node.iconSize - 2} {node.iconSize * 0.78} L 2 {node.iconSize * 0.78} Z"
fill="none"
stroke={node.isUsed ? '#FFD700' : '#4B5563'}
stroke-width="1.5"
/>
</g>
{:else if node.deviceType === 'studio'}
<!-- Mac Studio icon -->
<g transform="translate({-node.iconSize/2}, {-node.iconSize/2})">
<rect
x="2" y="2"
width={node.iconSize - 4} height={node.iconSize - 4}
rx="4"
fill="none"
stroke={node.isUsed ? '#FFD700' : '#4B5563'}
stroke-width="1.5"
/>
<!-- Memory fill background -->
<rect
x="4" y="4"
width={node.iconSize - 8} height={node.iconSize - 8}
fill="#0a0a0a"
/>
<!-- Current memory fill -->
<rect
x="4"
y={4 + (node.iconSize - 8) * (1 - node.currentPercent / 100)}
width={node.iconSize - 8}
height={(node.iconSize - 8) * (node.currentPercent / 100)}
fill="#374151"
/>
<!-- New model memory fill -->
{#if node.modelUsageGB > 0 && node.isUsed}
<rect
x="4"
y={4 + (node.iconSize - 8) * (1 - node.newPercent / 100)}
width={node.iconSize - 8}
height={(node.iconSize - 8) * ((node.newPercent - node.currentPercent) / 100)}
fill="#FFD700"
filter="url(#memGlow-{filterId})"
class="animate-pulse-slow"
/>
{/if}
</g>
{:else if node.deviceType === 'mini'}
<!-- Mac Mini icon -->
<g transform="translate({-node.iconSize/2}, {-node.iconSize/2})">
<rect
x="2" y={node.iconSize * 0.3}
width={node.iconSize - 4} height={node.iconSize * 0.4}
rx="3"
fill="none"
stroke={node.isUsed ? '#FFD700' : '#4B5563'}
stroke-width="1.5"
/>
<!-- Memory fill background -->
<rect
x="4" y={node.iconSize * 0.32}
width={node.iconSize - 8} height={node.iconSize * 0.36}
fill="#0a0a0a"
/>
<!-- Current memory fill -->
<rect
x="4"
y={node.iconSize * 0.32 + (node.iconSize * 0.36) * (1 - node.currentPercent / 100)}
width={node.iconSize - 8}
height={(node.iconSize * 0.36) * (node.currentPercent / 100)}
fill="#374151"
/>
<!-- New model memory fill -->
{#if node.modelUsageGB > 0 && node.isUsed}
<rect
x="4"
y={node.iconSize * 0.32 + (node.iconSize * 0.36) * (1 - node.newPercent / 100)}
width={node.iconSize - 8}
height={(node.iconSize * 0.36) * ((node.newPercent - node.currentPercent) / 100)}
fill="#FFD700"
filter="url(#memGlow-{filterId})"
class="animate-pulse-slow"
/>
{/if}
</g>
{:else}
<!-- Unknown device - hexagon -->
<g transform="translate({-node.iconSize/2}, {-node.iconSize/2})">
<polygon
points="{node.iconSize/2},0 {node.iconSize},{node.iconSize*0.25} {node.iconSize},{node.iconSize*0.75} {node.iconSize/2},{node.iconSize} 0,{node.iconSize*0.75} 0,{node.iconSize*0.25}"
fill={node.isUsed ? 'rgba(255,215,0,0.1)' : '#0a0a0a'}
stroke={node.isUsed ? '#FFD700' : '#4B5563'}
stroke-width="1.5"
/>
</g>
{/if}
<!-- Percentage label -->
<text
y={node.iconSize/2 + 12}
text-anchor="middle"
font-size="8"
font-family="SF Mono, Monaco, monospace"
fill={node.isUsed ? (node.newPercent > 90 ? '#f87171' : '#FFD700') : '#4B5563'}
>
{node.newPercent.toFixed(0)}%
</text>
</g>
{/each}
</svg>
</div>
{/if}
<!-- Launch Button -->
<!-- Disabled while a launch is in flight or when the model cannot be placed.
     type="button" keeps it from acting as a submit button if this card is ever
     rendered inside a form. The title exposes the API's placement error (when
     there is one) as a tooltip, because the visible label is always the generic
     "INSUFFICIENT MEMORY". -->
<button
type="button"
onclick={onLaunch}
disabled={isLaunching || !canFit}
title={placementError ?? undefined}
class="w-full py-2 text-sm font-mono tracking-wider uppercase border transition-all duration-200
{isLaunching
? 'bg-transparent text-exo-yellow border-exo-yellow/50 cursor-wait'
: !canFit
? 'bg-red-500/10 text-red-400/70 border-red-500/30 cursor-not-allowed'
: 'bg-transparent text-exo-light-gray border-exo-light-gray/40 hover:text-exo-yellow hover:border-exo-yellow/50 cursor-pointer'
}"
>
{#if isLaunching}
<span class="flex items-center justify-center gap-1.5">
<span class="w-2 h-2 border border-exo-yellow border-t-transparent rounded-full animate-spin"></span>
LAUNCHING...
</span>
{:else if !canFit}
INSUFFICIENT MEMORY
{:else}
▸ LAUNCH
{/if}
</button>
</div>
</div>
<style>
/* Subtle opacity pulse (0.8 -> 1 -> 0.8) used by the yellow "new model memory"
   rects in the topology preview via the .animate-pulse-slow class. */
@keyframes pulse-slow {
0%, 100% { opacity: 0.8; }
50% { opacity: 1; }
}
/* Runs the pulse forever with a 1.5s period. */
.animate-pulse-slow {
animation: pulse-slow 1.5s ease-in-out infinite;
}
</style>

View File

@@ -0,0 +1,971 @@
<script lang="ts">
import { onMount, onDestroy } from 'svelte';
import * as d3 from 'd3';
import { topologyData, isTopologyMinimized, debugMode } from '$lib/stores/app.svelte';
// Props accepted by this component.
interface Props {
// Destructured below as `className` — presumably extra CSS classes for the root element; usage is not visible here.
class?: string;
// IDs of nodes that renderGraph draws with the highlighted (yellow, thicker) outline.
highlightedNodes?: Set<string>;
}
let { class: className = '', highlightedNodes = new Set() }: Props = $props();
// The <svg> element that renderGraph draws into; undefined until it is available.
let svgContainer: SVGSVGElement | undefined = $state();
// NOTE(review): presumably observes the container to re-render on resize — set up outside this excerpt.
let resizeObserver: ResizeObserver | undefined;
// Reactive snapshots of the shared app stores.
const isMinimized = $derived(isTopologyMinimized());
const data = $derived(topologyData());
const debugEnabled = $derived(debugMode());
/**
 * Display label for a node: its friendly_name from the topology data when
 * that is set and non-empty, otherwise the first 8 characters of the node id.
 */
function getNodeLabel(nodeId: string): string {
  const friendlyName = data?.nodes?.[nodeId]?.friendly_name;
  if (friendlyName) return friendlyName;
  return nodeId.slice(0, 8);
}
/**
 * Resolve the network interface name on `nodeId` that owns address `ip`.
 *
 * Lookup order:
 *   1. the node's network_interfaces list (first interface whose addresses include `ip`),
 *   2. the node's ip_to_interface map (ignored when the mapped name is blank).
 *
 * @returns `{ label, missing: false }` on success, or `{ label: '?', missing: true }`
 *          when the ip is absent, the node is unknown, or no interface matches.
 */
function getInterfaceLabel(nodeId: string, ip?: string): { label: string; missing: boolean } {
  // Fresh object per call so callers never share a result instance.
  const unresolved = () => ({ label: '?', missing: true });

  if (!ip) return unresolved();
  const node = data?.nodes?.[nodeId];
  if (!node) return unresolved();

  const owningInterface = node.network_interfaces?.find((iface) =>
    (iface.addresses || []).some((addr) => addr === ip)
  );
  const interfaceName = owningInterface?.name;
  if (interfaceName) {
    return { label: interfaceName, missing: false };
  }

  const mappedName = node.ip_to_interface?.[ip];
  const hasMappedName = Boolean(mappedName) && mappedName.trim().length > 0;
  return hasMappedName ? { label: mappedName, missing: false } : unresolved();
}
/**
 * Break `text` into lines of at most `maxLen` characters.
 *
 * Lines are broken at single spaces; a word longer than `maxLen` is split
 * into fixed-size chunks.
 *
 * @param text   Text to wrap.
 * @param maxLen Maximum characters per line. Values below 1 (or NaN) cannot be
 *               honoured, so the text is returned as a single line.
 * @returns The wrapped lines in order.
 */
function wrapLine(text: string, maxLen: number): string[] {
  // A non-positive maxLen would make the chunking loop below never advance.
  if (!(maxLen >= 1) || text.length <= maxLen) return [text];
  const words = text.split(' ');
  const lines: string[] = [];
  let current = '';
  for (const word of words) {
    if (word.length > maxLen) {
      // Oversized word: flush the pending line, then hard-split the word.
      if (current) {
        lines.push(current);
        current = '';
      }
      for (let i = 0; i < word.length; i += maxLen) {
        lines.push(word.slice(i, i + maxLen));
      }
    } else if ((current + ' ' + word).trim().length > maxLen) {
      // Word does not fit on the pending line: start a new one with it.
      lines.push(current);
      current = word;
    } else {
      current = current ? `${current} ${word}` : word;
    }
  }
  if (current) lines.push(current);
  return lines;
}
// Apple logo path for MacBook Pro screen (SVG path data).
const APPLE_LOGO_PATH = "M788.1 340.9c-5.8 4.5-108.2 62.2-108.2 190.5 0 148.4 130.3 200.9 134.2 202.2-.6 3.2-20.7 71.9-68.7 141.9-42.8 61.6-87.5 123.1-155.5 123.1s-85.5-39.5-164-39.5c-76.5 0-103.7 40.8-165.9 40.8s-105.6-57-155.5-127C46.7 790.7 0 663 0 541.8c0-194.4 126.4-297.5 250.8-297.5 66.1 0 121.2 43.4 162.7 43.4 39.5 0 101.1-46 176.3-46 28.5 0 130.9 2.6 198.3 99.2zm-234-181.5c31.1-36.9 53.1-88.1 53.1-139.3 0-7.1-.6-14.3-1.9-20.1-50.6 1.9-110.8 33.7-147.1 75.8-28.5 32.4-55.1 83.6-55.1 135.5 0 7.8 1.3 15.6 1.9 18.1 3.2.6 8.4 1.3 13.6 1.3 45.4 0 102.5-30.4 135.5-71.3z";
// NOTE(review): presumably the width/height of the coordinate space the path above
// is drawn in, used to scale the logo — the usage is outside this excerpt; confirm.
const LOGO_NATIVE_WIDTH = 814;
const LOGO_NATIVE_HEIGHT = 1000;
/**
 * Format a byte count as a compact human-readable string using 1024-based
 * units with no space before the unit, e.g. `1536` -> `"1.5KB"`.
 *
 * @param bytes    Number of bytes. Falsy or non-finite values render as "0B";
 *                 negative values keep their sign (e.g. "-2KB").
 * @param decimals Maximum number of fraction digits (trailing zeros are dropped).
 * @returns The formatted value immediately followed by the unit.
 */
function formatBytes(bytes: number, decimals = 1): string {
  if (!bytes || !Number.isFinite(bytes)) return '0B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
  // Pick the unit from the magnitude so negative values do not produce NaN.
  const rawIndex = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k));
  // Clamp to the available units: sub-byte values stay in "B" and anything
  // beyond the largest unit is expressed in "TB" instead of indexing past the array.
  const i = Math.min(sizes.length - 1, Math.max(0, rawIndex));
  // toFixed() throws a RangeError outside 0..100.
  const places = Math.min(100, Math.max(0, decimals));
  return parseFloat((bytes / Math.pow(k, i)).toFixed(places)) + sizes[i];
}
/**
 * Map a temperature to a CSS colour on a blue -> yellow -> red gradient.
 *
 * - NaN / null: light gray placeholder (temperature not available)
 * - <= 45:      pure blue   (#5DADE2)
 * - 45..57.5:   linear blend blue -> yellow
 * - 57.5..75:   linear blend yellow (#FFD700) -> red
 * - >= 75:      pure red    (#F44336)
 */
function getTemperatureColor(temp: number): string {
  // Default for N/A temp - light gray
  if (isNaN(temp) || temp === null) return 'rgba(179, 179, 179, 0.8)';

  type RGB = { r: number; g: number; b: number };
  const COOL_TEMP = 45;
  const MID_TEMP = 57.5;
  const HOT_TEMP = 75;
  const COOL: RGB = { r: 93, g: 173, b: 226 };
  const MID: RGB = { r: 255, g: 215, b: 0 };
  const HOT: RGB = { r: 244, g: 67, b: 54 };

  // Linear interpolation between two colours, rounded per channel.
  const blend = (from: RGB, to: RGB, ratio: number): RGB => ({
    r: Math.round(from.r * (1 - ratio) + to.r * ratio),
    g: Math.round(from.g * (1 - ratio) + to.g * ratio),
    b: Math.round(from.b * (1 - ratio) + to.b * ratio)
  });

  let color: RGB;
  if (temp <= COOL_TEMP) {
    color = COOL;
  } else if (temp <= MID_TEMP) {
    color = blend(COOL, MID, (temp - COOL_TEMP) / (MID_TEMP - COOL_TEMP));
  } else if (temp < HOT_TEMP) {
    color = blend(MID, HOT, (temp - MID_TEMP) / (HOT_TEMP - MID_TEMP));
  } else {
    color = HOT;
  }
  return `rgb(${color.r}, ${color.g}, ${color.b})`;
}
function renderGraph() {
if (!svgContainer || !data) return;
d3.select(svgContainer).selectAll('*').remove();
const nodes = data.nodes || {};
const edges = data.edges || [];
const nodeIds = Object.keys(nodes);
const rect = svgContainer.getBoundingClientRect();
const width = rect.width;
const height = rect.height;
const centerX = width / 2;
const centerY = height / 2;
const svg = d3.select(svgContainer);
// Add defs for clip paths and filters
const defs = svg.append('defs');
// Glow filter
const glowFilter = defs.append('filter')
.attr('id', 'glow')
.attr('x', '-50%')
.attr('y', '-50%')
.attr('width', '200%')
.attr('height', '200%');
glowFilter.append('feGaussianBlur')
.attr('stdDeviation', '2')
.attr('result', 'coloredBlur');
const glowMerge = glowFilter.append('feMerge');
glowMerge.append('feMergeNode').attr('in', 'coloredBlur');
glowMerge.append('feMergeNode').attr('in', 'SourceGraphic');
// Arrowhead marker for directional edges
const marker = defs.append('marker')
.attr('id', 'arrowhead')
.attr('viewBox', '0 0 10 10')
.attr('refX', '10')
.attr('refY', '5')
.attr('markerWidth', '11')
.attr('markerHeight', '11')
.attr('orient', 'auto-start-reverse');
marker.append('path')
.attr('d', 'M 0 0 L 10 5 L 0 10')
.attr('fill', 'none')
.attr('stroke', 'var(--exo-light-gray, #B3B3B3)')
.attr('stroke-width', '1.6')
.attr('stroke-linecap', 'round')
.attr('stroke-linejoin', 'round')
.style('animation', 'none');
if (nodeIds.length === 0) {
svg.append('text')
.attr('x', centerX)
.attr('y', centerY)
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'middle')
.attr('fill', 'rgba(255,215,0,0.4)')
.attr('font-size', isMinimized ? 10 : 12)
.attr('font-family', 'SF Mono, monospace')
.attr('letter-spacing', '0.1em')
.text('AWAITING NODES');
return;
}
const numNodes = nodeIds.length;
const minDimension = Math.min(width, height);
// Dynamic scaling - larger nodes for big displays
const sizeScale = numNodes === 1 ? 1 : Math.max(0.6, 1 - (numNodes - 1) * 0.10);
const baseNodeRadius = isMinimized
? Math.max(36, Math.min(60, minDimension * 0.22))
: Math.min(120, minDimension * 0.20);
const nodeRadius = baseNodeRadius * sizeScale;
// Orbit radius - balanced spacing for nodes
const circumference = numNodes * nodeRadius * 4;
const radiusFromCircumference = circumference / (2 * Math.PI);
const minOrbitRadius = Math.max(radiusFromCircumference, minDimension * 0.18);
const maxOrbitRadius = minDimension * 0.30;
const orbitRadius = isMinimized
? Math.min(maxOrbitRadius, Math.max(minOrbitRadius, minDimension * 0.26))
: Math.min(maxOrbitRadius, Math.max(minOrbitRadius, minDimension * (0.22 + numNodes * 0.02)));
// Determine display mode based on space and node count
const showFullLabels = !isMinimized && numNodes <= 4;
const showCompactLabels = !isMinimized && numNodes > 4;
// Add padding for labels (top/bottom)
const topPadding = 70; // Space for "NETWORK TOPOLOGY" label and node names
const bottomPadding = 70; // Space for stats and bottom label
const safeCenterY = topPadding + (height - topPadding - bottomPadding) / 2;
// Calculate node positions
const nodesWithPositions = nodeIds.map((id, index) => {
if (numNodes === 1) {
// Single node: center it
return {
id,
data: nodes[id],
x: centerX,
y: safeCenterY
};
}
// Distribute nodes around the orbit
// Start from top (-90 degrees) and go clockwise
const angle = (index / numNodes) * 2 * Math.PI - (Math.PI / 2);
return {
id,
data: nodes[id],
x: centerX + orbitRadius * Math.cos(angle),
y: safeCenterY + orbitRadius * Math.sin(angle)
};
});
const positionById: Record<string, { x: number; y: number }> = {};
nodesWithPositions.forEach(n => { positionById[n.id] = { x: n.x, y: n.y }; });
// Draw edges
const linksGroup = svg.append('g').attr('class', 'links-group');
const arrowsGroup = svg.append('g').attr('class', 'arrows-group');
const debugLabelsGroup = svg.append('g').attr('class', 'debug-edge-labels');
const pairMap = new Map<string, { a: string; b: string; aToB: boolean; bToA: boolean; connections: Array<{ from: string; to: string; ip: string; ifaceLabel: string; missingIface: boolean }> }>();
edges.forEach(edge => {
if (!edge.source || !edge.target || edge.source === edge.target) return;
if (!positionById[edge.source] || !positionById[edge.target]) return;
const a = edge.source < edge.target ? edge.source : edge.target;
const b = edge.source < edge.target ? edge.target : edge.source;
const key = `${a}|${b}`;
const entry = pairMap.get(key) || { a, b, aToB: false, bToA: false, connections: [] };
if (edge.source === a) entry.aToB = true;
else entry.bToA = true;
const ip = edge.sendBackIp || edge.sendBackMultiaddr?.ip_address || '?';
const ifaceInfo = getInterfaceLabel(edge.source, ip);
entry.connections.push({
from: edge.source,
to: edge.target,
ip,
ifaceLabel: ifaceInfo.label,
missingIface: ifaceInfo.missing
});
pairMap.set(key, entry);
});
pairMap.forEach(entry => {
const posA = positionById[entry.a];
const posB = positionById[entry.b];
if (!posA || !posB) return;
// Base dashed line
linksGroup.append('line')
.attr('x1', posA.x)
.attr('y1', posA.y)
.attr('x2', posB.x)
.attr('y2', posB.y)
.attr('class', 'graph-link');
// Calculate midpoint and direction for arrows
const dx = posB.x - posA.x;
const dy = posB.y - posA.y;
const len = Math.hypot(dx, dy) || 1;
const ux = dx / len;
const uy = dy / len;
const mx = (posA.x + posB.x) / 2;
const my = (posA.y + posB.y) / 2;
const tipOffset = 16; // Distance from center for arrow tips
const carrier = 2; // Short segment length for arrow orientation
// Arrow A -> B (if connection exists in that direction)
if (entry.aToB) {
const tipX = mx - ux * tipOffset;
const tipY = my - uy * tipOffset;
arrowsGroup.append('line')
.attr('x1', tipX - ux * carrier)
.attr('y1', tipY - uy * carrier)
.attr('x2', tipX)
.attr('y2', tipY)
.attr('stroke', 'none')
.attr('fill', 'none')
.attr('marker-end', 'url(#arrowhead)');
}
// Arrow B -> A (if connection exists in that direction)
if (entry.bToA) {
const tipX = mx + ux * tipOffset;
const tipY = my + uy * tipOffset;
arrowsGroup.append('line')
.attr('x1', tipX + ux * carrier)
.attr('y1', tipY + uy * carrier)
.attr('x2', tipX)
.attr('y2', tipY)
.attr('stroke', 'none')
.attr('fill', 'none')
.attr('marker-end', 'url(#arrowhead)');
}
if (debugEnabled && entry.connections.length > 0) {
const maxBoxes = 6;
const fontSize = isMinimized ? 8 : 9;
const lineGap = 2;
const labelOffsetOut = Math.max(140, minDimension * 0.38);
const labelOffsetSide = isMinimized ? 16 : 20;
const boxWidth = 170;
const maxLineLen = 26;
const connections = entry.connections.slice(0, maxBoxes);
if (entry.connections.length > maxBoxes) {
const remaining = entry.connections.length - maxBoxes;
connections.push({
from: '',
to: '',
ip: `(+${remaining} more)`,
ifaceLabel: '',
missingIface: false
});
}
let dirX = mx - centerX;
let dirY = my - centerY;
const dirLen = Math.hypot(dirX, dirY);
if (dirLen < 1) {
dirX = -uy;
dirY = ux;
} else {
dirX /= dirLen;
dirY /= dirLen;
}
const nx = -dirY;
const ny = dirX;
const labelXRaw = mx + dirX * labelOffsetOut + nx * labelOffsetSide;
const labelYRaw = my + dirY * labelOffsetOut + ny * labelOffsetSide;
const clampPad = Math.min(120, minDimension * 0.12);
const labelX = Math.max(clampPad, Math.min(width - clampPad, labelXRaw));
const labelY = Math.max(clampPad, Math.min(height - clampPad, labelYRaw));
const labelGroup = debugLabelsGroup.append('g')
.attr('transform', `translate(${labelX}, ${labelY})`);
const textGroup = labelGroup.append('g');
connections.forEach((conn, idx) => {
const rawLines = conn.from && conn.to
? [
`${getNodeLabel(conn.from)}${getNodeLabel(conn.to)}`,
`${conn.ip}`,
`${conn.ifaceLabel}`
]
: [conn.ip];
const wrapped = rawLines.flatMap(line => wrapLine(line, maxLineLen));
wrapped.forEach((line, lineIdx) => {
textGroup.append('text')
.attr('x', 0)
.attr('y', (idx * (wrapped.length * (fontSize + lineGap))) + lineIdx * (fontSize + lineGap))
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'hanging')
.attr('font-size', fontSize)
.attr('font-family', 'SF Mono, monospace')
.attr('fill', conn.missingIface ? 'rgba(248,113,113,0.9)' : 'rgba(255,255,255,0.9)')
.text(line);
});
});
const bbox = textGroup.node()?.getBBox();
if (bbox) {
const paddedWidth = Math.max(boxWidth, bbox.width + 14);
const boxHeight = bbox.height + 8;
const boxMinX = labelX - paddedWidth / 2;
const boxMaxX = labelX + paddedWidth / 2;
const boxMinY = labelY + bbox.y - 4;
const boxMaxY = boxMinY + boxHeight;
const clampPadDynamic = Math.min(140, minDimension * 0.18);
let shiftX = 0;
let shiftY = 0;
if (boxMinX < clampPadDynamic) shiftX = clampPadDynamic - boxMinX;
if (boxMaxX > width - clampPadDynamic) shiftX = (width - clampPadDynamic) - boxMaxX;
if (boxMinY < clampPadDynamic) shiftY = clampPadDynamic - boxMinY;
if (boxMaxY > height - clampPadDynamic) shiftY = (height - clampPadDynamic) - boxMaxY;
const finalX = labelX + shiftX;
const finalY = labelY + shiftY;
labelGroup.attr('transform', `translate(${finalX}, ${finalY})`);
labelGroup.insert('rect', 'g')
.attr('x', -paddedWidth / 2)
.attr('y', bbox.y - 4)
.attr('width', paddedWidth)
.attr('height', boxHeight)
.attr('rx', 4)
.attr('fill', 'rgba(0,0,0,0.75)')
.attr('stroke', 'rgba(255,255,255,0.12)')
.attr('stroke-width', 0.6);
}
}
});
// Draw nodes
const nodesGroup = svg.append('g').attr('class', 'nodes-group');
nodesWithPositions.forEach(nodeInfo => {
const node = nodeInfo.data;
const macmon = node.macmon_info;
const modelId = node.system_info?.model_id || 'Unknown';
const friendlyName = node.friendly_name || modelId;
let ramUsagePercent = 0;
let gpuTemp = NaN;
let ramTotal = 0;
let ramUsed = 0;
let gpuUsagePercent = 0;
let sysPower: number | null = null;
if (macmon) {
if (macmon.memory && macmon.memory.ram_total > 0) {
ramUsagePercent = (macmon.memory.ram_usage / macmon.memory.ram_total) * 100;
ramTotal = macmon.memory.ram_total;
ramUsed = macmon.memory.ram_usage;
}
if (macmon.temp && typeof macmon.temp.gpu_temp_avg === 'number') {
gpuTemp = Math.max(30, macmon.temp.gpu_temp_avg);
}
if (macmon.gpu_usage) {
gpuUsagePercent = macmon.gpu_usage[1] * 100;
}
if (macmon.sys_power) {
sysPower = macmon.sys_power;
}
}
const nodeG = nodesGroup.append('g')
.attr('class', 'graph-node')
.style('cursor', 'pointer');
// Add tooltip
nodeG.append('title')
.text(`${friendlyName}\nID: ${nodeInfo.id.slice(-8)}\nMemory: ${formatBytes(ramUsed)}/${formatBytes(ramTotal)}`);
let iconBaseWidth = nodeRadius * 1.2;
let iconBaseHeight = nodeRadius * 1.0;
const clipPathId = `clip-${nodeInfo.id.replace(/[^a-zA-Z0-9]/g, '-')}`;
const modelLower = modelId.toLowerCase();
// Check if this node should be highlighted (from hovered instance)
const isHighlighted = highlightedNodes.has(nodeInfo.id);
// Holographic wireframe colors - yellow border when highlighted
const wireColor = isHighlighted ? 'rgba(255,215,0,0.9)' : 'rgba(179,179,179,0.8)';
const wireColorBright = 'rgba(255,255,255,0.9)';
const fillColor = isHighlighted ? 'rgba(255,215,0,0.15)' : 'rgba(255,215,0,0.08)';
const strokeWidth = isHighlighted ? 2.5 : 1.5;
const screenFill = 'rgba(0,20,40,0.9)';
const glowColor = 'rgba(255,215,0,0.3)';
if (modelLower === 'mac studio') {
// Mac Studio - classic cube with memory fill
iconBaseWidth = nodeRadius * 1.25;
iconBaseHeight = nodeRadius * 0.85;
const x = nodeInfo.x - iconBaseWidth / 2;
const y = nodeInfo.y - iconBaseHeight / 2;
const cornerRadius = 4;
const topSurfaceHeight = iconBaseHeight * 0.15;
// Create clip path for memory fill area (front body)
const studioClipId = `studio-clip-${nodeInfo.id.replace(/[^a-zA-Z0-9]/g, '-')}`;
defs.append('clipPath')
.attr('id', studioClipId)
.append('rect')
.attr('x', x)
.attr('y', y + topSurfaceHeight)
.attr('width', iconBaseWidth)
.attr('height', iconBaseHeight - topSurfaceHeight)
.attr('rx', cornerRadius - 1);
// Main body (uniform color)
nodeG.append('rect')
.attr('x', x)
.attr('y', y)
.attr('width', iconBaseWidth)
.attr('height', iconBaseHeight)
.attr('rx', cornerRadius)
.attr('fill', '#1a1a1a')
.attr('stroke', wireColor)
.attr('stroke-width', strokeWidth);
// Memory fill (fills from bottom up)
if (ramUsagePercent > 0) {
const memFillTotalHeight = iconBaseHeight - topSurfaceHeight;
const memFillActualHeight = (ramUsagePercent / 100) * memFillTotalHeight;
nodeG.append('rect')
.attr('x', x)
.attr('y', y + topSurfaceHeight + (memFillTotalHeight - memFillActualHeight))
.attr('width', iconBaseWidth)
.attr('height', memFillActualHeight)
.attr('fill', 'rgba(255,215,0,0.75)')
.attr('clip-path', `url(#${studioClipId})`);
}
// Front panel details - vertical slots
const detailColor = 'rgba(0,0,0,0.35)';
const slotHeight = iconBaseHeight * 0.14;
const vSlotWidth = iconBaseWidth * 0.05;
const vSlotY = y + topSurfaceHeight + (iconBaseHeight - topSurfaceHeight) * 0.6;
const vSlot1X = x + iconBaseWidth * 0.18;
const vSlot2X = x + iconBaseWidth * 0.28;
[vSlot1X, vSlot2X].forEach(vx => {
nodeG.append('rect')
.attr('x', vx - vSlotWidth / 2)
.attr('y', vSlotY)
.attr('width', vSlotWidth)
.attr('height', slotHeight)
.attr('fill', detailColor)
.attr('rx', 1.5);
});
// Horizontal slot (SD card)
const hSlotWidth = iconBaseWidth * 0.2;
const hSlotX = x + iconBaseWidth * 0.5 - hSlotWidth / 2;
nodeG.append('rect')
.attr('x', hSlotX)
.attr('y', vSlotY)
.attr('width', hSlotWidth)
.attr('height', slotHeight * 0.6)
.attr('fill', detailColor)
.attr('rx', 1);
} else if (modelLower === 'mac mini') {
// Mac Mini - classic flat box with memory fill
iconBaseWidth = nodeRadius * 1.3;
iconBaseHeight = nodeRadius * 0.7;
const x = nodeInfo.x - iconBaseWidth / 2;
const y = nodeInfo.y - iconBaseHeight / 2;
const cornerRadius = 3;
const topSurfaceHeight = iconBaseHeight * 0.20;
// Create clip path for memory fill area
const miniClipId = `mini-clip-${nodeInfo.id.replace(/[^a-zA-Z0-9]/g, '-')}`;
defs.append('clipPath')
.attr('id', miniClipId)
.append('rect')
.attr('x', x)
.attr('y', y + topSurfaceHeight)
.attr('width', iconBaseWidth)
.attr('height', iconBaseHeight - topSurfaceHeight)
.attr('rx', cornerRadius - 1);
// Main body (uniform color)
nodeG.append('rect')
.attr('x', x)
.attr('y', y)
.attr('width', iconBaseWidth)
.attr('height', iconBaseHeight)
.attr('rx', cornerRadius)
.attr('fill', '#1a1a1a')
.attr('stroke', wireColor)
.attr('stroke-width', strokeWidth);
// Memory fill (fills from bottom up)
if (ramUsagePercent > 0) {
const memFillTotalHeight = iconBaseHeight - topSurfaceHeight;
const memFillActualHeight = (ramUsagePercent / 100) * memFillTotalHeight;
nodeG.append('rect')
.attr('x', x)
.attr('y', y + topSurfaceHeight + (memFillTotalHeight - memFillActualHeight))
.attr('width', iconBaseWidth)
.attr('height', memFillActualHeight)
.attr('fill', 'rgba(255,215,0,0.75)')
.attr('clip-path', `url(#${miniClipId})`);
}
// Front panel details - vertical slots (no horizontal slot for Mini)
const detailColor = 'rgba(0,0,0,0.35)';
const slotHeight = iconBaseHeight * 0.20;
const vSlotWidth = iconBaseWidth * 0.045;
const vSlotY = y + topSurfaceHeight + (iconBaseHeight - topSurfaceHeight) * 0.45;
const vSlot1X = x + iconBaseWidth * 0.20;
const vSlot2X = x + iconBaseWidth * 0.30;
[vSlot1X, vSlot2X].forEach(vx => {
nodeG.append('rect')
.attr('x', vx - vSlotWidth / 2)
.attr('y', vSlotY)
.attr('width', vSlotWidth)
.attr('height', slotHeight)
.attr('fill', detailColor)
.attr('rx', 1.2);
});
} else if (modelLower === 'macbook pro' || modelLower.includes('macbook')) {
// MacBook Pro - classic style with memory fill on screen
iconBaseWidth = nodeRadius * 1.6;
iconBaseHeight = nodeRadius * 1.15;
const x = nodeInfo.x - iconBaseWidth / 2;
const y = nodeInfo.y - iconBaseHeight / 2;
const screenHeight = iconBaseHeight * 0.70;
const baseHeight = iconBaseHeight * 0.30;
const screenWidth = iconBaseWidth * 0.85;
const screenX = nodeInfo.x - screenWidth / 2;
const screenBezel = 3;
// Create clip path for screen content
const screenClipId = `screen-clip-${nodeInfo.id.replace(/[^a-zA-Z0-9]/g, '-')}`;
defs.append('clipPath')
.attr('id', screenClipId)
.append('rect')
.attr('x', screenX + screenBezel)
.attr('y', y + screenBezel)
.attr('width', screenWidth - screenBezel * 2)
.attr('height', screenHeight - screenBezel * 2)
.attr('rx', 2);
// Screen outer frame
nodeG.append('rect')
.attr('x', screenX)
.attr('y', y)
.attr('width', screenWidth)
.attr('height', screenHeight)
.attr('rx', 3)
.attr('fill', '#1a1a1a')
.attr('stroke', wireColor)
.attr('stroke-width', strokeWidth);
// Screen inner (dark background)
nodeG.append('rect')
.attr('x', screenX + screenBezel)
.attr('y', y + screenBezel)
.attr('width', screenWidth - screenBezel * 2)
.attr('height', screenHeight - screenBezel * 2)
.attr('rx', 2)
.attr('fill', '#0a0a12');
// Memory fill on screen (fills from bottom up - classic style)
if (ramUsagePercent > 0) {
const memFillTotalHeight = screenHeight - screenBezel * 2;
const memFillActualHeight = (ramUsagePercent / 100) * memFillTotalHeight;
nodeG.append('rect')
.attr('x', screenX + screenBezel)
.attr('y', y + screenBezel + (memFillTotalHeight - memFillActualHeight))
.attr('width', screenWidth - screenBezel * 2)
.attr('height', memFillActualHeight)
.attr('fill', 'rgba(255,215,0,0.85)')
.attr('clip-path', `url(#${screenClipId})`);
}
// Apple logo on screen (centered, on top of memory fill)
const targetLogoHeight = screenHeight * 0.22;
const logoScale = targetLogoHeight / LOGO_NATIVE_HEIGHT;
const logoX = nodeInfo.x - (LOGO_NATIVE_WIDTH * logoScale / 2);
const logoY = y + screenHeight / 2 - (LOGO_NATIVE_HEIGHT * logoScale / 2);
nodeG.append('path')
.attr('d', APPLE_LOGO_PATH)
.attr('transform', `translate(${logoX}, ${logoY}) scale(${logoScale})`)
.attr('fill', '#FFFFFF')
.attr('opacity', 0.9);
// Base (keyboard) - trapezoidal
const baseY = y + screenHeight;
const baseTopWidth = screenWidth;
const baseBottomWidth = iconBaseWidth;
const baseTopX = nodeInfo.x - baseTopWidth / 2;
const baseBottomX = nodeInfo.x - baseBottomWidth / 2;
nodeG.append('path')
.attr('d', `M ${baseTopX} ${baseY} L ${baseTopX + baseTopWidth} ${baseY} L ${baseBottomX + baseBottomWidth} ${baseY + baseHeight} L ${baseBottomX} ${baseY + baseHeight} Z`)
.attr('fill', '#2c2c2c')
.attr('stroke', wireColor)
.attr('stroke-width', 1);
// Keyboard area
const keyboardX = baseTopX + 6;
const keyboardY = baseY + 3;
const keyboardWidth = baseTopWidth - 12;
const keyboardHeight = baseHeight * 0.55;
nodeG.append('rect')
.attr('x', keyboardX)
.attr('y', keyboardY)
.attr('width', keyboardWidth)
.attr('height', keyboardHeight)
.attr('fill', 'rgba(0,0,0,0.2)')
.attr('rx', 2);
// Trackpad
const trackpadWidth = baseTopWidth * 0.4;
const trackpadX = nodeInfo.x - trackpadWidth / 2;
const trackpadY = baseY + keyboardHeight + 5;
const trackpadHeight = baseHeight * 0.30;
nodeG.append('rect')
.attr('x', trackpadX)
.attr('y', trackpadY)
.attr('width', trackpadWidth)
.attr('height', trackpadHeight)
.attr('fill', 'rgba(255,255,255,0.08)')
.attr('rx', 2);
} else {
// Default/Unknown - holographic hexagon
const hexRadius = nodeRadius * 0.6;
const hexPoints = Array.from({ length: 6 }, (_, i) => {
const angle = (i * 60 - 30) * Math.PI / 180;
return `${nodeInfo.x + hexRadius * Math.cos(angle)},${nodeInfo.y + hexRadius * Math.sin(angle)}`;
}).join(' ');
// Main shape
nodeG.append('polygon')
.attr('points', hexPoints)
.attr('fill', fillColor)
.attr('stroke', wireColor)
.attr('stroke-width', strokeWidth);
}
// --- Vertical GPU Bar (right side of icon) ---
// Show in both full mode and minimized mode (scaled appropriately)
if (showFullLabels || isMinimized) {
const gpuBarWidth = isMinimized ? Math.max(16, nodeRadius * 0.32) : Math.max(28, nodeRadius * 0.30);
const gpuBarHeight = iconBaseHeight * 0.95;
const barXOffset = iconBaseWidth / 2 + (isMinimized ? 5 : 10);
const gpuBarX = nodeInfo.x + barXOffset;
const gpuBarY = nodeInfo.y - gpuBarHeight / 2;
// GPU Bar Background (grey, no border)
nodeG.append('rect')
.attr('x', gpuBarX)
.attr('y', gpuBarY)
.attr('width', gpuBarWidth)
.attr('height', gpuBarHeight)
.attr('fill', 'rgba(80, 80, 90, 0.7)')
.attr('rx', 2);
// GPU Bar Fill (from bottom up, colored by temperature)
if (gpuUsagePercent > 0) {
const fillHeight = (gpuUsagePercent / 100) * gpuBarHeight;
const gpuFillColor = getTemperatureColor(gpuTemp);
nodeG.append('rect')
.attr('x', gpuBarX)
.attr('y', gpuBarY + (gpuBarHeight - fillHeight))
.attr('width', gpuBarWidth)
.attr('height', fillHeight)
.attr('fill', gpuFillColor)
.attr('opacity', 0.9)
.attr('rx', 2);
}
// GPU Stats Text (centered on bar, multiline, bigger and bold)
const gpuTextX = gpuBarX + gpuBarWidth / 2;
const gpuTextY = gpuBarY + gpuBarHeight / 2;
const gpuTextFontSize = isMinimized ? Math.max(10, gpuBarWidth * 0.6) : Math.min(16, Math.max(12, gpuBarWidth * 0.55));
const lineSpacing = gpuTextFontSize * 1.25;
const gpuUsageText = `${gpuUsagePercent.toFixed(0)}%`;
const tempText = !isNaN(gpuTemp) ? `${gpuTemp.toFixed(0)}°C` : '-';
const powerText = sysPower !== null ? `${sysPower.toFixed(0)}W` : '-';
// GPU Usage %
nodeG.append('text')
.attr('x', gpuTextX)
.attr('y', gpuTextY - lineSpacing)
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'middle')
.attr('fill', '#FFFFFF')
.attr('font-size', gpuTextFontSize)
.attr('font-weight', '700')
.attr('font-family', 'SF Mono, Monaco, monospace')
.text(gpuUsageText);
// Temperature
nodeG.append('text')
.attr('x', gpuTextX)
.attr('y', gpuTextY)
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'middle')
.attr('fill', '#FFFFFF')
.attr('font-size', gpuTextFontSize)
.attr('font-weight', '700')
.attr('font-family', 'SF Mono, Monaco, monospace')
.text(tempText);
// Power (Watts)
nodeG.append('text')
.attr('x', gpuTextX)
.attr('y', gpuTextY + lineSpacing)
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'middle')
.attr('fill', '#FFFFFF')
.attr('font-size', gpuTextFontSize)
.attr('font-weight', '700')
.attr('font-family', 'SF Mono, Monaco, monospace')
.text(powerText);
}
// Labels - adapt based on mode
if (showFullLabels) {
// FULL MODE: Name above, memory info below (1-4 nodes)
const nameY = nodeInfo.y - iconBaseHeight / 2 - 15;
const fontSize = Math.max(10, nodeRadius * 0.16);
// Truncate name based on node count
const maxNameLen = numNodes === 1 ? 22 : (numNodes === 2 ? 18 : numNodes === 3 ? 16 : 14);
const displayName = friendlyName.length > maxNameLen
? friendlyName.slice(0, maxNameLen - 2) + '..'
: friendlyName;
// Name label above
nodeG.append('text')
.attr('x', nodeInfo.x)
.attr('y', nameY)
.attr('text-anchor', 'middle')
.attr('dominant-baseline', 'middle')
.attr('fill', '#FFD700')
.attr('font-size', fontSize)
.attr('font-weight', 500)
.attr('font-family', 'SF Mono, Monaco, monospace')
.text(displayName);
// Memory info below - used in grey, total in yellow
const infoY = nodeInfo.y + iconBaseHeight / 2 + 16;
const memText = nodeG.append('text')
.attr('x', nodeInfo.x)
.attr('y', infoY)
.attr('text-anchor', 'middle')
.attr('font-size', fontSize * 0.85)
.attr('font-family', 'SF Mono, Monaco, monospace');
memText.append('tspan')
.attr('fill', 'rgba(255,215,0,0.9)')
.text(`${formatBytes(ramUsed)}`);
memText.append('tspan')
.attr('fill', 'rgba(179,179,179,0.9)')
.text(`/${formatBytes(ramTotal)}`);
memText.append('tspan')
.attr('fill', 'rgba(179,179,179,0.7)')
.text(` (${ramUsagePercent.toFixed(0)}%)`);
} else if (showCompactLabels) {
// COMPACT MODE: Just name and basic info (4+ nodes)
const fontSize = Math.max(7, nodeRadius * 0.11);
// Very compact name below icon
const nameY = nodeInfo.y + iconBaseHeight / 2 + 9;
const shortName = friendlyName.length > 10
? friendlyName.slice(0, 8) + '..'
: friendlyName;
nodeG.append('text')
.attr('x', nodeInfo.x)
.attr('y', nameY)
.attr('text-anchor', 'middle')
.attr('fill', '#FFD700')
.attr('font-size', fontSize)
.attr('font-family', 'SF Mono, Monaco, monospace')
.text(shortName);
// Single line of key stats
const statsY = nameY + 9;
nodeG.append('text')
.attr('x', nodeInfo.x)
.attr('y', statsY)
.attr('text-anchor', 'middle')
.attr('fill', 'rgba(255,215,0,0.7)')
.attr('font-size', fontSize * 0.85)
.attr('font-family', 'SF Mono, Monaco, monospace')
.text(`${ramUsagePercent.toFixed(0)}%${!isNaN(gpuTemp) ? ' ' + gpuTemp.toFixed(0) + '°C' : ''}`);
} else {
// MINIMIZED MODE: Show name above and memory info below (like main topology)
const fontSize = 8;
// Friendly name (shortened) above icon
const nameY = nodeInfo.y - iconBaseHeight / 2 - 8;
const shortName = friendlyName.length > 12
? friendlyName.slice(0, 10) + '..'
: friendlyName;
nodeG.append('text')
.attr('x', nodeInfo.x)
.attr('y', nameY)
.attr('text-anchor', 'middle')
.attr('fill', '#FFD700')
.attr('font-size', fontSize)
.attr('font-weight', '500')
.attr('font-family', 'SF Mono, Monaco, monospace')
.text(shortName);
// Memory info below icon - used in grey, total in yellow (same as main topology)
const infoY = nodeInfo.y + iconBaseHeight / 2 + 10;
const memTextMini = nodeG.append('text')
.attr('x', nodeInfo.x)
.attr('y', infoY)
.attr('text-anchor', 'middle')
.attr('font-size', fontSize * 0.85)
.attr('font-family', 'SF Mono, Monaco, monospace');
memTextMini.append('tspan')
.attr('fill', 'rgba(255,215,0,0.9)')
.text(`${formatBytes(ramUsed)}`);
memTextMini.append('tspan')
.attr('fill', 'rgba(179,179,179,0.9)')
.text(`/${formatBytes(ramTotal)}`);
memTextMini.append('tspan')
.attr('fill', 'rgba(179,179,179,0.7)')
.text(` (${ramUsagePercent.toFixed(0)}%)`);
}
});
}
// Redraw whenever this effect re-runs and `data` is truthy; nothing is drawn
// while `data` is still empty.
$effect(() => {
  if (!data) return;
  renderGraph();
});

// After mount, watch the bound <svg> element and redraw on every resize
// notification. Skipped entirely if the element reference is not set.
onMount(() => {
  if (!svgContainer) return;
  resizeObserver = new ResizeObserver(() => renderGraph());
  resizeObserver.observe(svgContainer);
});

// Stop observing when the component is destroyed (no-op if no observer was created).
onDestroy(() => {
  resizeObserver?.disconnect();
});
</script>
<svg
bind:this={svgContainer}
class="w-full h-full {className}"
></svg>
<style>
  /* `.graph-node` is assigned to groups appended from script at runtime (and
     `.graph-link` presumably likewise), so these rules are wrapped in :global
     to bypass Svelte's per-component style scoping. */
  :global(.graph-node) {
    transition: transform 0.2s ease, opacity 0.2s ease;
  }

  :global(.graph-node:hover) {
    filter: brightness(1.1);
  }

  :global(.graph-link) {
    stroke: var(--exo-light-gray, #B3B3B3);
    stroke-width: 1px;
    stroke-dasharray: 4, 4;
    opacity: 0.8;
    animation: flowAnimation 0.75s linear infinite;
  }

  /* Shift the dashes by exactly one dash period (4 + 4 = 8) per cycle so the
     pattern is back in phase when the animation restarts. Any distance that
     is not a multiple of the period makes the dashes jump at every loop. */
  @keyframes flowAnimation {
    from { stroke-dashoffset: 0; }
    to { stroke-dashoffset: -8; }
  }
</style>

View File

@@ -0,0 +1,7 @@
export { default as TopologyGraph } from './TopologyGraph.svelte';
export { default as ChatForm } from './ChatForm.svelte';
export { default as ChatMessages } from './ChatMessages.svelte';
export { default as ChatAttachments } from './ChatAttachments.svelte';
export { default as ChatSidebar } from './ChatSidebar.svelte';
export { default as ModelCard } from './ModelCard.svelte';

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
/**
* File attachment types for the chat interface
*/
/**
 * A user-selected file plus the data derived from it, as built by
 * `processUploadedFiles`.
 */
export interface ChatUploadedFile {
// Generated per file: `Date.now()` as a string followed by a short random base-36 suffix.
id: string;
// `name`, `size` and `type` are copied from the corresponding `File` properties.
name: string;
size: number;
type: string;
// The original `File` object.
file: File;
// Data URL of the file; `processUploadedFiles` sets it for image and audio files.
preview?: string;
// File contents read as text; `processUploadedFiles` sets it for text and uncategorised files.
textContent?: string;
}
/**
 * Attachment descriptor. `type` takes the same values as `FileCategory`
 * except `'unknown'`.
 * NOTE(review): nothing in this file constructs or reads this type; the field
 * notes below are assumptions to confirm against the callers.
 */
export interface ChatAttachment {
type: 'image' | 'text' | 'pdf' | 'audio';
name: string;
// presumably the text payload for text attachments — TODO confirm
content?: string;
// presumably a base64 `data:` URL for non-text attachments — TODO confirm
base64Url?: string;
mimeType?: string;
}
// Result of `getFileCategory`; `'unknown'` is returned when neither the MIME type nor the extension matches any list.
export type FileCategory = 'image' | 'text' | 'pdf' | 'audio' | 'unknown';
// Lookup lists used by `getFileCategory` (classification) and `getAcceptString`
// (file-input `accept` value). Extensions are lower-case and include the leading
// dot, because they are compared against the lower-cased tail of the file name
// starting at its last '.'.
export const IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg'];
export const IMAGE_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp', 'image/svg+xml'];
// Plain-text, markup, data and source-code extensions treated as text.
export const TEXT_EXTENSIONS = [
'.txt', '.md', '.json', '.xml', '.yaml', '.yml', '.csv', '.log',
'.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.cpp', '.c', '.h',
'.css', '.html', '.htm', '.sql', '.sh', '.bat', '.rs', '.go',
'.rb', '.php', '.swift', '.kt', '.scala', '.r', '.dart', '.vue', '.svelte'
];
// `getFileCategory` additionally treats every MIME type starting with `text/` as text.
export const TEXT_MIME_TYPES = [
'text/plain', 'text/markdown', 'text/csv', 'text/html', 'text/css',
'application/json', 'application/xml', 'text/xml', 'application/javascript',
'text/javascript', 'application/typescript'
];
export const PDF_EXTENSIONS = ['.pdf'];
export const PDF_MIME_TYPES = ['application/pdf'];
export const AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a'];
export const AUDIO_MIME_TYPES = ['audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/mp4'];
/**
 * Classify a file by its MIME type or, failing that, its extension.
 *
 * Categories are tried in a fixed order (image, pdf, audio, text) and the
 * first one whose MIME list or extension list matches wins. Any MIME type
 * beginning with `text/` also counts as text.
 *
 * @param mimeType - MIME type reported for the file (may be empty).
 * @param fileName - File name; the extension is its lower-cased tail starting
 *   at the last '.'. Without a '.', the tail is just the last character, which
 *   never equals a dotted extension.
 * @returns The matching category, or `'unknown'` when nothing matches.
 */
export function getFileCategory(mimeType: string, fileName: string): FileCategory {
  const lastDot = fileName.lastIndexOf('.');
  const extension = fileName.toLowerCase().slice(lastDot);

  // True when either the MIME type or the extension is in the given lists.
  const matches = (mimeTypes: string[], extensions: string[]): boolean =>
    mimeTypes.includes(mimeType) || extensions.includes(extension);

  if (matches(IMAGE_MIME_TYPES, IMAGE_EXTENSIONS)) return 'image';
  if (matches(PDF_MIME_TYPES, PDF_EXTENSIONS)) return 'pdf';
  if (matches(AUDIO_MIME_TYPES, AUDIO_EXTENSIONS)) return 'audio';
  if (matches(TEXT_MIME_TYPES, TEXT_EXTENSIONS) || mimeType.startsWith('text/')) return 'text';

  return 'unknown';
}
/**
 * Build the value for a file input's `accept` attribute.
 *
 * Categories are processed in the order given; each contributes its
 * extensions followed by its MIME types. `'unknown'` contributes nothing and
 * repeated categories are emitted repeatedly.
 *
 * @param categories - Categories the input should accept.
 * @returns Comma-separated extensions and MIME types ('' for an empty list).
 */
export function getAcceptString(categories: FileCategory[]): string {
  // A Map (not an object literal) so unexpected runtime keys can never hit
  // inherited properties.
  const tokensByCategory = new Map<FileCategory, string[][]>([
    ['image', [IMAGE_EXTENSIONS, IMAGE_MIME_TYPES]],
    ['text', [TEXT_EXTENSIONS, TEXT_MIME_TYPES]],
    ['pdf', [PDF_EXTENSIONS, PDF_MIME_TYPES]],
    ['audio', [AUDIO_EXTENSIONS, AUDIO_MIME_TYPES]]
  ]);

  const accepts: string[] = [];
  for (const category of categories) {
    const tokenLists = tokensByCategory.get(category);
    if (tokenLists === undefined) continue;
    for (const tokens of tokenLists) {
      accepts.push(...tokens);
    }
  }
  return accepts.join(',');
}
/**
 * Format a byte count for display, e.g. `1536` -> `'1.5 KB'`.
 *
 * The value is scaled by powers of 1024, rounded to at most one decimal
 * place, and trailing zeros are dropped.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size. Zero, negative and non-finite input yield
 *   `'0 B'`; anything beyond the largest unit is expressed in `TB`.
 */
export function formatFileSize(bytes: number): string {
  // Math.log is NaN or -Infinity for these inputs, which would index outside `sizes`.
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
  const exponent = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp both ways: fractional byte counts give a negative exponent and
  // very large counts exceed the unit table.
  const i = Math.min(Math.max(exponent, 0), sizes.length - 1);
  return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
}
/**
 * Read a file into a `data:` URL using `FileReader.readAsDataURL`.
 *
 * @param file - File to read.
 * @returns Promise resolving with the reader's result, or rejecting with the
 *   reader's error if the read fails.
 */
export function readFileAsDataURL(file: File): Promise<string> {
  return new Promise<string>((onSuccess, onFailure) => {
    const fileReader = new FileReader();
    fileReader.onload = () => {
      onSuccess(fileReader.result as string);
    };
    fileReader.onerror = () => {
      onFailure(fileReader.error);
    };
    fileReader.readAsDataURL(file);
  });
}
/**
 * Read a file's contents as a string using `FileReader.readAsText`.
 *
 * @param file - File to read.
 * @returns Promise resolving with the reader's result, or rejecting with the
 *   reader's error if the read fails.
 */
export function readFileAsText(file: File): Promise<string> {
  return new Promise<string>((onSuccess, onFailure) => {
    const fileReader = new FileReader();
    fileReader.onload = () => {
      onSuccess(fileReader.result as string);
    };
    fileReader.onerror = () => {
      onFailure(fileReader.error);
    };
    fileReader.readAsText(file);
  });
}
/**
 * Convert picked files into `ChatUploadedFile` records, one per input file
 * and in the same order.
 *
 * Files are read one after another. Image and audio files get a data-URL
 * `preview`; text and uncategorised files get `textContent`; PDFs carry no
 * derived data. If reading a file fails, the error is logged and the file is
 * still returned without derived data.
 *
 * @param files - Files to process.
 * @returns The processed records.
 */
export async function processUploadedFiles(files: File[]): Promise<ChatUploadedFile[]> {
  const processed: ChatUploadedFile[] = [];

  for (const file of files) {
    // Metadata shared by every outcome for this file.
    const entry: ChatUploadedFile = {
      id: Date.now().toString() + Math.random().toString(36).substring(2, 9),
      name: file.name,
      size: file.size,
      type: file.type,
      file
    };
    const category = getFileCategory(file.type, file.name);

    try {
      switch (category) {
        case 'image':
        case 'audio':
          processed.push({ ...entry, preview: await readFileAsDataURL(file) });
          break;
        case 'text':
        case 'unknown':
          processed.push({ ...entry, textContent: await readFileAsText(file) });
          break;
        default:
          // 'pdf' (and anything else): nothing is read up front.
          processed.push(entry);
      }
    } catch (error) {
      console.error('Error processing file:', file.name, error);
      processed.push(entry);
    }
  }

  return processed;
}

View File

@@ -0,0 +1,15 @@
<script lang="ts">
// Side-effect import of the stylesheet.
import '../app.css';
// Content passed into this component, received through props.
let { children } = $props();
</script>
<svelte:head>
<title>EXO</title>
<meta name="description" content="EXO - Distributed AI Cluster Dashboard" />
</svelte:head>
<div class="min-h-screen bg-background text-foreground">
<!-- Optional call: renders nothing when no `children` was provided. -->
{@render children?.()}
</div>

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,441 @@
<script lang="ts">
import { onMount } from 'svelte';
import {
topologyData,
downloads,
type DownloadProgress,
refreshState,
lastUpdate as lastUpdateStore
} from '$lib/stores/app.svelte';
import HeaderNav from '$lib/components/HeaderNav.svelte';
// Download progress of a single file within a model download.
type FileProgress = {
// Key of the file in the progress payload's `files` object.
name: string;
totalBytes: number;
downloadedBytes: number;
// presumably bytes per second (the page's formatSpeed helper renders B/s units) — confirm against the backend
speed: number;
// Estimated time remaining, in milliseconds.
etaMs: number;
// downloadedBytes / totalBytes * 100, or 0 when totalBytes is 0.
percentage: number;
};
// Aggregated download state of one model on one node.
type ModelEntry = {
// Model id from the shard metadata; 'unknown-model' when it cannot be extracted.
modelId: string;
// Read from the model metadata's `prettyName` key when present.
prettyName?: string | null;
// 0-100; forced to 100 when the download kind is 'DownloadCompleted'.
percentage: number;
downloadedBytes: number;
totalBytes: number;
speed: number;
etaMs: number;
// 'completed' when the download kind is 'DownloadCompleted', otherwise 'downloading'.
status: 'completed' | 'downloading';
files: FileProgress[];
};
// All model downloads known for one node.
type NodeEntry = {
nodeId: string;
// Display label resolved by getNodeLabel.
nodeName: string;
// Sorted by percentage, highest first, when built by the effect in this file.
models: ModelEntry[];
};
// Topology snapshot from the app store; getNodeLabel reads it to resolve node display names.
const data = $derived(topologyData());
// Raw downloads state from the app store; its top-level keys are used as node ids.
const downloadsData = $derived(downloads());
/**
 * Resolve a display label for a node.
 *
 * @param nodeId - Id of the node to look up in the topology data.
 * @returns The node's friendly name, else its system model id, else the first
 *   eight characters of the id (also used when the node is not in the topology).
 */
function getNodeLabel(nodeId: string): string {
  const shortId = nodeId.slice(0, 8);
  const node = data?.nodes?.[nodeId];
  if (!node) {
    return shortId;
  }
  return node.friendly_name || node.system_info?.model_id || shortId;
}
/**
 * Extract a byte count from a value that is either a plain number or an
 * object carrying it under `in_bytes` / `inBytes`.
 *
 * @param value - Raw value from the payload.
 * @returns The number found (snake_case key takes precedence), or 0 when the
 *   value has no numeric byte count.
 */
function getBytes(value: unknown): number {
  if (typeof value === 'number') return value;
  if (!value || typeof value !== 'object') return 0;

  const { in_bytes: snakeCase, inBytes: camelCase } = value as Record<string, unknown>;
  if (typeof snakeCase === 'number') return snakeCase;
  if (typeof camelCase === 'number') return camelCase;
  return 0;
}
/**
 * Format a byte count compactly, e.g. `1536` -> `'1.5KB'`.
 *
 * Values are scaled by powers of 1024. Scaled values of 10 or more are shown
 * without decimals, smaller ones with one decimal.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size; `'0B'` for zero, negative or NaN input.
 */
function formatBytes(bytes: number): string {
  if (!bytes || bytes <= 0) return '0B';
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];
  const exponent = Math.floor(Math.log(bytes) / Math.log(1024));
  // Clamp at both ends: values below 1 give a negative exponent (which would
  // index units[-1]) and huge values exceed the unit table.
  const i = Math.max(0, Math.min(exponent, units.length - 1));
  const val = bytes / Math.pow(1024, i);
  return `${val.toFixed(val >= 10 ? 0 : 1)}${units[i]}`;
}
/**
 * Format a remaining-time estimate.
 *
 * The duration is rounded to whole seconds and shown with its two most
 * significant units: `'1h 1m'`, `'1m 5s'` or `'5s'`.
 *
 * @param ms - Remaining time in milliseconds.
 * @returns The formatted duration, or `'--'` for zero, negative or NaN input.
 */
function formatEta(ms: number): string {
  if (!ms || ms <= 0) return '--';

  const totalSeconds = Math.round(ms / 1000);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor(totalSeconds / 60) % 60;
  const seconds = totalSeconds % 60;

  if (hours > 0) {
    return `${hours}h ${minutes}m`;
  }
  return minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`;
}
/**
 * Format a transfer rate compactly, e.g. `1536` -> `'1.5KB/s'`.
 *
 * Values are scaled by powers of 1024. Scaled values of 10 or more are shown
 * without decimals, smaller ones with one decimal.
 *
 * @param bytesPerSecond - Transfer rate in bytes per second.
 * @returns The formatted rate; `'--'` for zero, negative or NaN input.
 */
function formatSpeed(bytesPerSecond: number): string {
  if (!bytesPerSecond || bytesPerSecond <= 0) return '--';
  const units = ['B/s', 'KB/s', 'MB/s', 'GB/s'];
  const exponent = Math.floor(Math.log(bytesPerSecond) / Math.log(1024));
  // Rates below 1 B/s give a negative exponent; without the lower clamp that
  // indexed units[-1] and rendered e.g. "512undefined".
  const i = Math.max(0, Math.min(exponent, units.length - 1));
  const val = bytesPerSecond / Math.pow(1024, i);
  return `${val.toFixed(val >= 10 ? 0 : 1)}${units[i]}`;
}
/**
 * Clamp a percentage into the range 0-100.
 *
 * @param value - Candidate percentage.
 * @returns The clamped value; 0 for `undefined`, NaN and +/-Infinity.
 */
function clampPercent(value: number | undefined): number {
  return Number.isFinite(value)
    ? Math.min(100, Math.max(0, value as number))
    : 0;
}
/**
 * Pull the model id out of a download payload.
 *
 * Expected shape (snake_case or camelCase keys are both accepted):
 * `{ shard_metadata: { <single key>: { model_meta: { model_id } } } }`.
 *
 * @param downloadPayload - Inner payload of one download entry.
 * @returns The model id, or `null` when the shard metadata is missing, does
 *   not have exactly one key, or carries no model metadata / id.
 */
function extractModelIdFromDownload(downloadPayload: Record<string, unknown>): string | null {
  const shardWrapper = downloadPayload.shard_metadata ?? downloadPayload.shardMetadata;
  if (!shardWrapper || typeof shardWrapper !== 'object') return null;

  // The wrapper must hold exactly one entry.
  const wrapperRecord = shardWrapper as Record<string, unknown>;
  const [onlyKey, ...extraKeys] = Object.keys(wrapperRecord);
  if (onlyKey === undefined || extraKeys.length > 0) return null;

  const shard = wrapperRecord[onlyKey] as Record<string, unknown>;
  if (!shard) return null;

  const rawMeta = shard.model_meta ?? shard.modelMeta;
  if (!rawMeta || typeof rawMeta !== 'object') return null;

  const metaRecord = rawMeta as Record<string, unknown>;
  return (metaRecord.model_id as string) ?? (metaRecord.modelId as string) ?? null;
}
/**
 * Normalise the progress section of a download payload.
 *
 * Both snake_case and camelCase keys are accepted. Byte counts go through
 * `getBytes`. When no ETA is reported (or it is 0), one is estimated from the
 * remaining bytes and the current speed.
 *
 * @param payload - Inner payload of one download entry.
 * @returns The normalised progress, or `null` when the payload has no
 *   progress object.
 */
function parseDownloadProgress(payload: Record<string, unknown>): DownloadProgress | null {
  const rawProgress = payload.download_progress ?? payload.downloadProgress;
  if (!rawProgress || typeof rawProgress !== 'object') {
    return null;
  }
  const source = rawProgress as Record<string, unknown>;

  const totalBytes = getBytes(source.total_bytes ?? source.totalBytes);
  const downloadedBytes = getBytes(source.downloaded_bytes ?? source.downloadedBytes);
  const speed = (source.speed as number) ?? 0;

  // Prefer the reported ETA; otherwise derive it from remaining bytes / speed.
  let etaMs = (source.eta_ms as number) ?? (source.etaMs as number) ?? 0;
  if (!etaMs) {
    etaMs = speed > 0 ? ((totalBytes - downloadedBytes) / speed) * 1000 : 0;
  }

  // Per-file progress; entries that are not objects are ignored.
  const files: DownloadProgress['files'] = [];
  const rawFiles = (source.files ?? {}) as Record<string, unknown>;
  for (const [name, rawFile] of Object.entries(rawFiles)) {
    if (rawFile && typeof rawFile === 'object') {
      const fileRecord = rawFile as Record<string, unknown>;
      const fileTotal = getBytes(fileRecord.total_bytes ?? fileRecord.totalBytes);
      const fileDownloaded = getBytes(fileRecord.downloaded_bytes ?? fileRecord.downloadedBytes);
      files.push({
        name,
        totalBytes: fileTotal,
        downloadedBytes: fileDownloaded,
        speed: (fileRecord.speed as number) ?? 0,
        etaMs: (fileRecord.eta_ms as number) ?? (fileRecord.etaMs as number) ?? 0,
        percentage: fileTotal > 0 ? (fileDownloaded / fileTotal) * 100 : 0
      });
    }
  }

  return {
    totalBytes,
    downloadedBytes,
    speed,
    etaMs,
    percentage: totalBytes > 0 ? (downloadedBytes / totalBytes) * 100 : 0,
    completedFiles: (source.completed_files as number) ?? (source.completedFiles as number) ?? 0,
    totalFiles: (source.total_files as number) ?? (source.totalFiles as number) ?? 0,
    files
  };
}
/**
 * Pick the gradient utility classes for a progress bar.
 *
 * @param percentage - Progress value.
 * @returns Green classes at 100 or more, red classes at 0 or less, and the
 *   yellow classes otherwise (including NaN).
 */
function getBarGradient(percentage: number): string {
  const isComplete = percentage >= 100;
  const isEmpty = percentage <= 0;
  return isComplete
    ? 'from-green-500 to-green-400'
    : isEmpty
      ? 'from-red-500 to-red-400'
      : 'from-exo-yellow to-exo-yellow/70';
}
// Per-node download summary iterated by the template; always replaced wholesale by the effect below.
let downloadOverview = $state<NodeEntry[]>([]);
// Rebuilds `downloadOverview` from `downloadsData`.
// For every node it collects one ModelEntry per model id and keeps the best
// entry when a model appears more than once. Any exception while parsing is
// logged and results in an empty overview.
$effect(() => {
try {
if (!downloadsData || Object.keys(downloadsData).length === 0) {
downloadOverview = [];
return;
}
const entries = Object.entries(downloadsData);
const built: NodeEntry[] = [];
for (const [nodeId, nodeDownloads] of entries) {
// Best entry seen so far for each model id on this node.
const modelMap = new Map<string, ModelEntry>();
// Accept either an array of downloads or an object whose values are the downloads.
const nodeEntries = Array.isArray(nodeDownloads)
? nodeDownloads
: nodeDownloads && typeof nodeDownloads === 'object'
? Object.values(nodeDownloads as Record<string, unknown>)
: [];
for (const downloadWrapped of nodeEntries) {
if (!downloadWrapped || typeof downloadWrapped !== 'object') continue;
// Each download must be a single-key wrapper { <kind>: payload }; anything else is skipped.
const keys = Object.keys(downloadWrapped as Record<string, unknown>);
if (keys.length !== 1) continue;
const downloadKind = keys[0];
const downloadPayload = (downloadWrapped as Record<string, unknown>)[downloadKind] as Record<string, unknown>;
if (!downloadPayload) continue;
const modelId = extractModelIdFromDownload(downloadPayload) ?? 'unknown-model';
// Walks the same shard-metadata path as extractModelIdFromDownload.
// NOTE(review): only the camelCase `prettyName` key is read here, while
// every other field also accepts a snake_case spelling — confirm that is intended.
const prettyName = (() => {
const shardMetadata = downloadPayload.shard_metadata ?? downloadPayload.shardMetadata;
if (!shardMetadata || typeof shardMetadata !== 'object') return null;
const shardObj = shardMetadata as Record<string, unknown>;
const shardKeys = Object.keys(shardObj);
if (shardKeys.length !== 1) return null;
const shardData = shardObj[shardKeys[0]] as Record<string, unknown>;
const modelMeta = shardData?.model_meta ?? shardData?.modelMeta;
if (!modelMeta || typeof modelMeta !== 'object') return null;
const meta = modelMeta as Record<string, unknown>;
return (meta.prettyName as string) ?? null;
})();
// A missing progress object falls back to {}, so every derived number below becomes 0.
const rawProgress = (downloadPayload as Record<string, unknown>).download_progress
?? (downloadPayload as Record<string, unknown>).downloadProgress
?? {};
const totalBytes = getBytes((rawProgress as Record<string, unknown>).total_bytes ?? (rawProgress as Record<string, unknown>).totalBytes);
const downloadedBytes = getBytes((rawProgress as Record<string, unknown>).downloaded_bytes ?? (rawProgress as Record<string, unknown>).downloadedBytes);
const speed = (rawProgress as Record<string, unknown>).speed as number ?? 0;
const etaMs = (rawProgress as Record<string, unknown>).eta_ms as number ?? (rawProgress as Record<string, unknown>).etaMs as number ?? 0;
const percentage = totalBytes > 0 ? (downloadedBytes / totalBytes) * 100 : 0;
// Per-file progress; same field handling as parseDownloadProgress, but percentages are clamped to 0-100 here.
const files: FileProgress[] = [];
const filesObj = (rawProgress as Record<string, unknown>).files as Record<string, unknown> | undefined;
if (filesObj && typeof filesObj === 'object') {
for (const [fileName, fileData] of Object.entries(filesObj)) {
if (!fileData || typeof fileData !== 'object') continue;
const fd = fileData as Record<string, unknown>;
const fTotal = getBytes(fd.total_bytes ?? fd.totalBytes);
const fDownloaded = getBytes(fd.downloaded_bytes ?? fd.downloadedBytes);
files.push({
name: fileName,
totalBytes: fTotal,
downloadedBytes: fDownloaded,
speed: (fd.speed as number) ?? 0,
etaMs: (fd.eta_ms as number) ?? (fd.etaMs as number) ?? 0,
percentage: clampPercent(fTotal > 0 ? (fDownloaded / fTotal) * 100 : 0)
});
}
}
// A 'DownloadCompleted' wrapper is shown as 100% regardless of the byte counts; every other kind is 'downloading'.
const entry: ModelEntry = {
modelId,
prettyName,
percentage: downloadKind === 'DownloadCompleted' ? 100 : clampPercent(percentage),
downloadedBytes,
totalBytes,
speed,
etaMs,
status: downloadKind === 'DownloadCompleted' ? 'completed' : 'downloading',
files
};
// Duplicate model ids: a completed entry replaces a non-completed one;
// between entries of equal status the one with more downloaded bytes wins.
const existing = modelMap.get(modelId);
if (!existing) {
modelMap.set(modelId, entry);
} else if (
(entry.status === 'completed' && existing.status !== 'completed') ||
(entry.status === existing.status && entry.downloadedBytes > existing.downloadedBytes)
) {
modelMap.set(modelId, entry);
}
}
// Most complete models first.
let models = Array.from(modelMap.values()).sort((a, b) => b.percentage - a.percentage);
// The node had entries but none could be parsed: show a single placeholder row.
if (models.length === 0 && nodeEntries.length > 0) {
models = [{
modelId: 'Unknown download',
percentage: 0,
downloadedBytes: 0,
totalBytes: 0,
speed: 0,
etaMs: 0,
status: 'downloading',
files: []
}];
}
built.push({
nodeId,
nodeName: getNodeLabel(nodeId),
models
});
}
downloadOverview = built;
} catch (err) {
console.error('Parse downloads error', err);
downloadOverview = [];
}
});
// True when at least one node entry was built; the template shows the empty state otherwise.
const hasDownloads = $derived(downloadOverview.length > 0);
// Last-update value from the store; the header renders it as a local time string, or 'n/a' when falsy.
const lastUpdateTs = $derived(lastUpdateStore());
// Top-level keys of the raw downloads state, listed in the empty-state message.
const downloadKeys = $derived(Object.keys(downloadsData || {}));
// Set of currently expanded keys; replaced (never mutated) by toggleExpand.
let expanded = $state<Set<string>>(new Set());
/**
 * Flip the expanded state of one key.
 *
 * A fresh Set is assigned instead of mutating the current one — presumably so
 * the `$state` assignment is what notifies dependents (confirm).
 *
 * @param key - Key to add to, or remove from, the expanded set.
 */
function toggleExpand(key: string): void {
  const updated = new Set(expanded);
  // delete() reports whether the key was present; if it was not, add it instead.
  if (!updated.delete(key)) {
    updated.add(key);
  }
  expanded = updated;
}
// Trigger one state refresh as soon as this page is mounted.
onMount(() => {
// Ensure we fetch at least once when visiting downloads directly
refreshState();
});
</script>
<div class="min-h-screen bg-exo-dark-gray text-white">
<HeaderNav showHome={true} />
<div class="max-w-7xl mx-auto px-4 lg:px-8 py-6 space-y-6">
<div class="flex items-center justify-between gap-4 flex-wrap">
<div>
<h1 class="text-2xl font-mono tracking-[0.2em] uppercase text-exo-yellow">Downloads</h1>
<p class="text-sm text-exo-light-gray">Overview of models on each node</p>
</div>
<div class="flex items-center gap-3">
<button
type="button"
class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
onclick={() => refreshState()}
title="Force refresh from /state"
>
Refresh
</button>
<div class="text-[11px] font-mono text-exo-light-gray">
Last update: {lastUpdateTs ? new Date(lastUpdateTs).toLocaleTimeString() : 'n/a'}
</div>
</div>
</div>
{#if !hasDownloads}
<div class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray space-y-2">
<div class="text-sm">No downloads found. Start a model download to see progress here.</div>
<div class="text-[11px] text-exo-light-gray/70">
Download keys detected: {downloadKeys.length === 0 ? 'none' : downloadKeys.join(', ')}
</div>
</div>
{:else}
<div class="downloads-grid gap-4">
{#each downloadOverview as node}
<div class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-3 flex flex-col">
<div class="flex items-center justify-between gap-3">
<div class="min-w-0 flex-1">
<div class="text-lg font-mono text-white truncate">{node.nodeName}</div>
<div class="text-xs text-exo-light-gray font-mono truncate">{node.nodeId}</div>
</div>
<div class="text-xs font-mono uppercase tracking-wider whitespace-nowrap shrink-0">
<span class="text-green-400">{node.models.filter(m => m.status === 'completed').length}</span><span class="text-exo-yellow"> /{node.models.length} models</span>
</div>
</div>
{#each node.models as model}
{@const key = `${node.nodeId}|${model.modelId}`}
{@const pct = clampPercent(model.percentage)}
{@const gradient = getBarGradient(pct)}
{@const isExpanded = expanded.has(key)}
<div class="rounded border border-exo-medium-gray/30 bg-exo-dark-gray/60 p-3 space-y-2">
<div class="flex items-center justify-between gap-3">
<div class="min-w-0 space-y-0.5">
<div class="text-sm font-mono text-white truncate">{model.prettyName ?? model.modelId}</div>
<div class="text-[11px] text-exo-light-gray font-mono truncate">
{model.modelId}
</div>
<div class="text-[11px] text-exo-light-gray font-mono">
{formatBytes(model.downloadedBytes)} / {formatBytes(model.totalBytes)}
</div>
</div>
<div class="flex items-center gap-2">
<span class="text-xs font-mono {pct >= 100 ? 'text-green-400' : pct <= 0 ? 'text-red-400' : 'text-exo-yellow'}">
{pct.toFixed(1)}%
</span>
<button
type="button"
class="text-exo-light-gray hover:text-exo-yellow transition-colors"
onclick={() => toggleExpand(key)}
aria-expanded={isExpanded}
title="Toggle file details"
>
<svg class="w-4 h-4" viewBox="0 0 20 20" fill="none" stroke="currentColor" stroke-width="2">
<path d="M6 8l4 4 4-4" class={isExpanded ? 'transform rotate-180 origin-center transition-transform duration-150' : 'transition-transform duration-150'}></path>
</svg>
</button>
</div>
</div>
<div class="relative h-2 bg-exo-black/60 rounded-sm overflow-hidden">
<div
class={`absolute inset-y-0 left-0 bg-gradient-to-r ${gradient} transition-all duration-300`}
style={`width: ${pct.toFixed(1)}%`}
></div>
</div>
<div class="flex items-center justify-between text-xs font-mono text-exo-light-gray">
<span>{model.status === 'completed' ? 'Completed' : `${formatSpeed(model.speed)} ETA ${formatEta(model.etaMs)}`}</span>
{#if model.status !== 'completed'}
<span>{model.files.length} file{model.files.length === 1 ? '' : 's'}</span>
{/if}
</div>
{#if isExpanded}
<div class="mt-2 space-y-1.5">
{#if model.files.length === 0}
<div class="text-[11px] font-mono text-exo-light-gray/70">No file details reported.</div>
{:else}
{#each model.files as f}
{@const fpct = clampPercent(f.percentage)}
{@const fgradient = getBarGradient(fpct)}
<div class="rounded border border-exo-medium-gray/20 bg-exo-black/40 p-2 space-y-1">
<div class="flex items-center justify-between text-[11px] font-mono text-exo-light-gray/90">
<span class="truncate pr-2">{f.name}</span>
<span class="{fpct >= 100 ? 'text-green-400' : fpct <= 0 ? 'text-red-400' : 'text-exo-yellow'}">{fpct.toFixed(1)}%</span>
</div>
<div class="relative h-1.5 bg-exo-black/60 rounded-sm overflow-hidden">
<div
class={`absolute inset-y-0 left-0 bg-gradient-to-r ${fgradient} transition-all duration-300`}
style={`width: ${fpct.toFixed(1)}%`}
></div>
</div>
<div class="flex items-center justify-between text-[10px] text-exo-light-gray/70">
<span>{formatBytes(f.downloadedBytes)} / {formatBytes(f.totalBytes)}</span>
<span>{formatSpeed(f.speed)} • ETA {formatEta(f.etaMs)}</span>
</div>
</div>
{/each}
{/if}
</div>
{/if}
</div>
{/each}
</div>
{/each}
</div>
{/if}
</div>
</div>
<style>
.downloads-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
}
@media (min-width: 1024px) {
.downloads-grid {
grid-template-columns: repeat(3, minmax(0, 1fr));
}
}
@media (min-width: 1440px) {
.downloads-grid {
grid-template-columns: repeat(4, minmax(0, 1fr));
}
}
</style>

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

View File

@@ -0,0 +1,28 @@
import adapter from '@sveltejs/adapter-static';
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
/** @type {import('@sveltejs/kit').Config} */
const config = {
preprocess: [vitePreprocess()],
kit: {
paths: {
relative: true
},
router: { type: 'hash' },
adapter: adapter({
pages: 'build',
assets: 'build',
fallback: 'index.html',
precompress: false,
strict: true
}),
alias: {
$lib: 'src/lib',
$components: 'src/lib/components'
}
}
};
export default config;

15
dashboard/tsconfig.json Normal file
View File

@@ -0,0 +1,15 @@
{
"extends": "./.svelte-kit/tsconfig.json",
"compilerOptions": {
"allowJs": true,
"checkJs": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"skipLibCheck": true,
"sourceMap": true,
"strict": true,
"moduleResolution": "bundler"
}
}

16
dashboard/vite.config.ts Normal file
View File

@@ -0,0 +1,16 @@
import tailwindcss from '@tailwindcss/vite';
import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vite';
// Vite configuration for the dashboard: Tailwind and SvelteKit plugins plus a
// dev-server proxy for backend routes.
export default defineConfig({
plugins: [tailwindcss(), sveltekit()],
server: {
// Dev-server requests whose path starts with one of these prefixes are
// forwarded to http://localhost:8000.
// NOTE(review): presumably the locally running exo API — confirm the port.
// NOTE(review): only these four prefixes are proxied; confirm the dashboard
// does not call any other backend route while running under `vite dev`.
proxy: {
'/v1': 'http://localhost:8000',
'/state': 'http://localhost:8000',
'/models': 'http://localhost:8000',
'/instance': 'http://localhost:8000'
}
}
});

View File

@@ -81,6 +81,9 @@
# NIX # NIX
nixpkgs-fmt nixpkgs-fmt
# SVELTE
nodejs
# MISC # MISC
just just
jq jq
@@ -96,7 +99,6 @@
shellHook = '' shellHook = ''
# PYTHON # PYTHON
export DASHBOARD_DIR="$(git rev-parse --show-toplevel)/dashboard"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${pkgs.python313}/lib" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${pkgs.python313}/lib"
echo echo
echo "🍎🍎 Run 'just <recipe>' to get started" echo "🍎🍎 Run 'just <recipe>' to get started"

View File

@@ -20,7 +20,19 @@ rust-rebuild:
cargo run --bin stub_gen cargo run --bin stub_gen
just sync-clean just sync-clean
build-dashboard:
#!/usr/bin/env bash
cd dashboard
npm install
npm run build
package:
uv run pyinstaller packaging/pyinstaller/exo.spec
clean: clean:
rm -rf **/__pycache__ rm -rf **/__pycache__
rm -rf target/ rm -rf target/
rm -rf .venv rm -rf .venv
rm -rf dashboard/node_modules
rm -rf dashboard/.svelte-kit
rm -rf dashboard/build

View File

@@ -1,4 +1,3 @@
import os
import time import time
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from typing import cast from typing import cast
@@ -15,6 +14,7 @@ from hypercorn.config import Config
from hypercorn.typing import ASGIFramework from hypercorn.typing import ASGIFramework
from loguru import logger from loguru import logger
from exo.master.placement import place_instance as get_instance_placements
from exo.shared.apply import apply from exo.shared.apply import apply
from exo.shared.election import ElectionMessage from exo.shared.election import ElectionMessage
from exo.shared.logging import InterceptLogger from exo.shared.logging import InterceptLogger
@@ -23,11 +23,14 @@ from exo.shared.models.model_meta import get_model_meta
from exo.shared.types.api import ( from exo.shared.types.api import (
ChatCompletionMessage, ChatCompletionMessage,
ChatCompletionResponse, ChatCompletionResponse,
CreateInstanceParams,
CreateInstanceResponse, CreateInstanceResponse,
CreateInstanceTaskParams,
DeleteInstanceResponse, DeleteInstanceResponse,
ModelList, ModelList,
ModelListModel, ModelListModel,
PlaceInstanceParams,
PlacementPreview,
PlacementPreviewResponse,
StreamingChoiceResponse, StreamingChoiceResponse,
) )
from exo.shared.types.chunks import TokenChunk from exo.shared.types.chunks import TokenChunk
@@ -37,17 +40,20 @@ from exo.shared.types.commands import (
CreateInstance, CreateInstance,
DeleteInstance, DeleteInstance,
ForwarderCommand, ForwarderCommand,
PlaceInstance,
TaskFinished, TaskFinished,
) )
from exo.shared.types.common import CommandId, NodeId, SessionId from exo.shared.types.common import CommandId, NodeId, SessionId
from exo.shared.types.events import ChunkGenerated, Event, ForwarderEvent, IndexedEvent from exo.shared.types.events import ChunkGenerated, Event, ForwarderEvent, IndexedEvent
from exo.shared.types.memory import Memory from exo.shared.types.memory import Memory
from exo.shared.types.models import ModelMetadata from exo.shared.types.models import ModelId, ModelMetadata
from exo.shared.types.state import State from exo.shared.types.state import State
from exo.shared.types.tasks import ChatCompletionTaskParams from exo.shared.types.tasks import ChatCompletionTaskParams
from exo.shared.types.worker.instances import Instance, InstanceId from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
from exo.shared.types.worker.shards import Sharding
from exo.utils.banner import print_startup_banner from exo.utils.banner import print_startup_banner
from exo.utils.channels import Receiver, Sender, channel from exo.utils.channels import Receiver, Sender, channel
from exo.utils.dashboard_path import find_dashboard
from exo.utils.event_buffer import OrderedBuffer from exo.utils.event_buffer import OrderedBuffer
HIDE_THINKING = False HIDE_THINKING = False
@@ -91,7 +97,8 @@ class API:
# This lets us pause the API if an election is running # This lets us pause the API if an election is running
election_receiver: Receiver[ElectionMessage], election_receiver: Receiver[ElectionMessage],
) -> None: ) -> None:
self._state = State() self.state = State()
self._event_log: list[Event] = []
self.command_sender = command_sender self.command_sender = command_sender
self.global_event_receiver = global_event_receiver self.global_event_receiver = global_event_receiver
self.election_receiver = election_receiver self.election_receiver = election_receiver
@@ -111,12 +118,7 @@ class API:
self.app.mount( self.app.mount(
"/", "/",
StaticFiles( StaticFiles(
directory=os.environ.get( directory=find_dashboard(),
"DASHBOARD_DIR",
os.path.abspath(
os.path.join(os.path.dirname(__file__), "../../../dashboard")
),
),
html=True, html=True,
), ),
name="dashboard", name="dashboard",
@@ -127,7 +129,7 @@ class API:
def reset(self, new_session_id: SessionId, result_clock: int): def reset(self, new_session_id: SessionId, result_clock: int):
logger.info("Resetting API State") logger.info("Resetting API State")
self._state = State() self.state = State()
self.session_id = new_session_id self.session_id = new_session_id
self.event_buffer = OrderedBuffer[Event]() self.event_buffer = OrderedBuffer[Event]()
self._chat_completion_queues = {} self._chat_completion_queues = {}
@@ -150,51 +152,194 @@ class API:
) )
def _setup_routes(self) -> None: def _setup_routes(self) -> None:
self.app.get("/node_id")(lambda: self.node_id)
self.app.post("/instance")(self.create_instance) self.app.post("/instance")(self.create_instance)
self.app.post("/place_instance")(self.place_instance)
self.app.get("/instance/placement")(self.get_placement)
self.app.get("/instance/previews")(self.get_placement_previews)
self.app.get("/instance/{instance_id}")(self.get_instance) self.app.get("/instance/{instance_id}")(self.get_instance)
self.app.delete("/instance/{instance_id}")(self.delete_instance) self.app.delete("/instance/{instance_id}")(self.delete_instance)
self.app.get("/models")(self.get_models) self.app.get("/models")(self.get_models)
self.app.get("/v1/models")(self.get_models) self.app.get("/v1/models")(self.get_models)
self.app.post("/v1/chat/completions")(self.chat_completions) self.app.post("/v1/chat/completions")(self.chat_completions)
self.app.get("/state")(self.state) self.app.get("/state")(lambda: self.state)
self.app.get("/events")(lambda: self._event_log)
async def state(self) -> State: async def place_instance(self, payload: PlaceInstanceParams):
return self._state command = PlaceInstance(
model_meta=await resolve_model_meta(payload.model_id),
async def create_instance( sharding=payload.sharding,
self, payload: CreateInstanceTaskParams
) -> CreateInstanceResponse:
model_meta = await resolve_model_meta(payload.model_id)
required_memory = model_meta.storage_size
available_memory = self._calculate_total_available_memory()
if required_memory > available_memory:
raise HTTPException(
status_code=400,
detail=f"Insufficient memory to create instance. Required: {required_memory.in_gb:.1f}GB, Available: {available_memory.in_gb:.1f}GB",
)
command = CreateInstance(
model_meta=model_meta,
instance_meta=payload.instance_meta, instance_meta=payload.instance_meta,
min_nodes=payload.min_nodes, min_nodes=payload.min_nodes,
sharding=payload.sharding,
) )
await self._send(command) await self._send(command)
return CreateInstanceResponse( return CreateInstanceResponse(
message="Command received.", message="Command received.",
command_id=command.command_id, command_id=command.command_id,
model_meta=model_meta,
) )
async def create_instance(
    self, payload: CreateInstanceParams
) -> CreateInstanceResponse:
    """Submit a command to create the instance supplied in the payload.

    Wraps ``payload.instance`` in a ``CreateInstance`` command, sends it, and
    acknowledges the request with the id of that command.
    """
    create_command = CreateInstance(instance=payload.instance)
    await self._send(create_command)
    acknowledgement = CreateInstanceResponse(
        message="Command received.",
        command_id=create_command.command_id,
    )
    return acknowledgement
async def get_placement(
    self,
    model_id: str,
    sharding: Sharding = Sharding.Pipeline,
    instance_meta: InstanceMeta = InstanceMeta.MlxRing,
    min_nodes: int = 1,
) -> Instance:
    """Compute the instance a placement request would add, without sending it.

    Resolves ``model_id`` to model metadata, runs placement against the API's
    current topology and instances, and returns the one instance whose id is
    not already present in the current state. No command is sent.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised while building or
            evaluating the placement; 500 when the result does not contain
            exactly one new instance id.
    """
    resolved_meta = await resolve_model_meta(model_id)

    try:
        # The command is constructed inside the ``try`` so that a ValueError
        # raised while building it is reported like a placement failure.
        candidate_instances = get_instance_placements(
            PlaceInstance(
                model_meta=resolved_meta,
                sharding=sharding,
                instance_meta=instance_meta,
                min_nodes=min_nodes,
            ),
            topology=self.state.topology,
            current_instances=self.state.instances,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    existing_ids = set(self.state.instances.keys())
    added_ids = [
        candidate_id
        for candidate_id in candidate_instances
        if candidate_id not in existing_ids
    ]
    if len(added_ids) != 1:
        raise HTTPException(
            status_code=500,
            detail="Expected exactly one new instance from placement",
        )

    (added_id,) = added_ids
    return candidate_instances[added_id]
async def get_placement_previews(
    self, model_id: ModelId
) -> PlacementPreviewResponse:
    """Preview the placements the current topology offers for a model.

    For every model card whose ``short_id`` equals ``model_id``, placement is
    attempted for each combination of sharding (Pipeline, Tensor), instance
    meta (MlxRing, MlxJaccl) and ``min_nodes`` from 1 up to the number of
    nodes in the topology. Nothing is sent to the cluster.

    Results are de-duplicated:
      * successes are keyed by (model, sharding, instance meta, number of
        nodes in the resulting instance);
      * failures are keyed by (model, sharding, instance meta), so only the
        first failure for such a triple is reported.

    Each successful preview carries ``memory_delta_by_node``: the model's
    storage size divided evenly over the instance's nodes, with the leftover
    bytes given one each to the first nodes in string-sorted id order.

    Returns:
        An empty response when the topology has no nodes, otherwise the
        collected previews.

    Raises:
        HTTPException: 404 when the topology is non-empty and no model card
            has ``short_id == model_id``.
    """
    # The node count and the set of existing instance ids cannot change while
    # this method runs (nothing below awaits), so compute them once.
    node_count = len(list(self.state.topology.list_nodes()))
    if node_count == 0:
        return PlacementPreviewResponse(previews=[])

    cards = [card for card in MODEL_CARDS.values() if card.short_id == model_id]
    if not cards:
        raise HTTPException(status_code=404, detail=f"Model {model_id} not found")

    instance_combinations: list[tuple[Sharding, InstanceMeta, int]] = [
        (sharding, instance_meta, min_nodes)
        for sharding in (Sharding.Pipeline, Sharding.Tensor)
        for instance_meta in (InstanceMeta.MlxRing, InstanceMeta.MlxJaccl)
        for min_nodes in range(1, node_count + 1)
    ]
    # TODO: PDD
    # instance_combinations.append((Sharding.PrefillDecodeDisaggregation, InstanceMeta.MlxRing, 1))

    current_ids = set(self.state.instances.keys())
    seen: set[tuple[ModelId, Sharding, InstanceMeta, int]] = set()
    previews: list[PlacementPreview] = []

    def record_failure(
        failed_model_id: ModelId,
        sharding: Sharding,
        instance_meta: InstanceMeta,
        error: str,
    ) -> None:
        # Failures use a node count of 0 in the key, so at most one failure
        # is reported per (model, sharding, instance meta).
        key = (failed_model_id, sharding, instance_meta, 0)
        if key in seen:
            return
        previews.append(
            PlacementPreview(
                model_id=failed_model_id,
                sharding=sharding,
                instance_meta=instance_meta,
                instance=None,
                error=error,
            )
        )
        seen.add(key)

    for card in cards:
        model_meta = card.metadata
        for sharding, instance_meta, min_nodes in instance_combinations:
            try:
                placements = get_instance_placements(
                    PlaceInstance(
                        model_meta=model_meta,
                        sharding=sharding,
                        instance_meta=instance_meta,
                        min_nodes=min_nodes,
                    ),
                    topology=self.state.topology,
                    current_instances=self.state.instances,
                )
            except ValueError as exc:
                record_failure(card.model_id, sharding, instance_meta, str(exc))
                continue

            new_instances = [
                instance
                for instance_id, instance in placements.items()
                if instance_id not in current_ids
            ]
            if len(new_instances) != 1:
                record_failure(
                    card.model_id,
                    sharding,
                    instance_meta,
                    "Expected exactly one new instance from placement",
                )
                continue

            instance = new_instances[0]
            node_ids = list(instance.shard_assignments.node_to_runner.keys())

            key = (card.model_id, sharding, instance_meta, len(node_ids))
            if key in seen:
                continue

            # Even split of the model's bytes; the first ``remainder`` nodes
            # (in string-sorted order) take one extra byte each so the
            # per-node values sum to the total.
            memory_delta_by_node: dict[str, int] = {}
            if node_ids:
                per_node, remainder = divmod(
                    model_meta.storage_size.in_bytes, len(node_ids)
                )
                for index, node_id in enumerate(sorted(node_ids, key=str)):
                    extra = 1 if index < remainder else 0
                    memory_delta_by_node[str(node_id)] = per_node + extra

            previews.append(
                PlacementPreview(
                    model_id=card.model_id,
                    sharding=sharding,
                    instance_meta=instance_meta,
                    instance=instance,
                    memory_delta_by_node=memory_delta_by_node or None,
                    error=None,
                )
            )
            seen.add(key)

    return PlacementPreviewResponse(previews=previews)
def get_instance(self, instance_id: InstanceId) -> Instance: def get_instance(self, instance_id: InstanceId) -> Instance:
if instance_id not in self._state.instances: if instance_id not in self.state.instances:
raise HTTPException(status_code=404, detail="Instance not found") raise HTTPException(status_code=404, detail="Instance not found")
return self._state.instances[instance_id] return self.state.instances[instance_id]
async def delete_instance(self, instance_id: InstanceId) -> DeleteInstanceResponse: async def delete_instance(self, instance_id: InstanceId) -> DeleteInstanceResponse:
if instance_id not in self._state.instances: if instance_id not in self.state.instances:
raise HTTPException(status_code=404, detail="Instance not found") raise HTTPException(status_code=404, detail="Instance not found")
command = DeleteInstance( command = DeleteInstance(
@@ -261,7 +406,7 @@ class API:
if not any( if not any(
instance.shard_assignments.model_id == payload.model instance.shard_assignments.model_id == payload.model
for instance in self._state.instances.values() for instance in self.state.instances.values()
): ):
await self._trigger_notify_user_to_download_model(payload.model) await self._trigger_notify_user_to_download_model(payload.model)
raise HTTPException( raise HTTPException(
@@ -281,7 +426,7 @@ class API:
"""Calculate total available memory across all nodes in bytes.""" """Calculate total available memory across all nodes in bytes."""
total_available = Memory() total_available = Memory()
for node in self._state.topology.list_nodes(): for node in self.state.topology.list_nodes():
if node.node_profile is not None: if node.node_profile is not None:
total_available += node.node_profile.memory.ram_available total_available += node.node_profile.memory.ram_available
@@ -313,7 +458,7 @@ class API:
async with create_task_group() as tg: async with create_task_group() as tg:
self._tg = tg self._tg = tg
logger.info("Starting API") logger.info("Starting API")
tg.start_soon(self._apply_state) tg.start_soon(self._applystate)
tg.start_soon(self._pause_on_new_election) tg.start_soon(self._pause_on_new_election)
print_startup_banner(self.port) print_startup_banner(self.port)
await serve( await serve(
@@ -325,14 +470,15 @@ class API:
self.command_sender.close() self.command_sender.close()
self.global_event_receiver.close() self.global_event_receiver.close()
async def _apply_state(self): async def _applystate(self):
with self.global_event_receiver as events: with self.global_event_receiver as events:
async for f_event in events: async for f_event in events:
if f_event.origin != self.session_id.master_node_id: if f_event.origin != self.session_id.master_node_id:
continue continue
self.event_buffer.ingest(f_event.origin_idx, f_event.event) self.event_buffer.ingest(f_event.origin_idx, f_event.event)
for idx, event in self.event_buffer.drain_indexed(): for idx, event in self.event_buffer.drain_indexed():
self._state = apply(self._state, IndexedEvent(event=event, idx=idx)) self._event_log.append(event)
self.state = apply(self.state, IndexedEvent(event=event, idx=idx))
if ( if (
isinstance(event, ChunkGenerated) isinstance(event, ChunkGenerated)
and event.command_id in self._chat_completion_queues and event.command_id in self._chat_completion_queues

View File

@@ -5,9 +5,10 @@ from anyio.abc import TaskGroup
from loguru import logger from loguru import logger
from exo.master.placement import ( from exo.master.placement import (
get_instance_placements_after_create, add_instance_to_placements,
get_instance_placements_after_delete, delete_instance,
get_transition_events, get_transition_events,
place_instance,
) )
from exo.shared.apply import apply from exo.shared.apply import apply
from exo.shared.types.commands import ( from exo.shared.types.commands import (
@@ -15,6 +16,7 @@ from exo.shared.types.commands import (
CreateInstance, CreateInstance,
DeleteInstance, DeleteInstance,
ForwarderCommand, ForwarderCommand,
PlaceInstance,
RequestEventLog, RequestEventLog,
TaskFinished, TaskFinished,
TestCommand, TestCommand,
@@ -148,19 +150,26 @@ class Master:
self.command_task_mapping[command.command_id] = task_id self.command_task_mapping[command.command_id] = task_id
case DeleteInstance(): case DeleteInstance():
placement = get_instance_placements_after_delete( placement = delete_instance(command, self.state.instances)
command, self.state.instances transition_events = get_transition_events(
self.state.instances, placement
)
generated_events.extend(transition_events)
case PlaceInstance():
placement = place_instance(
command,
self.state.topology,
self.state.instances,
) )
transition_events = get_transition_events( transition_events = get_transition_events(
self.state.instances, placement self.state.instances, placement
) )
generated_events.extend(transition_events) generated_events.extend(transition_events)
case CreateInstance(): case CreateInstance():
placement = get_instance_placements_after_create( placement = add_instance_to_placements(
command, command,
self.state.topology, self.state.topology,
self.state.instances, self.state.instances,
tb_only=self.tb_only,
) )
transition_events = get_transition_events( transition_events = get_transition_events(
self.state.instances, placement self.state.instances, placement

View File

@@ -17,6 +17,7 @@ from exo.shared.topology import Topology
from exo.shared.types.commands import ( from exo.shared.types.commands import (
CreateInstance, CreateInstance,
DeleteInstance, DeleteInstance,
PlaceInstance,
) )
from exo.shared.types.common import Host from exo.shared.types.common import Host
from exo.shared.types.events import Event, InstanceCreated, InstanceDeleted from exo.shared.types.events import Event, InstanceCreated, InstanceDeleted
@@ -35,12 +36,20 @@ def random_ephemeral_port() -> int:
return random.randint(49152, 65535) return random.randint(49152, 65535)
def get_instance_placements_after_create( def add_instance_to_placements(
command: CreateInstance, command: CreateInstance,
topology: Topology, topology: Topology,
current_instances: Mapping[InstanceId, Instance], current_instances: Mapping[InstanceId, Instance],
*, ) -> Mapping[InstanceId, Instance]:
tb_only: bool = False, # TODO: validate against topology
return {**current_instances, command.instance.instance_id: command.instance}
def place_instance(
command: PlaceInstance,
topology: Topology,
current_instances: Mapping[InstanceId, Instance],
) -> dict[InstanceId, Instance]: ) -> dict[InstanceId, Instance]:
all_nodes = list(topology.list_nodes()) all_nodes = list(topology.list_nodes())
@@ -64,9 +73,7 @@ def get_instance_placements_after_create(
if topology.get_subgraph_from_nodes(cycle).is_thunderbolt_cycle(cycle) if topology.get_subgraph_from_nodes(cycle).is_thunderbolt_cycle(cycle)
] ]
if tb_only and smallest_tb_cycles == []: if smallest_tb_cycles != []:
raise ValueError("No TB cycles found with sufficient memory")
elif smallest_tb_cycles != []:
smallest_cycles = smallest_tb_cycles smallest_cycles = smallest_tb_cycles
cycles_with_leaf_nodes: list[list[NodeInfo]] = [ cycles_with_leaf_nodes: list[list[NodeInfo]] = [
@@ -138,7 +145,7 @@ def get_instance_placements_after_create(
return target_instances return target_instances
def get_instance_placements_after_delete( def delete_instance(
command: DeleteInstance, command: DeleteInstance,
current_instances: Mapping[InstanceId, Instance], current_instances: Mapping[InstanceId, Instance],
) -> dict[InstanceId, Instance]: ) -> dict[InstanceId, Instance]:

View File

@@ -11,8 +11,8 @@ from exo.shared.types.api import ChatCompletionMessage, ChatCompletionTaskParams
from exo.shared.types.commands import ( from exo.shared.types.commands import (
ChatCompletion, ChatCompletion,
CommandId, CommandId,
CreateInstance,
ForwarderCommand, ForwarderCommand,
PlaceInstance,
) )
from exo.shared.types.common import NodeId, SessionId from exo.shared.types.common import NodeId, SessionId
from exo.shared.types.events import ( from exo.shared.types.events import (
@@ -117,7 +117,7 @@ async def test_master():
ForwarderCommand( ForwarderCommand(
origin=node_id, origin=node_id,
command=( command=(
CreateInstance( PlaceInstance(
command_id=CommandId(), command_id=CommandId(),
model_meta=ModelMetadata( model_meta=ModelMetadata(
model_id=ModelId("llama-3.2-1b"), model_id=ModelId("llama-3.2-1b"),

View File

@@ -4,11 +4,11 @@ import pytest
from loguru import logger from loguru import logger
from exo.master.placement import ( from exo.master.placement import (
get_instance_placements_after_create,
get_transition_events, get_transition_events,
place_instance,
) )
from exo.shared.topology import Topology from exo.shared.topology import Topology
from exo.shared.types.commands import CreateInstance from exo.shared.types.commands import PlaceInstance
from exo.shared.types.common import CommandId, NodeId from exo.shared.types.common import CommandId, NodeId
from exo.shared.types.events import InstanceCreated, InstanceDeleted from exo.shared.types.events import InstanceCreated, InstanceDeleted
from exo.shared.types.memory import Memory from exo.shared.types.memory import Memory
@@ -52,8 +52,8 @@ def model_meta() -> ModelMetadata:
) )
def create_instance_command(model_meta: ModelMetadata) -> CreateInstance: def place_instance_command(model_meta: ModelMetadata) -> PlaceInstance:
return CreateInstance( return PlaceInstance(
command_id=CommandId(), command_id=CommandId(),
model_meta=model_meta, model_meta=model_meta,
sharding=Sharding.Pipeline, sharding=Sharding.Pipeline,
@@ -85,7 +85,7 @@ def test_get_instance_placements_create_instance(
available_memory available_memory
) # make it exactly fit across all nodes ) # make it exactly fit across all nodes
cic = create_instance_command(model_meta) cic = place_instance_command(model_meta)
node_id_a = NodeId() node_id_a = NodeId()
node_id_b = NodeId() node_id_b = NodeId()
node_id_c = NodeId() node_id_c = NodeId()
@@ -97,7 +97,7 @@ def test_get_instance_placements_create_instance(
topology.add_connection(create_connection(node_id_c, node_id_a)) topology.add_connection(create_connection(node_id_c, node_id_a))
# act # act
placements = get_instance_placements_after_create(cic, topology, {}) placements = place_instance(cic, topology, {})
# assert # assert
assert len(placements) == 1 assert len(placements) == 1
@@ -129,7 +129,7 @@ def test_get_instance_placements_one_node_exact_fit(
topology = Topology() topology = Topology()
node_id = NodeId() node_id = NodeId()
topology.add_node(create_node(1000 * 1024, node_id)) topology.add_node(create_node(1000 * 1024, node_id))
cic = create_instance_command( cic = place_instance_command(
ModelMetadata( ModelMetadata(
model_id=ModelId("test-model"), model_id=ModelId("test-model"),
storage_size=Memory.from_kb(1000), storage_size=Memory.from_kb(1000),
@@ -137,7 +137,7 @@ def test_get_instance_placements_one_node_exact_fit(
n_layers=10, n_layers=10,
), ),
) )
placements = get_instance_placements_after_create(cic, topology, {}) placements = place_instance(cic, topology, {})
assert len(placements) == 1 assert len(placements) == 1
instance_id = list(placements.keys())[0] instance_id = list(placements.keys())[0]
@@ -154,7 +154,7 @@ def test_get_instance_placements_one_node_fits_with_extra_memory(
topology = Topology() topology = Topology()
node_id = NodeId() node_id = NodeId()
topology.add_node(create_node(1001 * 1024, node_id)) topology.add_node(create_node(1001 * 1024, node_id))
cic = create_instance_command( cic = place_instance_command(
ModelMetadata( ModelMetadata(
model_id=ModelId("test-model"), model_id=ModelId("test-model"),
storage_size=Memory.from_kb(1000), storage_size=Memory.from_kb(1000),
@@ -162,7 +162,7 @@ def test_get_instance_placements_one_node_fits_with_extra_memory(
n_layers=10, n_layers=10,
), ),
) )
placements = get_instance_placements_after_create(cic, topology, {}) placements = place_instance(cic, topology, {})
assert len(placements) == 1 assert len(placements) == 1
instance_id = list(placements.keys())[0] instance_id = list(placements.keys())[0]
@@ -179,7 +179,7 @@ def test_get_instance_placements_one_node_not_fit(
topology = Topology() topology = Topology()
node_id = NodeId() node_id = NodeId()
topology.add_node(create_node(1000 * 1024, node_id)) topology.add_node(create_node(1000 * 1024, node_id))
cic = create_instance_command( cic = place_instance_command(
model_meta=ModelMetadata( model_meta=ModelMetadata(
model_id=ModelId("test-model"), model_id=ModelId("test-model"),
storage_size=Memory.from_kb(1001), storage_size=Memory.from_kb(1001),
@@ -189,7 +189,7 @@ def test_get_instance_placements_one_node_not_fit(
) )
with pytest.raises(ValueError, match="No cycles found with sufficient memory"): with pytest.raises(ValueError, match="No cycles found with sufficient memory"):
get_instance_placements_after_create(cic, topology, {}) place_instance(cic, topology, {})
def test_get_transition_events_no_change(instance: Instance): def test_get_transition_events_no_change(instance: Instance):
@@ -292,12 +292,12 @@ def test_placement_prioritizes_leaf_cycle_with_less_memory(
topology.add_connection(create_connection(node_id_e, node_id_y)) topology.add_connection(create_connection(node_id_e, node_id_y))
topology.add_connection(create_connection(node_id_f, node_id_z)) topology.add_connection(create_connection(node_id_f, node_id_z))
cic = create_instance_command( cic = place_instance_command(
model_meta=model_meta, model_meta=model_meta,
) )
# Act # Act
placements = get_instance_placements_after_create(cic, topology, {}) placements = place_instance(cic, topology, {})
# Assert the chosen cycle is A-B-C (contains at least one leaf node), even though # Assert the chosen cycle is A-B-C (contains at least one leaf node), even though
# D-E-F has more total memory. # D-E-F has more total memory.
@@ -420,7 +420,7 @@ def test_tensor_rdma_backend_connectivity_matrix(
topology.add_connection(conn_c_b) topology.add_connection(conn_c_b)
topology.add_connection(conn_a_c) topology.add_connection(conn_a_c)
cic = CreateInstance( cic = PlaceInstance(
sharding=Sharding.Tensor, sharding=Sharding.Tensor,
instance_meta=InstanceMeta.MlxJaccl, instance_meta=InstanceMeta.MlxJaccl,
command_id=CommandId(), command_id=CommandId(),
@@ -428,7 +428,7 @@ def test_tensor_rdma_backend_connectivity_matrix(
min_nodes=1, min_nodes=1,
) )
placements = get_instance_placements_after_create(cic, topology, {}) placements = place_instance(cic, topology, {})
assert len(placements) == 1 assert len(placements) == 1
instance_id = list(placements.keys())[0] instance_id = list(placements.keys())[0]

View File

@@ -5,7 +5,7 @@ from exo.utils.pydantic_ext import CamelCaseModel
class ModelCard(CamelCaseModel): class ModelCard(CamelCaseModel):
short_id: str short_id: str
model_id: str model_id: ModelId
name: str name: str
description: str description: str
tags: list[str] tags: list[str]
@@ -40,35 +40,63 @@ MODEL_CARDS: dict[str, ModelCard] = {
# n_layers=61, # n_layers=61,
# ), # ),
# ), # ),
"deepseek-v3.1": ModelCard( "deepseek-v3.1-4bit": ModelCard(
short_id="deepseek-v3.1", short_id="deepseek-v3.1-4bit",
model_id="mlx-community/DeepSeek-V3.1-8bit", model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"),
name="DeepSeek V3.1 (8-bit)",
description="""DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/DeepSeek-V3.1-8bit"),
pretty_name="DeepSeek V3.1 (8-bit)",
storage_size=Memory.from_kb(754706307),
n_layers=61,
),
),
"deepseek-v3.1:4bit": ModelCard(
short_id="deepseek-v3.1:4bit",
model_id="mlx-community/DeepSeek-V3.1-4bit",
name="DeepSeek V3.1 (4-bit)", name="DeepSeek V3.1 (4-bit)",
description="""DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset.""", description="""DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"), model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"),
pretty_name="DeepSeek V3.1 (4-bit)", pretty_name="DeepSeek V3.1 (4-bit)",
storage_size=Memory.from_kb(754706307 // 2), # TODO !!!!! storage_size=Memory.from_gb(378),
n_layers=61, n_layers=61,
), ),
), ),
"deepseek-v3.1-8bit": ModelCard(
short_id="deepseek-v3.1-8bit",
model_id=ModelId("mlx-community/DeepSeek-V3.1-8bit"),
name="DeepSeek V3.1 (8-bit)",
description="""DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/DeepSeek-V3.1-8bit"),
pretty_name="DeepSeek V3.1 (8-bit)",
storage_size=Memory.from_gb(713),
n_layers=61,
),
),
# "deepseek-v3.2": ModelCard(
# short_id="deepseek-v3.2",
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
# name="DeepSeek V3.2 (8-bit)",
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
# pretty_name="DeepSeek V3.2 (8-bit)",
# storage_size=Memory.from_kb(754706307),
# n_layers=61,
# hidden_size=7168,
# ),
# ),
# "deepseek-v3.2-4bit": ModelCard(
# short_id="deepseek-v3.2-4bit",
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
# name="DeepSeek V3.2 (4-bit)",
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
# pretty_name="DeepSeek V3.2 (4-bit)",
# storage_size=Memory.from_kb(754706307 // 2), # TODO !!!!!
# n_layers=61,
# hidden_size=7168,
# ),
# ),
# deepseek r1 # deepseek r1
# "deepseek-r1-0528:4bit": ModelCard( # "deepseek-r1-0528-4bit": ModelCard(
# short_id="deepseek-r1-0528:4bit", # short_id="deepseek-r1-0528-4bit",
# model_id="mlx-community/DeepSeek-R1-0528-4bit", # model_id="mlx-community/DeepSeek-R1-0528-4bit",
# name="DeepSeek-R1-0528 (4-bit)", # name="DeepSeek-R1-0528 (4-bit)",
# description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""", # description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""",
@@ -78,6 +106,7 @@ MODEL_CARDS: dict[str, ModelCard] = {
# pretty_name="DeepSeek R1 671B (4-bit)", # pretty_name="DeepSeek R1 671B (4-bit)",
# storage_size=Memory.from_kb(409706307), # storage_size=Memory.from_kb(409706307),
# n_layers=61, # n_layers=61,
# hidden_size=7168,
# ), # ),
# ), # ),
# "deepseek-r1-0528": ModelCard( # "deepseek-r1-0528": ModelCard(
@@ -91,226 +120,279 @@ MODEL_CARDS: dict[str, ModelCard] = {
# pretty_name="DeepSeek R1 671B (8-bit)", # pretty_name="DeepSeek R1 671B (8-bit)",
# storage_size=Memory.from_bytes(754998771712), # storage_size=Memory.from_bytes(754998771712),
# n_layers=61, # n_layers=61,
# . hidden_size=7168,
# ), # ),
# ), # ),
# kimi k2 # kimi k2
"kimi-k2-instruct-4bit": ModelCard( "kimi-k2-instruct-4bit": ModelCard(
short_id="kimi-k2-instruct-4bit", short_id="kimi-k2-instruct-4bit",
model_id="mlx-community/Kimi-K2-Instruct-4bit", model_id=ModelId("mlx-community/Kimi-K2-Instruct-4bit"),
name="Kimi K2 Instruct (4-bit)", name="Kimi K2 Instruct (4-bit)",
description="""Kimi K2 is a large language model trained on the Kimi K2 dataset.""", description="""Kimi K2 is a large language model trained on the Kimi K2 dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Kimi-K2-Instruct-4bit"), model_id=ModelId("mlx-community/Kimi-K2-Instruct-4bit"),
pretty_name="Kimi K2 Instruct (4-bit)", pretty_name="Kimi K2 Instruct (4-bit)",
storage_size=Memory.from_bytes(577597603840), storage_size=Memory.from_gb(578),
n_layers=61, n_layers=61,
), ),
), ),
"kimi-k2-thinking": ModelCard( "kimi-k2-thinking": ModelCard(
short_id="kimi-k2-thinking", short_id="kimi-k2-thinking",
model_id="mlx-community/Kimi-K2-Thinking", model_id=ModelId("mlx-community/Kimi-K2-Thinking"),
name="Kimi K2 Thinking", name="Kimi K2 Thinking (4-bit)",
description="""Kimi K2 Thinking is the latest, most capable version of open-source thinking model.""", description="""Kimi K2 Thinking is the latest, most capable version of open-source thinking model.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Kimi-K2-Thinking"), model_id=ModelId("mlx-community/Kimi-K2-Thinking"),
pretty_name="Kimi K2 Thinking", pretty_name="Kimi K2 Thinking (4-bit)",
storage_size=Memory.from_bytes(577597603840), storage_size=Memory.from_gb(658),
n_layers=61, n_layers=61,
), ),
), ),
# llama-3.1 # llama-3.1
"llama-3.1-8b": ModelCard( "llama-3.1-8b": ModelCard(
short_id="llama-3.1-8b", short_id="llama-3.1-8b",
model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"),
name="Llama 3.1 8B", name="Llama 3.1 8B (4-bit)",
description="""Llama 3.1 is a large language model trained on the Llama 3.1 dataset.""", description="""Llama 3.1 is a large language model trained on the Llama 3.1 dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"), model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"),
pretty_name="Llama 3.1 8B", pretty_name="Llama 3.1 8B (4-bit)",
storage_size=Memory.from_kb(4411528), storage_size=Memory.from_mb(4423),
n_layers=32, n_layers=32,
), ),
), ),
"llama-3.1-70b": ModelCard( "llama-3.1-70b": ModelCard(
short_id="llama-3.1-70b", short_id="llama-3.1-70b",
model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", model_id=ModelId("mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"),
name="Llama 3.1 70B", name="Llama 3.1 70B (4-bit)",
description="""Llama 3.1 is a large language model trained on the Llama 3.1 dataset.""", description="""Llama 3.1 is a large language model trained on the Llama 3.1 dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"), model_id=ModelId("mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"),
pretty_name="Llama 3.1 70B", pretty_name="Llama 3.1 70B (4-bit)",
storage_size=Memory.from_kb(38758160), storage_size=Memory.from_mb(38769),
n_layers=80, n_layers=80,
), ),
), ),
# llama-3.2 # llama-3.2
"llama-3.2-1b": ModelCard( "llama-3.2-1b": ModelCard(
short_id="llama-3.2-1b", short_id="llama-3.2-1b",
model_id="mlx-community/Llama-3.2-1B-Instruct-4bit", model_id=ModelId("mlx-community/Llama-3.2-1B-Instruct-4bit"),
name="Llama 3.2 1B", name="Llama 3.2 1B (4-bit)",
description="""Llama 3.2 is a large language model trained on the Llama 3.2 dataset.""", description="""Llama 3.2 is a large language model trained on the Llama 3.2 dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Llama-3.2-1B-Instruct-4bit"), model_id=ModelId("mlx-community/Llama-3.2-1B-Instruct-4bit"),
pretty_name="Llama 3.2 1B", pretty_name="Llama 3.2 1B (4-bit)",
storage_size=Memory.from_kb(678948), storage_size=Memory.from_mb(696),
n_layers=16, n_layers=16,
), ),
), ),
"llama-3.2-3b": ModelCard( "llama-3.2-3b": ModelCard(
short_id="llama-3.2-3b", short_id="llama-3.2-3b",
model_id="mlx-community/Llama-3.2-3B-Instruct-4bit", model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-4bit"),
name="Llama 3.2 3B", name="Llama 3.2 3B (4-bit)",
description="""Llama 3.2 is a large language model trained on the Llama 3.2 dataset.""", description="""Llama 3.2 is a large language model trained on the Llama 3.2 dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-4bit"), model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-4bit"),
pretty_name="Llama 3.2 3B", pretty_name="Llama 3.2 3B (4-bit)",
storage_size=Memory.from_kb(1765062), storage_size=Memory.from_mb(1777),
n_layers=28,
),
),
"llama-3.2-3b-8bit": ModelCard(
short_id="llama-3.2-3b-8bit",
model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-8bit"),
name="Llama 3.2 3B (8-bit)",
description="""Llama 3.2 is a large language model trained on the Llama 3.2 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-8bit"),
pretty_name="Llama 3.2 3B (8-bit)",
storage_size=Memory.from_mb(3339),
n_layers=28, n_layers=28,
), ),
), ),
# llama-3.3 # llama-3.3
"llama-3.3-70b": ModelCard( "llama-3.3-70b": ModelCard(
short_id="llama-3.3-70b", short_id="llama-3.3-70b",
model_id="mlx-community/Llama-3.3-70B-Instruct-4bit", model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-4bit"),
name="Llama 3.3 70B (4-bit)", name="Llama 3.3 70B (4-bit)",
description="""The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)""", description="""The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-4bit"), model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-4bit"),
pretty_name="Llama 3.3 70B", pretty_name="Llama 3.3 70B",
storage_size=Memory.from_kb(38758160), storage_size=Memory.from_mb(38769),
n_layers=80, n_layers=80,
), ),
), ),
"llama-3.3-70b-8bit": ModelCard( "llama-3.3-70b-8bit": ModelCard(
short_id="llama-3.3-70b-8bit", short_id="llama-3.3-70b-8bit",
model_id="mlx-community/Llama-3.3-70B-Instruct-8bit", model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-8bit"),
name="Llama 3.3 70B (8-bit)", name="Llama 3.3 70B (8-bit)",
description="""The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)""", description="""The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-8bit"), model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-8bit"),
pretty_name="Llama 3.3 70B (8-bit)", pretty_name="Llama 3.3 70B (8-bit)",
storage_size=Memory.from_kb(77516320), storage_size=Memory.from_mb(73242),
n_layers=80, n_layers=80,
), ),
), ),
"llama-3.3-70b-fp16": ModelCard( "llama-3.3-70b-fp16": ModelCard(
short_id="llama-3.3-70b-fp16", short_id="llama-3.3-70b-fp16",
model_id="mlx-community/llama-3.3-70b-instruct-fp16", model_id=ModelId("mlx-community/llama-3.3-70b-instruct-fp16"),
name="Llama 3.3 70B (FP16)", name="Llama 3.3 70B (FP16)",
description="""The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)""", description="""The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/llama-3.3-70b-instruct-fp16"), model_id=ModelId("mlx-community/llama-3.3-70b-instruct-fp16"),
pretty_name="Llama 3.3 70B (FP16)", pretty_name="Llama 3.3 70B (FP16)",
storage_size=Memory.from_kb(155032640), storage_size=Memory.from_mb(137695),
n_layers=80, n_layers=80,
), ),
), ),
# phi-3 # phi-3
"phi-3-mini": ModelCard( "phi-3-mini": ModelCard(
short_id="phi-3-mini", short_id="phi-3-mini",
model_id="mlx-community/Phi-3-mini-128k-instruct-4bit", model_id=ModelId("mlx-community/Phi-3-mini-128k-instruct-4bit"),
name="Phi 3 Mini 128k", name="Phi 3 Mini 128k (4-bit)",
description="""Phi 3 Mini is a large language model trained on the Phi 3 Mini dataset.""", description="""Phi 3 Mini is a large language model trained on the Phi 3 Mini dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Phi-3-mini-128k-instruct-4bit"), model_id=ModelId("mlx-community/Phi-3-mini-128k-instruct-4bit"),
pretty_name="Phi 3 Mini 128k", pretty_name="Phi 3 Mini 128k (4-bit)",
storage_size=Memory.from_kb(2099262), storage_size=Memory.from_mb(2099),
n_layers=32, n_layers=32,
), ),
), ),
# "phi-3-mini:128k": ModelCard(
# short_id="phi-3-mini:128k",
# model_id="mlx-community/Phi-3-mini-128k-instruct-4bit",
# name="Phi 3 Mini 128k",
# description="""Phi 3 Mini is a large language model trained on the Phi 3 Mini dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/Phi-3-mini-128k-instruct-4bit"),
# pretty_name="Phi 3 Mini 128k",
# storage_size=Memory.from_kb(2099262),
# n_layers=32,
# ),
# ),
# qwen3 # qwen3
"qwen3-0.6b": ModelCard( "qwen3-0.6b": ModelCard(
short_id="qwen3-0.6b", short_id="qwen3-0.6b",
model_id="mlx-community/Qwen3-0.6B-4bit", model_id=ModelId("mlx-community/Qwen3-0.6B-4bit"),
name="Qwen3 0.6B", name="Qwen3 0.6B (4-bit)",
description="""Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset.""", description="""Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-0.6B-4bit"), model_id=ModelId("mlx-community/Qwen3-0.6B-4bit"),
pretty_name="Qwen3 0.6B", pretty_name="Qwen3 0.6B (4-bit)",
storage_size=Memory.from_kb(327512), storage_size=Memory.from_mb(327),
n_layers=28,
),
),
"qwen3-0.6b-8bit": ModelCard(
short_id="qwen3-0.6b-8bit",
model_id=ModelId("mlx-community/Qwen3-0.6B-8bit"),
name="Qwen3 0.6B (8-bit)",
description="""Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-0.6B-8bit"),
pretty_name="Qwen3 0.6B (8-bit)",
storage_size=Memory.from_mb(666),
n_layers=28, n_layers=28,
), ),
), ),
"qwen3-30b": ModelCard( "qwen3-30b": ModelCard(
short_id="qwen3-30b", short_id="qwen3-30b",
model_id="mlx-community/Qwen3-30B-A3B-4bit", model_id=ModelId("mlx-community/Qwen3-30B-A3B-4bit"),
name="Qwen3 30B (Active 3B)", name="Qwen3 30B A3B (4-bit)",
description="""Qwen3 30B is a large language model trained on the Qwen3 30B dataset.""", description="""Qwen3 30B is a large language model trained on the Qwen3 30B dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-30B-A3B-4bit"), model_id=ModelId("mlx-community/Qwen3-30B-A3B-4bit"),
pretty_name="Qwen3 30B (Active 3B)", pretty_name="Qwen3 30B A3B (4-bit)",
storage_size=Memory.from_kb(16772092), storage_size=Memory.from_mb(16797),
n_layers=48, n_layers=48,
), ),
), ),
# "qwen3-235b-a22b": ModelCard( "qwen3-30b-8bit": ModelCard(
# short_id="qwen3-235b-a22b", short_id="qwen3-30b-8bit",
# model_id="mlx-community/Qwen3-235B-A22B-4bit", model_id=ModelId("mlx-community/Qwen3-30B-A3B-8bit"),
# name="Qwen3 235B, Active 22B (4-bit)", name="Qwen3 30B A3B (8-bit)",
# description="""Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset.""", description="""Qwen3 30B is a large language model trained on the Qwen3 30B dataset.""",
# tags=[], tags=[],
# metadata=ModelMetadata( metadata=ModelMetadata(
# model_id=ModelId("mlx-community/Qwen3-235B-A22B-4bit"), model_id=ModelId("mlx-community/Qwen3-30B-A3B-8bit"),
# pretty_name="Qwen3 235B, Active 22B (4-bit)", pretty_name="Qwen3 30B A3B (8-bit)",
# storage_size=Memory.from_kb(123207680), storage_size=Memory.from_mb(31738),
# n_layers=94, n_layers=48,
# ), ),
# ), ),
"qwen3-235b-a22b-4bit": ModelCard(
short_id="qwen3-235b-a22b-4bit",
model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"),
name="Qwen3 235B A22B (4-bit)",
description="""Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"),
pretty_name="Qwen3 235B A22B (4-bit)",
storage_size=Memory.from_gb(132),
n_layers=94,
),
),
"qwen3-235b-a22b-8bit": ModelCard( "qwen3-235b-a22b-8bit": ModelCard(
short_id="qwen3-235b-a22b-8bit", short_id="qwen3-235b-a22b-8bit",
model_id="mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit", model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"),
name="Qwen3 235B, Active 22B (8-bit)", name="Qwen3 235B A22B (8-bit)",
description="""Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset.""", description="""Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"), model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"),
pretty_name="Qwen3 235B, Active 22B (8-bit)", pretty_name="Qwen3 235B A22B (8-bit)",
storage_size=Memory.from_kb(246415360), storage_size=Memory.from_gb(250),
n_layers=94, n_layers=94,
), ),
), ),
"qwen3-coder-480b-a35b-4bit": ModelCard(
short_id="qwen3-coder-480b-a35b-4bit",
model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"),
name="Qwen3 Coder 480B A35B (4-bit)",
description="""Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"),
pretty_name="Qwen3 Coder 480B A35B (4-bit)",
storage_size=Memory.from_gb(270),
n_layers=62,
),
),
"qwen3-coder-480b-a35b-8bit": ModelCard(
short_id="qwen3-coder-480b-a35b-8bit",
model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"),
name="Qwen3 Coder 480B A35B (8-bit)",
description="""Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"),
pretty_name="Qwen3 Coder 480B A35B (8-bit)",
storage_size=Memory.from_gb(540),
n_layers=62,
),
),
# granite # granite
"granite-3.3-2b": ModelCard( "granite-3.3-2b": ModelCard(
short_id="granite-3.3-2b", short_id="granite-3.3-2b",
model_id="mlx-community/granite-3.3-2b-instruct-fp16", model_id=ModelId("mlx-community/granite-3.3-2b-instruct-fp16"),
name="Granite 3.3 2B", name="Granite 3.3 2B (FP16)",
description="""Granite-3.3-2B-Instruct is a 2-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.""", description="""Granite-3.3-2B-Instruct is a 2-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.""",
tags=[], tags=[],
metadata=ModelMetadata( metadata=ModelMetadata(
model_id=ModelId("mlx-community/granite-3.3-2b-instruct-fp16"), model_id=ModelId("mlx-community/granite-3.3-2b-instruct-fp16"),
pretty_name="Granite 3.3 2B", pretty_name="Granite 3.3 2B (FP16)",
storage_size=Memory.from_kb(4948320), storage_size=Memory.from_mb(4951),
n_layers=40, n_layers=40,
), ),
), ),
# "granite-3.3-8b": ModelCard( # "granite-3.3-8b": ModelCard(
# short_id="granite-3.3-8b", # short_id="granite-3.3-8b",
# model_id="mlx-community/granite-3.3-8b-instruct-fp16", # model_id=ModelId("mlx-community/granite-3.3-8b-instruct-fp16"),
# name="Granite 3.3 8B", # name="Granite 3.3 8B",
# description="""Granite-3.3-8B-Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.""", # description="""Granite-3.3-8B-Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.""",
# tags=[], # tags=[],
@@ -335,4 +417,35 @@ MODEL_CARDS: dict[str, ModelCard] = {
# n_layers=30, # n_layers=30,
# ), # ),
# ), # ),
# gpt-oss
# "gpt-oss-120b-MXFP4-Q8": ModelCard(
# short_id="gpt-oss-120b-MXFP4-Q8",
# model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
# name="GPT-OSS 120B (MXFP4-Q8, MLX)",
# description="""OpenAI's GPT-OSS 120B is a 117B-parameter Mixture-of-Experts model designed for high-reasoning and general-purpose use; this variant is a 4-bit MLX conversion for Apple Silicon.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
# pretty_name="GPT-OSS 120B (MXFP4-Q8, MLX)",
# storage_size=Memory.from_kb(68_996_301),
# n_layers=36,
# hidden_size=2880,
# supports_tensor=True,
# ),
# ),
# "gpt-oss-20b-4bit": ModelCard(
# short_id="gpt-oss-20b-4bit",
# model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q4"),
# name="GPT-OSS 20B (MXFP4-Q4, MLX)",
# description="""OpenAI's GPT-OSS 20B is a medium-sized MoE model for lower-latency and local or specialized use cases; this MLX variant uses MXFP4 4-bit quantization.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q4"),
# pretty_name="GPT-OSS 20B (MXFP4-Q4, MLX)",
# storage_size=Memory.from_kb(11_744_051),
# n_layers=24,
# hidden_size=2880,
# supports_tensor=True,
# ),
# ),
} }

View File

@@ -1,11 +1,12 @@
import time import time
from typing import Any, Literal from typing import Any, Literal
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, field_validator
from pydantic_core import PydanticUseDefault
from exo.shared.types.common import CommandId from exo.shared.types.common import CommandId
from exo.shared.types.models import ModelMetadata from exo.shared.types.models import ModelId
from exo.shared.types.worker.instances import InstanceId, InstanceMeta from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
from exo.shared.types.worker.shards import Sharding from exo.shared.types.worker.shards import Sharding
FinishReason = Literal[ FinishReason = Literal[
@@ -24,6 +25,8 @@ class ModelListModel(BaseModel):
description: str = Field(default="") description: str = Field(default="")
context_length: int = Field(default=0) context_length: int = Field(default=0)
tags: list[str] = Field(default=[]) tags: list[str] = Field(default=[])
storage_size_megabytes: int = Field(default=0)
supports_tensor: bool = Field(default=False)
class ModelList(BaseModel): class ModelList(BaseModel):
@@ -132,13 +135,37 @@ class ChatCompletionTaskParams(BaseModel):
user: str | None = None user: str | None = None
class CreateInstanceTaskParams(BaseModel): class PlaceInstanceParams(BaseModel):
# TODO: in future the user could specify a specific Instance, not just a model_id
model_id: str model_id: str
sharding: Sharding = Sharding.Pipeline sharding: Sharding = Sharding.Pipeline
instance_meta: InstanceMeta = InstanceMeta.MlxRing instance_meta: InstanceMeta = InstanceMeta.MlxRing
min_nodes: int = 1 min_nodes: int = 1
@field_validator("sharding", "instance_meta", mode="plain")
@classmethod
def use_default(cls, v: object):
    """Accept ``v`` only when it is a truthy Sharding/InstanceMeta instance.

    Any other input raises ``PydanticUseDefault`` so that the field's
    declared default is used instead.

    NOTE(review): the same check guards both fields, so a ``Sharding`` value
    supplied for ``instance_meta`` (or vice versa) passes this validator.
    NOTE(review): with ``mode="plain"`` this validator presumably receives the
    raw input, so plain string values would also fall back to the default —
    confirm that is intended.
    """
    if v and isinstance(v, (Sharding, InstanceMeta)):
        return v
    # Falsy or wrongly-typed input: ask pydantic to substitute the default.
    raise PydanticUseDefault()
class CreateInstanceParams(BaseModel):
    """Parameters wrapping a single, fully specified ``Instance``.

    NOTE(review): presumably the request body for creating an instance
    directly (as opposed to placing one from a model id) — confirm against
    the API handler.
    """

    instance: Instance
class PlacementPreview(BaseModel):
    """Preview entry for one model / sharding / instance_meta combination.

    NOTE(review): presumably ``instance`` is populated when a placement was
    found and ``error`` when it was not — confirm against the producer.
    """

    # The combination this preview refers to.
    model_id: ModelId
    sharding: Sharding
    instance_meta: InstanceMeta
    # Optional; defaults to None.
    instance: Instance | None = None
    # Keys are NodeId strings, values are additional bytes that would be used on that node
    memory_delta_by_node: dict[str, int] | None = None
    # Optional message; defaults to None.
    error: str | None = None
class PlacementPreviewResponse(BaseModel):
    """Response model holding a list of ``PlacementPreview`` entries."""

    previews: list[PlacementPreview]
class DeleteInstanceTaskParams(BaseModel): class DeleteInstanceTaskParams(BaseModel):
instance_id: str instance_id: str
@@ -147,7 +174,6 @@ class DeleteInstanceTaskParams(BaseModel):
class CreateInstanceResponse(BaseModel): class CreateInstanceResponse(BaseModel):
message: str message: str
command_id: CommandId command_id: CommandId
model_meta: ModelMetadata
class DeleteInstanceResponse(BaseModel): class DeleteInstanceResponse(BaseModel):

View File

@@ -3,7 +3,7 @@ from pydantic import Field
from exo.shared.types.api import ChatCompletionTaskParams from exo.shared.types.api import ChatCompletionTaskParams
from exo.shared.types.common import CommandId, NodeId from exo.shared.types.common import CommandId, NodeId
from exo.shared.types.models import ModelMetadata from exo.shared.types.models import ModelMetadata
from exo.shared.types.worker.instances import InstanceId, InstanceMeta from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
from exo.shared.types.worker.shards import Sharding from exo.shared.types.worker.shards import Sharding
from exo.utils.pydantic_ext import CamelCaseModel, TaggedModel from exo.utils.pydantic_ext import CamelCaseModel, TaggedModel
@@ -20,13 +20,17 @@ class ChatCompletion(BaseCommand):
request_params: ChatCompletionTaskParams request_params: ChatCompletionTaskParams
class CreateInstance(BaseCommand): class PlaceInstance(BaseCommand):
model_meta: ModelMetadata model_meta: ModelMetadata
sharding: Sharding sharding: Sharding
instance_meta: InstanceMeta instance_meta: InstanceMeta
min_nodes: int min_nodes: int
class CreateInstance(BaseCommand):
    """Command carrying a concrete ``Instance``.

    NOTE(review): presumably instructs the receiver to create exactly this
    instance, without running placement — confirm against the command handler.
    """

    instance: Instance
class DeleteInstance(BaseCommand): class DeleteInstance(BaseCommand):
instance_id: InstanceId instance_id: InstanceId
@@ -43,6 +47,7 @@ Command = (
TestCommand TestCommand
| RequestEventLog | RequestEventLog
| ChatCompletion | ChatCompletion
| PlaceInstance
| CreateInstance | CreateInstance
| DeleteInstance | DeleteInstance
| TaskFinished | TaskFinished

View File

@@ -47,6 +47,11 @@ class Memory(CamelCaseModel):
"""Construct a new Memory object from a number of megabytes""" """Construct a new Memory object from a number of megabytes"""
return cls(in_bytes=round(val * (1024**2))) return cls(in_bytes=round(val * (1024**2)))
@classmethod
def from_gb(cls, val: float) -> Self:
"""Construct a new Memory object from a number of megabytes"""
return cls(in_bytes=round(val * (1024**3)))
@property @property
def in_gb(self) -> float: def in_gb(self) -> float:
"""The approximate gigabytes this memory represents.""" """The approximate gigabytes this memory represents."""

View File

@@ -0,0 +1,45 @@
import os
import sys
from pathlib import Path
from typing import cast
def find_dashboard() -> Path:
    """Return the directory holding the dashboard assets.

    Locations are tried in order: the ``DASHBOARD_DIR`` environment variable,
    a ``dashboard/build`` directory in an ancestor of this module, and
    finally a ``dashboard`` directory inside a frozen bundle. The first
    locator that yields a path wins.

    Raises:
        FileNotFoundError: if none of the locators finds anything.
    """
    locators = (
        _find_dashboard_in_env,
        _find_dashboard_in_repo,
        _find_dashboard_in_bundle,
    )
    for locate in locators:
        found = locate()
        if found:
            return found
    raise FileNotFoundError(
        "Unable to locate dashboard assets. Export DASHBOARD_DIR or rebuild the binary."
    )
def _find_dashboard_in_env() -> Path | None:
env = os.environ.get("DASHBOARD_DIR")
if not env:
return None
resolved_env = Path(env).expanduser().resolve()
return resolved_env
def _find_dashboard_in_repo() -> Path | None:
current_module = Path(__file__).resolve()
for parent in current_module.parents:
build = parent / "dashboard" / "build"
if build.is_dir() and (build / "index.html").exists():
return build
return None
def _find_dashboard_in_bundle() -> Path | None:
frozen_root = cast(str | None, getattr(sys, "_MEIPASS", None))
if frozen_root is None:
return None
candidate = Path(frozen_root) / "dashboard"
if candidate.is_dir():
return candidate
return None