Point mlx-lm to davidmcc73 fork with context parallelism support

Add context parallelism support to DeepSeek sharding
Store pre-shard head count and distributed group on each attention layer during sharding, enabling automatic TP→CP switching at runtime when context length exceeds a threshold. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-19 07:17:30 -05:00 · 2026-02-02 19:16:43 +00:00 · 2026-02-02 18:35:20 +00:00 · 2026-01-28 18:01:23 +00:00 · 2026-01-28 15:23:28 +00:00 · 2026-01-28 15:11:57 +00:00
220 changed files with 7565 additions and 14954 deletions
--- a/.github/actions/typecheck/action.yml
+++ b/.github/actions/typecheck/action.yml
@@ -0,0 +1,12 @@
+name: Type Check
+
+description: "Run type checker"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Run type checker
+      run: |
+        nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just sync
+        nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just check
+      shell: bash
--- a/.github/workflows/build-app.yml
+++ b/.github/workflows/build-app.yml
@@ -396,7 +396,7 @@ jobs:
          path: output/EXO-${{ env.RELEASE_VERSION }}.dmg

      - name: Upload to S3
-        if: env.SPARKLE_S3_BUCKET != ''
+        if: env.SPARKLE_S3_BUCKET != '' && github.ref_type == 'tag'
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -412,12 +412,6 @@ jobs:
            PREFIX="${PREFIX}/"
          fi
          DMG_NAME="EXO-${RELEASE_VERSION}.dmg"
-
-          if [[ "${{ github.ref_type }}" != "tag" ]]; then
-            aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-${GITHUB_SHA}.dmg"
-            exit 0
-          fi
-
          aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}${DMG_NAME}"
          if [[ "$IS_ALPHA" != "true" ]]; then
            aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-latest.dmg"
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -26,14 +26,73 @@ jobs:
          name: exo
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"

-      - name: Load nix develop environment
-        run: nix run github:nicknovitski/nix-develop/v1
+      - name: Configure git user
+        run: |
+          git config --local user.email "github-actions@users.noreply.github.com"
+          git config --local user.name  "github-actions bot"
+        shell: bash

-      - name: Sync dependencies
-        run: uv sync --all-packages
+      - name: Pull LFS files
+        run: |
+          echo "Pulling Git LFS files..."
+          git lfs pull
+        shell: bash

-      - name: Run type checker
-        run: uv run basedpyright --project pyproject.toml
+      - name: Setup Nix Environment
+        run: |
+          echo "Checking for nix installation..."
+          
+          # Check if nix binary exists directly
+          if [ -f /nix/var/nix/profiles/default/bin/nix ]; then
+            echo "Found nix binary at /nix/var/nix/profiles/default/bin/nix"
+            export PATH="/nix/var/nix/profiles/default/bin:$PATH"
+            echo "PATH=$PATH" >> $GITHUB_ENV
+            nix --version
+          elif [ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]; then
+            echo "Found nix profile script, sourcing..."
+            source /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
+            nix --version
+          elif command -v nix >/dev/null 2>&1; then
+            echo "Nix already in PATH"
+            nix --version
+          else
+            echo "Nix not found. Debugging info:"
+            echo "Contents of /nix/var/nix/profiles/default/:"
+            ls -la /nix/var/nix/profiles/default/ 2>/dev/null || echo "Directory not found"
+            echo "Contents of /nix/var/nix/profiles/default/bin/:"
+            ls -la /nix/var/nix/profiles/default/bin/ 2>/dev/null || echo "Directory not found"
+            exit 1
+          fi
+        shell: bash
+
+      - name: Configure basedpyright include for local MLX
+        run: |
+          RUNNER_LABELS='${{ toJSON(runner.labels) }}'
+          if echo "$RUNNER_LABELS" | grep -q "local_mlx"; then
+            if [ -d "/Users/Shared/mlx" ]; then
+              echo "Updating [tool.basedpyright].include to use /Users/Shared/mlx"
+              awk '
+                BEGIN { in_section=0 }  # flag must not be named "in": that is a reserved awk keyword
+                /^\[tool\.basedpyright\]/ { in_section=1; print; next }  # entering the target table
+                in_section && /^\[/ { in_section=0 }  # next section
+                in_section && /^[ \t]*include[ \t]*=/ {
+                  print "include = [\"/Users/Shared/mlx\"]"
+                  next
+                }
+                { print }
+              ' pyproject.toml > pyproject.toml.tmp && mv pyproject.toml.tmp pyproject.toml
+
+              echo "New [tool.basedpyright] section:"
+              sed -n '/^\[tool\.basedpyright\]/,/^\[/p' pyproject.toml | sed '$d' || true
+            else
+              echo "local_mlx tag present but /Users/Shared/mlx not found; leaving pyproject unchanged."
+            fi
+          else
+            echo "Runner does not have 'local_mlx' tag; leaving pyproject unchanged."
+          fi
+        shell: bash
+
+      - uses: ./.github/actions/typecheck

  nix:
    name: Build and check (${{ matrix.system }})
@@ -64,63 +123,6 @@ jobs:
          name: exo
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"

-      - name: Build Metal packages (macOS only)
-        if: runner.os == 'macOS'
-        run: |
-          # Try to build metal-toolchain first (may succeed via cachix cache hit)
-          if nix build .#metal-toolchain 2>/dev/null; then
-            echo "metal-toolchain built successfully (likely cache hit)"
-          else
-            echo "metal-toolchain build failed, extracting from Xcode..."
-
-            NAR_HASH="sha256-ayR5mXN4sZAddwKEG2OszGRF93k9ZFc7H0yi2xbylQw="
-            NAR_NAME="metal-toolchain-17C48.nar"
-
-            # Use RUNNER_TEMP to avoid /tmp symlink issues on macOS
-            WORK_DIR="${RUNNER_TEMP}/metal-work"
-            mkdir -p "$WORK_DIR"
-
-            # Download the Metal toolchain component
-            xcodebuild -downloadComponent MetalToolchain
-
-            # Find and mount the DMG
-            DMG_PATH=$(find /System/Library/AssetsV2/com_apple_MobileAsset_MetalToolchain -name '*.dmg' 2>/dev/null | head -1)
-            if [ -z "$DMG_PATH" ]; then
-              echo "Error: Could not find Metal toolchain DMG"
-              exit 1
-            fi
-
-            echo "Found DMG at: $DMG_PATH"
-            hdiutil attach "$DMG_PATH" -mountpoint "${WORK_DIR}/metal-dmg"
-
-            # Copy the toolchain
-            cp -R "${WORK_DIR}/metal-dmg/Metal.xctoolchain" "${WORK_DIR}/metal-export"
-            hdiutil detach "${WORK_DIR}/metal-dmg"
-
-            # Create NAR and add to store
-            nix nar pack "${WORK_DIR}/metal-export" > "${WORK_DIR}/${NAR_NAME}"
-            STORE_PATH=$(nix store add --mode flat "${WORK_DIR}/${NAR_NAME}")
-            echo "Added NAR to store: $STORE_PATH"
-
-            # Verify the hash matches
-            ACTUAL_HASH=$(nix hash file "${WORK_DIR}/${NAR_NAME}")
-            if [ "$ACTUAL_HASH" != "$NAR_HASH" ]; then
-              echo "Warning: NAR hash mismatch!"
-              echo "Expected: $NAR_HASH"
-              echo "Actual:   $ACTUAL_HASH"
-              echo "The metal-toolchain.nix may need updating"
-            fi
-
-            # Clean up
-            rm -rf "$WORK_DIR"
-
-            # Retry the build now that NAR is in store
-            nix build .#metal-toolchain
-          fi
-
-          # Build mlx (depends on metal-toolchain)
-          nix build .#mlx
-
      - name: Build all Nix outputs
        run: |
          nix flake show --json | jq -r '
@@ -132,16 +134,3 @@ jobs:

      - name: Run nix flake check
        run: nix flake check
-
-      - name: Run pytest (macOS only)
-        if: runner.os == 'macOS'
-        run: |
-          # Build the test environment (requires relaxed sandbox for uv2nix on macOS)
-          TEST_ENV=$(nix build '.#exo-test-env' --option sandbox relaxed --print-out-paths)
-
-          # Run pytest outside sandbox (needs GPU access for MLX)
-          export HOME="$RUNNER_TEMP"
-          export EXO_TESTS=1
-          export EXO_DASHBOARD_DIR="$PWD/dashboard/" 
-          export EXO_RESOURCES_DIR="$PWD/resources" 
-          $TEST_ENV/bin/python -m pytest src -m "not slow" --import-mode=importlib
--- a/.gitignore
+++ b/.gitignore
@@ -28,13 +28,3 @@ target/
 dashboard/build/
 dashboard/node_modules/
 dashboard/.svelte-kit/
-
-# host config snapshots
-hosts_*.json
-.swp
-
-# bench files
-bench/**/*.json
-
-# tmp
-tmp/models
--- a/.mlx_typings/mflux/models/flux/variants/kontext/init.pyi
+++ b/.mlx_typings/mflux/models/flux/variants/kontext/init.pyi
@@ -1,7 +0,0 @@
-"""
-This type stub file was generated by pyright.
-"""
-
-from mflux.models.flux.variants.kontext.flux_kontext import Flux1Kontext
-
-__all__ = ["Flux1Kontext"]
--- a/.mlx_typings/mflux/models/flux/variants/kontext/flux_kontext.pyi
+++ b/.mlx_typings/mflux/models/flux/variants/kontext/flux_kontext.pyi
@@ -1,49 +0,0 @@
-"""
-This type stub file was generated by pyright.
-"""
-
-from pathlib import Path
-from typing import Any
-
-from mlx import nn
-
-from mflux.models.common.config.model_config import ModelConfig
-from mflux.models.flux.model.flux_text_encoder.clip_encoder.clip_encoder import (
-    CLIPEncoder,
-)
-from mflux.models.flux.model.flux_text_encoder.t5_encoder.t5_encoder import T5Encoder
-from mflux.models.flux.model.flux_transformer.transformer import Transformer
-from mflux.models.flux.model.flux_vae.vae import VAE
-from mflux.utils.generated_image import GeneratedImage
-
-class Flux1Kontext(nn.Module):
-    vae: VAE
-    transformer: Transformer
-    t5_text_encoder: T5Encoder
-    clip_text_encoder: CLIPEncoder
-    bits: int | None
-    lora_paths: list[str] | None
-    lora_scales: list[float] | None
-    prompt_cache: dict[str, Any]
-    tokenizers: dict[str, Any]
-
-    def __init__(
-        self,
-        quantize: int | None = ...,
-        model_path: str | None = ...,
-        lora_paths: list[str] | None = ...,
-        lora_scales: list[float] | None = ...,
-        model_config: ModelConfig = ...,
-    ) -> None: ...
-    def generate_image(
-        self,
-        seed: int,
-        prompt: str,
-        num_inference_steps: int = ...,
-        height: int = ...,
-        width: int = ...,
-        guidance: float = ...,
-        image_path: Path | str | None = ...,
-        image_strength: float | None = ...,
-        scheduler: str = ...,
-    ) -> GeneratedImage: ...
--- a/.mlx_typings/mflux/models/flux/variants/kontext/kontext_util.pyi
+++ b/.mlx_typings/mflux/models/flux/variants/kontext/kontext_util.pyi
@@ -1,16 +0,0 @@
-"""
-This type stub file was generated by pyright.
-"""
-
-import mlx.core as mx
-
-from mflux.models.flux.model.flux_vae.vae import VAE
-
-class KontextUtil:
-    @staticmethod
-    def create_image_conditioning_latents(
-        vae: VAE,
-        height: int,
-        width: int,
-        image_path: str,
-    ) -> tuple[mx.array, mx.array]: ...
--- a/.mlx_typings/mlx/core/init.pyi
+++ b/.mlx_typings/mlx/core/init.pyi
@@ -1139,7 +1139,7 @@ class array:
    ) -> array:
        """See :func:`flatten`."""

-    def reshape(self, *shape: int, stream: Stream | Device | None = ...) -> array:
+    def reshape(self, *shape, stream: Stream | Device | None = ...) -> array:
        """
        Equivalent to :func:`reshape` but the shape can be passed either as a
        :obj:`tuple` or as separate arguments.
@@ -1222,7 +1222,7 @@ class array:
    ) -> array:
        """See :func:`swapaxes`."""

-    def transpose(self, *axes: int, stream: Stream | Device | None = ...) -> array:
+    def transpose(self, *axes, stream: Stream | Device | None = ...) -> array:
        """
        Equivalent to :func:`transpose` but the axes can be passed either as
        a tuple or as separate arguments.
@@ -2366,7 +2366,7 @@ class custom_function:
 def default_device() -> Device:
    """Get the default device."""

-def default_stream(device: Device | DeviceType) -> Stream:
+def default_stream(device: Device) -> Stream:
    """Get the device's default stream."""

 def degrees(a: array, /, *, stream: Stream | Device | None = ...) -> array:
--- a/.mlx_typings/mlx/nn/layers/convolution.pyi
+++ b/.mlx_typings/mlx/nn/layers/convolution.pyi
@@ -30,9 +30,6 @@ class Conv1d(Module):
        bias (bool, optional): If ``True`` add a learnable bias to the output.
            Default: ``True``
    """
-
-    weight: mx.array
-    groups: int
    def __init__(
        self,
        in_channels: int,
--- a/.mlx_typings/mlx_lm/models/cache.pyi
+++ b/.mlx_typings/mlx_lm/models/cache.pyi
@@ -11,10 +11,7 @@ import mlx.core as mx
 class Cache(Protocol):
    keys: mx.array
    values: mx.array
-    offset: int
-    def update_and_fetch(
-        self, keys: mx.array, values: mx.array
-    ) -> tuple[mx.array, mx.array]: ...
+    def update_and_fetch(self, keys: mx.array, values: mx.array) -> None: ...
    @property
    def state(self) -> tuple[mx.array, mx.array]: ...
    @state.setter
@@ -90,7 +87,6 @@ def create_attention_mask(
 class _BaseCache(Cache):
    keys: mx.array
    values: mx.array
-    offset: int
    @property
    def state(self) -> tuple[mx.array, mx.array]: ...
    @state.setter
--- a/.mlx_typings/mlx_lm/models/deepseek_v3.pyi
+++ b/.mlx_typings/mlx_lm/models/deepseek_v3.pyi
@@ -5,7 +5,6 @@ from typing import Any, Dict, Optional

 import mlx.core as mx
 import mlx.nn as nn
-from mlx_lm.models.mla import MultiLinear

 from .base import BaseModelArgs
 from .switch_layers import SwitchGLU
@@ -61,10 +60,7 @@ class DeepseekV3Attention(nn.Module):
    q_b_proj: nn.Linear
    kv_a_proj_with_mqa: nn.Linear
    kv_a_layernorm: nn.RMSNorm
-    # kv_b_proj: nn.Linear
-    embed_q: MultiLinear
-    unembed_out: MultiLinear
-
+    kv_b_proj: nn.Linear
    o_proj: nn.Linear
    rope: Any

--- a/.mlx_typings/mlx_lm/models/glm4_moe.pyi
+++ b/.mlx_typings/mlx_lm/models/glm4_moe.pyi
@@ -1,153 +0,0 @@
-from dataclasses import dataclass
-from typing import Any, Dict, Optional
-
-import mlx.core as mx
-import mlx.nn as nn
-
-from .base import BaseModelArgs
-from .switch_layers import SwitchGLU
-
-@dataclass
-class ModelArgs(BaseModelArgs):
-    model_type: str
-    vocab_size: int
-    hidden_size: int
-    intermediate_size: int
-    max_position_embeddings: int
-    moe_intermediate_size: int
-    norm_topk_prob: bool
-    num_attention_heads: int
-    n_group: int
-    head_dim: int
-    topk_group: int
-    n_shared_experts: int
-    n_routed_experts: int
-    routed_scaling_factor: float
-    num_experts_per_tok: int
-    first_k_dense_replace: int
-    num_hidden_layers: int
-    num_key_value_heads: int
-    rms_norm_eps: float
-    rope_theta: float
-    rope_scaling: Optional[Dict[str, Any]]
-    use_qk_norm: bool
-    tie_word_embeddings: bool
-    attention_bias: bool
-    partial_rotary_factor: float
-    scoring_func: str
-    topk_method: str
-
-class Attention(nn.Module):
-    n_heads: int
-    n_kv_heads: int
-    scale: float
-    q_proj: nn.Linear
-    k_proj: nn.Linear
-    v_proj: nn.Linear
-    o_proj: nn.Linear
-    use_qk_norm: bool
-    q_norm: nn.RMSNorm
-    k_norm: nn.RMSNorm
-    rope: nn.RoPE
-
-    def __init__(self, args: ModelArgs) -> None: ...
-    def __call__(
-        self,
-        x: mx.array,
-        mask: Optional[mx.array] = None,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-
-class MLP(nn.Module):
-    config: ModelArgs
-    hidden_size: int
-    intermediate_size: int
-    gate_proj: nn.Linear
-    up_proj: nn.Linear
-    down_proj: nn.Linear
-
-    def __init__(
-        self,
-        config: ModelArgs,
-        hidden_size: Optional[int] = None,
-        intermediate_size: Optional[int] = None,
-    ) -> None: ...
-    def __call__(self, x: mx.array) -> mx.array: ...
-
-class MoEGate(nn.Module):
-    config: ModelArgs
-    top_k: int
-    norm_topk_prob: bool
-    n_routed_experts: int
-    routed_scaling_factor: float
-    n_group: int
-    topk_group: int
-    weight: mx.array
-    e_score_correction_bias: mx.array
-
-    def __init__(self, config: ModelArgs) -> None: ...
-    def __call__(self, x: mx.array) -> tuple[mx.array, mx.array]: ...
-
-class MoE(nn.Module):
-    config: ModelArgs
-    num_experts_per_tok: int
-    switch_mlp: SwitchGLU
-    gate: MoEGate
-    shared_experts: MLP
-    sharding_group: Optional[mx.distributed.Group]
-
-    def __init__(self, config: ModelArgs) -> None: ...
-    def __call__(self, x: mx.array) -> mx.array: ...
-
-class DecoderLayer(nn.Module):
-    self_attn: Attention
-    mlp: MLP | MoE
-    input_layernorm: nn.RMSNorm
-    post_attention_layernorm: nn.RMSNorm
-
-    def __init__(self, config: ModelArgs, layer_idx: int) -> None: ...
-    def __call__(
-        self,
-        x: mx.array,
-        mask: Optional[mx.array] = None,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-
-class LanguageModel(nn.Module):
-    vocab_size: int
-    embed_tokens: nn.Embedding
-    layers: list[DecoderLayer]
-    norm: nn.RMSNorm
-    pipeline_rank: int
-    pipeline_size: int
-    start_idx: int
-    end_idx: Optional[int]
-    num_layers: int
-
-    def __init__(self, config: ModelArgs) -> None: ...
-    def __call__(
-        self,
-        x: mx.array,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-    @property
-    def pipeline_layers(self) -> list[DecoderLayer]: ...
-
-class Model(nn.Module):
-    args: ModelArgs
-    model_type: str
-    model: LanguageModel
-    lm_head: nn.Linear
-
-    def __init__(self, config: ModelArgs) -> None: ...
-    def __call__(
-        self,
-        inputs: mx.array,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-    def sanitize(self, weights: dict[str, Any]) -> dict[str, Any]: ...
-    def shard(self, group: Optional[mx.distributed.Group] = None) -> None: ...
-    @property
-    def layers(self) -> list[DecoderLayer]: ...
-    @property
-    def cast_predicate(self) -> Any: ...
--- a/.mlx_typings/mlx_lm/models/qwen3_next.pyi
+++ b/.mlx_typings/mlx_lm/models/qwen3_next.pyi
@@ -1,114 +0,0 @@
-"""Type stubs for mlx_lm.models.qwen3_next"""
-
-from typing import Any, Optional
-
-import mlx.core as mx
-import mlx.nn as nn
-
-from .switch_layers import SwitchGLU
-
-class Qwen3NextMLP(nn.Module):
-    gate_proj: nn.Linear
-    down_proj: nn.Linear
-    up_proj: nn.Linear
-
-    def __init__(self, dim: int, hidden_dim: int) -> None: ...
-    def __call__(self, x: mx.array) -> mx.array: ...
-
-class Qwen3NextGatedDeltaNet(nn.Module):
-    hidden_size: int
-    num_v_heads: int
-    num_k_heads: int
-    head_k_dim: int
-    head_v_dim: int
-    key_dim: int
-    value_dim: int
-    conv_kernel_size: int
-    conv_dim: int
-    conv1d: nn.Conv1d
-    in_proj_qkvz: nn.Linear
-    in_proj_ba: nn.Linear
-    dt_bias: mx.array
-    A_log: mx.array
-    out_proj: nn.Linear
-
-    def __init__(self, config: Any) -> None: ...
-    def __call__(
-        self,
-        inputs: mx.array,
-        mask: Optional[mx.array] = None,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-
-class Qwen3NextAttention(nn.Module):
-    num_attention_heads: int
-    num_key_value_heads: int
-    head_dim: int
-    scale: float
-    q_proj: nn.Linear
-    k_proj: nn.Linear
-    v_proj: nn.Linear
-    o_proj: nn.Linear
-
-    def __init__(self, args: Any) -> None: ...
-    def __call__(
-        self,
-        x: mx.array,
-        mask: Optional[mx.array] = None,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-
-class Qwen3NextSparseMoeBlock(nn.Module):
-    norm_topk_prob: bool
-    num_experts: int
-    top_k: int
-    gate: nn.Linear
-    switch_mlp: SwitchGLU
-    shared_expert: Qwen3NextMLP
-    shared_expert_gate: nn.Linear
-
-    def __init__(self, args: Any) -> None: ...
-    def __call__(self, x: mx.array) -> mx.array: ...
-
-class Qwen3NextDecoderLayer(nn.Module):
-    is_linear: bool
-    linear_attn: Qwen3NextGatedDeltaNet
-    self_attn: Qwen3NextAttention
-    input_layernorm: nn.RMSNorm
-    post_attention_layernorm: nn.RMSNorm
-    mlp: Qwen3NextMLP | Qwen3NextSparseMoeBlock
-
-    def __init__(self, args: Any, layer_idx: int) -> None: ...
-    def __call__(
-        self,
-        x: mx.array,
-        mask: Optional[mx.array] = None,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-
-class Qwen3NextModel(nn.Module):
-    embed_tokens: nn.Embedding
-    layers: list[Qwen3NextDecoderLayer]
-    norm: nn.RMSNorm
-
-    def __init__(self, args: Any) -> None: ...
-    def __call__(
-        self,
-        inputs: mx.array,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-
-class Model(nn.Module):
-    model_type: str
-    model: Qwen3NextModel
-    lm_head: nn.Linear
-
-    def __init__(self, args: Any) -> None: ...
-    def __call__(
-        self,
-        inputs: mx.array,
-        cache: Optional[Any] = None,
-    ) -> mx.array: ...
-    def sanitize(self, weights: dict[str, Any]) -> dict[str, Any]: ...
-    @property
-    def layers(self) -> list[Qwen3NextDecoderLayer]: ...
--- a/.mlx_typings/mlx_lm/tokenizer_utils.pyi
+++ b/.mlx_typings/mlx_lm/tokenizer_utils.pyi
@@ -108,21 +108,16 @@ class TokenizerWrapper:
    _tokenizer: PreTrainedTokenizerFast
    eos_token_id: int | None
    eos_token: str | None
-    eos_token_ids: list[int] | set[int] | None
    bos_token_id: int | None
    bos_token: str | None
    vocab_size: int
    all_special_tokens: list[str]
-    think_start: str | None
-    think_end: str | None
-    think_start_id: int | None
-    think_end_id: int | None

    def __init__(
        self,
        tokenizer: Any,
        detokenizer_class: Any = ...,
-        eos_token_ids: list[int] | set[int] | None = ...,
+        eos_token_ids: list[int] | None = ...,
        chat_template: Any = ...,
        tool_parser: Any = ...,
        tool_call_start: str | None = ...,
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -353,6 +353,31 @@ dependencies = [
 "generic-array",
 ]

+[[package]]
+name = "bon"
+version = "3.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1"
+dependencies = [
+ "bon-macros",
+ "rustversion",
+]
+
+[[package]]
+name = "bon-macros"
+version = "3.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645"
+dependencies = [
+ "darling",
+ "ident_case",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 2.0.111",
+]
+
 [[package]]
 name = "bs58"
 version = "0.5.1"
@@ -675,6 +700,41 @@ dependencies = [
 "syn 2.0.111",
 ]

+[[package]]
+name = "darling"
+version = "0.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
+dependencies = [
+ "darling_core",
+ "darling_macro",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.111",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
+dependencies = [
+ "darling_core",
+ "quote",
+ "syn 2.0.111",
+]
+
 [[package]]
 name = "data-encoding"
 version = "2.9.0"
@@ -1565,6 +1625,12 @@ dependencies = [
 "zerovec",
 ]

+[[package]]
+name = "ident_case"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
+
 [[package]]
 name = "idna"
 version = "1.1.0"
@@ -1679,6 +1745,15 @@ dependencies = [
 "generic-array",
 ]

+[[package]]
+name = "internment"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "636d4b0f6a39fd684effe2a73f5310df16a3fa7954c26d36833e98f44d1977a2"
+dependencies = [
+ "hashbrown 0.15.5",
+]
+
 [[package]]
 name = "inventory"
 version = "0.3.21"
@@ -3232,6 +3307,16 @@ dependencies = [
 "zerocopy",
 ]

+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.111",
+]
+
 [[package]]
 name = "primeorder"
 version = "0.13.6"
@@ -3655,6 +3740,12 @@ dependencies = [
 "yasna",
 ]

+[[package]]
+name = "recursion"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dba2197bf7b1d87b4dd460c195f4edeb45a94e82e8054f8d5f317c1f0e93ca1"
+
 [[package]]
 name = "redox_syscall"
 version = "0.5.18"
@@ -4716,6 +4807,15 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 [[package]]
 name = "util"
 version = "0.0.1"
+dependencies = [
+ "bon",
+ "derive_more",
+ "extend",
+ "internment",
+ "once_cell",
+ "recursion",
+ "thiserror 2.0.17",
+]

 [[package]]
 name = "uuid"
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
  <img alt="exo logo" src="/docs/imgs/exo-logo-transparent.png" width="50%" height="50%">
 </picture>

-exo: Run frontier AI locally. Maintained by [exo labs](https://x.com/exolabs).
+exo: Run your own AI cluster at home with everyday devices. Maintained by [exo labs](https://x.com/exolabs).

 <p align="center">
  <a href="https://discord.gg/TJ4P57arEm" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/Discord-Join%20Server-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
@@ -107,10 +107,6 @@ uv run exo

 This starts the exo dashboard and API at http://localhost:52415/

-
-*Please view the section on RDMA to enable this feature on MacOS >=26.2!*
-
-
 ### Run from Source (Linux)

 **Prerequisites:**
@@ -234,7 +230,7 @@ This removes:

 RDMA is a new capability added to macOS 26.2. It works on any Mac with Thunderbolt 5 (M4 Pro Mac Mini, M4 Max Mac Studio, M4 Max MacBook Pro, M3 Ultra Mac Studio).

-Please refer to the caveats for immediate troubleshooting.
+Note that on Mac Studio, you cannot use the Thunderbolt 5 port next to the Ethernet port.

 To enable RDMA on macOS, follow these steps:

@@ -251,14 +247,6 @@ To enable RDMA on macOS, follow these steps:

 After that, RDMA will be enabled in macOS and exo will take care of the rest.

-**Important Caveats**
-
-1. Devices that wish to be part of an RDMA cluster must be connected to all other devices in the cluster.
-2. The cables must support TB5.
-3. On a Mac Studio, you cannot use the Thunderbolt 5 port next to the Ethernet port.
-4. If running from source, please use the script found at `tmp/set_rdma_network_config.sh`, which will disable Thunderbolt Bridge and set dhcp on each RDMA port.
-5. RDMA ports may be unable to discover each other on different versions of MacOS. Please ensure that OS versions match exactly (even beta version numbers) on all devices.
-
 ---

 ### Using the API
--- a/TODO.md
+++ b/TODO.md
@@ -1,14 +1,28 @@
+2. Currently a lot of requests from the API are timing out, but we still process those requests internally. If an API request times out, we should cancel all tasks corresponding to that API request (why process a request with nobody listening?).
 3. Task cancellation. When API http request gets cancelled, it should cancel corresponding task.
 4. I'd like to see profiled network latency / bandwidth.
 5. I'd like to see how much bandwidth each link is using.
+6. We should handle the case where one machine doesn't have the model downloaded and then other machines are waiting on it. In this case we get loads of timeout errors because the others are waiting for the one that needs to download the model.
 7. Solve the problem of in continuous batching when a new prompt comes in, it will block decode of the current batch until the prefill is complete.
 8. We want people to be able to copy models over to a new device without ever connecting EXO to the internet. Right now EXO require internet connection once to cache some files to check if a download is complete. Instead, we should simply check if there is a non-empty model folder locally with no .partial files. This indicates it's a fully downloaded model that can be loaded.
+10. More granular control over how to deploy instances.
+12. Nix is great, but installing it is a pain, and in a lot of cases we have ended up with PATH issues or installation issues. For example, after rebooting mike it seemed to no longer have a nix installation and needed reinstalling. It has a bunch of broken symlinks left over from nix that caused ssh to fail, making it even harder to debug. We need consistent environments (perhaps MDM) so we can guarantee nix is installed properly on each machine.
 13. Memory pressure instead of memory used.
 14. Show the type of each connection (TB5, Ethernet, etc.) in the UI. Refer to old exo: https://github.com/exo-explore/exo/blob/56f783b38dc6b08ce606b07a5386dc40dae00330/exo/helpers.py#L251
 15. Prioritise certain connection types (or by latency). TB5 > Ethernet > WiFi. Refer to old exo: https://github.com/exo-explore/exo/blob/56f783b38dc6b08ce606b07a5386dc40dae00330/exo/helpers.py#L251
 16. Dynamically switch to higher priority connection when it becomes available. Probably bring back InstanceReplacedAtomically.
 17. Faster model loads by streaming model from other devices in cluster.
 18. Add support for specifying the type of network connection to use in a test. Depends on 15/16.
+20. Add chat completion cancellations (e.g. OpenWebUI has something for cancelling an ongoing request).
+23. Do we need cache_limit? We went back and forth on that a lot because we thought it might be causing issues. One problem is it sets it relative to model size. So if you have multiple models loaded in it will take the most recent model size for the cache_limit. This is problematic if you launch DeepSeek -> Llama for example.
+24. Further OpenAI/LM Studio API compatibility
 25. Rethink retry logic
+26. Task cancellation. When API http request gets cancelled, it should cancel corresponding task.
 27. Log cleanup - per-module log filters and default to DEBUG log levels
 28. Validate RDMA connections with ibv_devinfo in the info gatherer
+
+Potential refactors:
+
+2. Topology can be simplified
+
+Random errors we've run into:
--- a/app/EXO/EXO.xcodeproj/project.pbxproj
+++ b/app/EXO/EXO.xcodeproj/project.pbxproj
@@ -342,8 +342,6 @@
 				SDKROOT = macosx;
 				SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
-				SWIFT_TREAT_WARNINGS_AS_ERRORS = YES;
-				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
 			};
 			name = Debug;
 		};
@@ -399,8 +397,6 @@
 				MTL_FAST_MATH = YES;
 				SDKROOT = macosx;
 				SWIFT_COMPILATION_MODE = wholemodule;
-				SWIFT_TREAT_WARNINGS_AS_ERRORS = YES;
-				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
 			};
 			name = Release;
 		};
--- a/app/EXO/EXO/EXOApp.swift
+++ b/app/EXO/EXO/EXOApp.swift
@@ -14,6 +14,7 @@ import SwiftUI
 import UserNotifications
 import os.log

+@main
 struct EXOApp: App {
    @StateObject private var controller: ExoProcessController
    @StateObject private var stateService: ClusterStateService
@@ -224,7 +225,7 @@ private final class ExoUpdaterDelegate: NSObject, SPUUpdaterDelegate {
        }
    }

-    nonisolated private func showNotification(title: String, body: String) {
+    private func showNotification(title: String, body: String) {
        let center = UNUserNotificationCenter.current()
        let content = UNMutableNotificationContent()
        content.title = title
--- a/app/EXO/EXO/ExoProcessController.swift
+++ b/app/EXO/EXO/ExoProcessController.swift
@@ -28,10 +28,6 @@ final class ExoProcessController: ObservableObject {
        }
    }

-    static let exoDirectoryURL: URL = {
-        URL(fileURLWithPath: NSHomeDirectory()).appendingPathComponent(".exo")
-    }()
-
    @Published private(set) var status: Status = .stopped
    @Published private(set) var lastError: String?
    @Published private(set) var launchCountdownSeconds: Int?
@@ -82,11 +78,7 @@ final class ExoProcessController: ObservableObject {

            let child = Process()
            child.executableURL = executableURL
-            let exoHomeURL = Self.exoDirectoryURL
-            try? FileManager.default.createDirectory(
-                at: exoHomeURL, withIntermediateDirectories: true
-            )
-            child.currentDirectoryURL = exoHomeURL
+            child.currentDirectoryURL = runtimeURL
            child.environment = makeEnvironment(for: runtimeURL)

            child.standardOutput = FileHandle.nullDevice
--- a/app/EXO/EXO/Models/ClusterState.swift
+++ b/app/EXO/EXO/Models/ClusterState.swift
@@ -293,7 +293,7 @@ struct ClusterTask {
    let modelName: String?
    let promptPreview: String?
    let errorMessage: String?
-    let parameters: TextGenerationTaskParameters?
+    let parameters: ChatCompletionTaskParameters?

    var sortPriority: Int {
        switch status {
@@ -330,12 +330,12 @@ struct ClusterTaskPayload: Decodable {
    let taskStatus: TaskStatus?
    let instanceId: String?
    let commandId: String?
-    let taskParams: TextGenerationTaskParameters?
+    let taskParams: ChatCompletionTaskParameters?
    let errorType: String?
    let errorMessage: String?
 }

-struct TextGenerationTaskParameters: Decodable, Equatable {
+struct ChatCompletionTaskParameters: Decodable, Equatable {
    let model: String?
    let messages: [ChatCompletionMessage]?
    let maxTokens: Int?
@@ -374,7 +374,7 @@ extension ClusterTask {
        guard let id = payload.taskId else { return nil }
        let status = payload.taskStatus ?? .unknown
        switch kindKey {
-        case "TextGeneration":
+        case "ChatCompletion":
            self.init(
                id: id,
                status: status,
--- a/app/EXO/EXO/Services/BugReportService.swift
+++ b/app/EXO/EXO/Services/BugReportService.swift
@@ -44,12 +44,16 @@ struct BugReportService {
        let dayPrefix = Self.dayPrefixString(now)
        let prefix = "reports/\(dayPrefix)/\(timestamp)/"

-        let logFiles = readAllLogs()
+        let logData = readLog()
        let ifconfigText = try await captureIfconfig()
        let hostName = Host.current().localizedName ?? "unknown"
        let debugInfo = readDebugInfo()

-        let stateData = try await fetch(url: baseURL.appendingPathComponent("state"))
+        async let stateResult = fetch(url: baseURL.appendingPathComponent("state"))
+        async let eventsResult = fetch(url: baseURL.appendingPathComponent("events"))
+
+        let stateData = try await stateResult
+        let eventsData = try await eventsResult

        // Extract cluster TB bridge status from exo state
        let clusterTbBridgeStatus = extractClusterTbBridgeStatus(from: stateData)
@@ -63,19 +67,12 @@ struct BugReportService {
            clusterTbBridgeStatus: clusterTbBridgeStatus
        )

-        let eventLogFiles = readAllEventLogs()
-
-        var uploads: [(path: String, data: Data?)] = logFiles.map { (path, data) in
-            ("\(prefix)\(path)", data)
-        }
-        uploads.append(
-            contentsOf: eventLogFiles.map { (path, data) in
-                ("\(prefix)\(path)", data as Data?)
-            })
-        uploads.append(contentsOf: [
+        let uploads: [(path: String, data: Data?)] = [
+            ("\(prefix)exo.log", logData),
            ("\(prefix)state.json", stateData),
+            ("\(prefix)events.json", eventsData),
            ("\(prefix)report.json", reportJSON),
-        ])
+        ]

        let uploadItems: [(key: String, body: Data)] = uploads.compactMap { item in
            guard let body = item.data else { return nil }
@@ -152,40 +149,11 @@ struct BugReportService {
        return decoded.urls
    }

-    private func readAllLogs() -> [(path: String, data: Data)] {
-        let dir = URL(fileURLWithPath: NSHomeDirectory())
+    private func readLog() -> Data? {
+        let logURL = URL(fileURLWithPath: NSHomeDirectory())
            .appendingPathComponent(".exo")
-            .appendingPathComponent("exo_log")
-        var results: [(path: String, data: Data)] = []
-
-        let contents = (try? FileManager.default.contentsOfDirectory(atPath: dir.path)) ?? []
-        for name in contents {
-            if let data = try? Data(contentsOf: dir.appendingPathComponent(name)) {
-                results.append(("exo_log/\(name)", data))
-            }
-        }
-
-        return results
-    }
-
-    private func readAllEventLogs() -> [(path: String, data: Data)] {
-        let eventLogDir = URL(fileURLWithPath: NSHomeDirectory())
-            .appendingPathComponent(".exo")
-            .appendingPathComponent("event_log")
-        var results: [(path: String, data: Data)] = []
-
-        for subdir in ["master", "api"] {
-            let dir = eventLogDir.appendingPathComponent(subdir)
-            let contents =
-                (try? FileManager.default.contentsOfDirectory(atPath: dir.path)) ?? []
-            for name in contents where name.hasPrefix("events.") {
-                if let data = try? Data(contentsOf: dir.appendingPathComponent(name)) {
-                    results.append(("event_log/\(subdir)/\(name)", data))
-                }
-            }
-        }
-
-        return results
+            .appendingPathComponent("exo.log")
+        return try? Data(contentsOf: logURL)
    }

    private func captureIfconfig() async throws -> String {
--- a/app/EXO/EXO/Services/NetworkSetupHelper.swift
+++ b/app/EXO/EXO/Services/NetworkSetupHelper.swift
@@ -18,9 +18,6 @@ enum NetworkSetupHelper {

        set -euo pipefail

-        # Wait for macOS to finish network setup after boot
-        sleep 20
-
        PREFS="/Library/Preferences/SystemConfiguration/preferences.plist"

        # Remove bridge0 interface
@@ -83,7 +80,7 @@ enum NetworkSetupHelper {
                let alert = NSAlert()
                alert.messageText = "EXO Network Configuration"
                alert.informativeText =
-                    "EXO needs to install a system service to configure local networking. This will disable Thunderbolt Bridge (preventing packet storms) and install a Network Location.\n\nYou will be prompted for your password."
+                    "EXO needs to install a system service to automatically disable Thunderbolt Bridge on startup. This prevents network loops when connecting multiple Macs via Thunderbolt.\n\nYou will be prompted for your administrator password."
                alert.alertStyle = .informational
                alert.addButton(withTitle: "Install")
                alert.addButton(withTitle: "Not Now")
@@ -244,11 +241,11 @@ enum NetworkSetupHelper {
        rm -f "$LOG_OUT" "$LOG_ERR"

        # Switch back to Automatic network location
-        networksetup -switchtolocation Automatic >/dev/null 2>&1 || true
+        networksetup -switchtolocation Automatic 2>/dev/null || true

        # Delete the exo network location if it exists
-        networksetup -listlocations 2>/dev/null | grep -q '^exo$' && {
-          networksetup -deletelocation exo >/dev/null 2>&1 || true
+        networksetup -listlocations | grep -q '^exo$' && {
+          networksetup -deletelocation exo 2>/dev/null || true
        } || true

        # Re-enable any Thunderbolt Bridge service if it exists
@@ -258,12 +255,12 @@ enum NetworkSetupHelper {
          tb_devices=$(networksetup -listallhardwareports 2>/dev/null | awk '
            /^Hardware Port:/ { port = tolower(substr($0, 16)) }
            /^Device:/ { if (port ~ /thunderbolt/) print substr($0, 9) }
-          ') || true
+          ')
          [ -z "$tb_devices" ] && return 0

          # For each bridge device, check if it contains Thunderbolt interfaces
          for bridge in bridge0 bridge1 bridge2; do
-            members=$(ifconfig "$bridge" 2>/dev/null | awk '/member:/ {print $2}') || true
+            members=$(ifconfig "$bridge" 2>/dev/null | awk '/member:/ {print $2}')
            [ -z "$members" ] && continue

            for tb_dev in $tb_devices; do
@@ -272,7 +269,7 @@ enum NetworkSetupHelper {
                service_name=$(networksetup -listnetworkserviceorder 2>/dev/null | awk -v dev="$bridge" '
                  /^\\([0-9*]/ { gsub(/^\\([0-9*]+\\) /, ""); svc = $0 }
                  /Device:/ && $0 ~ dev { print svc; exit }
-                ') || true
+                ')
                if [ -n "$service_name" ]; then
                  networksetup -setnetworkserviceenabled "$service_name" on 2>/dev/null || true
                  return 0
@@ -280,69 +277,13 @@ enum NetworkSetupHelper {
              fi
            done
          done
-          return 0
        }
-        find_and_enable_thunderbolt_bridge || true
+        find_and_enable_thunderbolt_bridge

        echo "EXO network components removed successfully"
        """
    }

-    /// Direct install without GUI (requires root).
-    /// Returns true on success, false on failure.
-    static func installDirectly() -> Bool {
-        let script = makeInstallerScript()
-        return runShellDirectly(script)
-    }
-
-    /// Direct uninstall without GUI (requires root).
-    /// Returns true on success, false on failure.
-    static func uninstallDirectly() -> Bool {
-        let script = makeUninstallScript()
-        return runShellDirectly(script)
-    }
-
-    /// Run a shell script directly via Process (no AppleScript, requires root).
-    /// Returns true on success, false on failure.
-    private static func runShellDirectly(_ script: String) -> Bool {
-        let process = Process()
-        process.executableURL = URL(fileURLWithPath: "/bin/bash")
-        process.arguments = ["-c", script]
-
-        let outputPipe = Pipe()
-        let errorPipe = Pipe()
-        process.standardOutput = outputPipe
-        process.standardError = errorPipe
-
-        do {
-            try process.run()
-            process.waitUntilExit()
-
-            let outputData = outputPipe.fileHandleForReading.readDataToEndOfFile()
-            let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile()
-
-            if let output = String(data: outputData, encoding: .utf8), !output.isEmpty {
-                print(output)
-            }
-            if let errorOutput = String(data: errorData, encoding: .utf8), !errorOutput.isEmpty {
-                fputs(errorOutput, stderr)
-            }
-
-            if process.terminationStatus == 0 {
-                logger.info("Shell script completed successfully")
-                return true
-            } else {
-                logger.error("Shell script failed with exit code \(process.terminationStatus)")
-                return false
-            }
-        } catch {
-            logger.error(
-                "Failed to run shell script: \(error.localizedDescription, privacy: .public)")
-            fputs("Error: \(error.localizedDescription)\n", stderr)
-            return false
-        }
-    }
-
    private static func runShellAsAdmin(_ script: String) throws {
        let escapedScript =
            script
--- a/app/EXO/EXO/Services/ThunderboltBridgeService.swift
+++ b/app/EXO/EXO/Services/ThunderboltBridgeService.swift
@@ -127,24 +127,21 @@ final class ThunderboltBridgeService: ObservableObject {

        // 2. Request specific network configuration rights
        let rightName = "system.services.systemconfiguration.network"
-        status = rightName.withCString { nameCString in
-            var item = AuthorizationItem(
-                name: nameCString,
-                valueLength: 0,
-                value: nil,
-                flags: 0
-            )
-            return withUnsafeMutablePointer(to: &item) { itemPointer in
-                var rights = AuthorizationRights(count: 1, items: itemPointer)
-                return AuthorizationCopyRights(
-                    authRef,
-                    &rights,
-                    nil,
-                    [.extendRights, .interactionAllowed],
-                    nil
-                )
-            }
-        }
+        // Keep the C string and the AuthorizationItem pointer alive for the whole call:
+        // pointers created by implicit bridging are only valid for a single expression.
+        status = rightName.withCString { nameCString in
+            var item = AuthorizationItem(name: nameCString, valueLength: 0, value: nil, flags: 0)
+            return withUnsafeMutablePointer(to: &item) { itemPointer in
+                var rights = AuthorizationRights(count: 1, items: itemPointer)
+                return AuthorizationCopyRights(
+                    authRef,
+                    &rights,
+                    nil,
+                    [.extendRights, .interactionAllowed],
+                    nil
+                )
+            }
+        }
        guard status == errAuthorizationSuccess else {
            if status == errAuthorizationCanceled {
                throw ThunderboltBridgeError.authorizationCanceled
--- a/app/EXO/EXO/ViewModels/InstanceViewModel.swift
+++ b/app/EXO/EXO/ViewModels/InstanceViewModel.swift
@@ -216,7 +216,7 @@ struct InstanceTaskViewModel: Identifiable, Equatable {
    let promptPreview: String?
    let errorMessage: String?
    let subtitle: String?
-    let parameters: TextGenerationTaskParameters?
+    let parameters: ChatCompletionTaskParameters?

    var title: String {
        switch kind {
--- a/app/EXO/EXO/main.swift
+++ b/app/EXO/EXO/main.swift
@@ -1,85 +0,0 @@
-//
-//  main.swift
-//  EXO
-//
-//  Created by Jake Hillion on 2026-02-03.
-//
-
-import Foundation
-
-/// Command line options for the EXO app
-enum CLICommand {
-    case install
-    case uninstall
-    case help
-    case none
-}
-
-/// Parse command line arguments to determine the CLI command
-func parseArguments() -> CLICommand {
-    let args = CommandLine.arguments
-    if args.contains("--help") || args.contains("-h") {
-        return .help
-    }
-    if args.contains("--install") {
-        return .install
-    }
-    if args.contains("--uninstall") {
-        return .uninstall
-    }
-    return .none
-}
-
-/// Print usage information
-func printUsage() {
-    let programName = (CommandLine.arguments.first as NSString?)?.lastPathComponent ?? "EXO"
-    print(
-        """
-        Usage: \(programName) [OPTIONS]
-
-        Options:
-          --install     Install EXO network configuration (requires root)
-          --uninstall   Uninstall EXO network configuration (requires root)
-          --help, -h    Show this help message
-
-        When run without options, starts the normal GUI application.
-
-        Examples:
-          sudo \(programName) --install    Install network components as root
-          sudo \(programName) --uninstall  Remove network components as root
-        """)
-}
-
-/// Check if running as root
-func isRunningAsRoot() -> Bool {
-    return getuid() == 0
-}
-
-// Main entry point
-let command = parseArguments()
-
-switch command {
-case .help:
-    printUsage()
-    exit(0)
-
-case .install:
-    if !isRunningAsRoot() {
-        fputs("Error: --install requires root privileges. Run with sudo.\n", stderr)
-        exit(1)
-    }
-    let success = NetworkSetupHelper.installDirectly()
-    exit(success ? 0 : 1)
-
-case .uninstall:
-    if !isRunningAsRoot() {
-        fputs("Error: --uninstall requires root privileges. Run with sudo.\n", stderr)
-        exit(1)
-    }
-    let success = NetworkSetupHelper.uninstallDirectly()
-    exit(success ? 0 : 1)
-
-case .none:
-    // Start normal GUI application
-    EXOApp.main()
-}
--- a/app/EXO/uninstall-exo.sh
+++ b/app/EXO/uninstall-exo.sh
@@ -29,21 +29,21 @@ YELLOW='\033[1;33m'
 NC='\033[0m' # No Color

 echo_info() {
-  echo -e "${GREEN}[INFO]${NC} $1"
+    echo -e "${GREEN}[INFO]${NC} $1"
 }

 echo_warn() {
-  echo -e "${YELLOW}[WARN]${NC} $1"
+    echo -e "${YELLOW}[WARN]${NC} $1"
 }

 echo_error() {
-  echo -e "${RED}[ERROR]${NC} $1"
+    echo -e "${RED}[ERROR]${NC} $1"
 }

 # Check if running as root
 if [[ $EUID -ne 0 ]]; then
-  echo_error "This script must be run as root (use sudo)"
-  exit 1
+    echo_error "This script must be run as root (use sudo)"
+    exit 1
 fi

 echo ""
@@ -55,64 +55,64 @@ echo ""
 # Unload the LaunchDaemon if running
 echo_info "Stopping network setup daemon..."
 if launchctl list | grep -q "$LABEL"; then
-  launchctl bootout system/"$LABEL" 2>/dev/null || true
-  echo_info "Daemon stopped"
+    launchctl bootout system/"$LABEL" 2>/dev/null || true
+    echo_info "Daemon stopped"
 else
-  echo_warn "Daemon was not running"
+    echo_warn "Daemon was not running"
 fi

 # Remove LaunchDaemon plist
-if [[ -f $PLIST_DEST ]]; then
-  rm -f "$PLIST_DEST"
-  echo_info "Removed LaunchDaemon plist"
+if [[ -f "$PLIST_DEST" ]]; then
+    rm -f "$PLIST_DEST"
+    echo_info "Removed LaunchDaemon plist"
 else
-  echo_warn "LaunchDaemon plist not found (already removed?)"
+    echo_warn "LaunchDaemon plist not found (already removed?)"
 fi

 # Remove the script and parent directory
-if [[ -f $SCRIPT_DEST ]]; then
-  rm -f "$SCRIPT_DEST"
-  echo_info "Removed network setup script"
+if [[ -f "$SCRIPT_DEST" ]]; then
+    rm -f "$SCRIPT_DEST"
+    echo_info "Removed network setup script"
 else
-  echo_warn "Network setup script not found (already removed?)"
+    echo_warn "Network setup script not found (already removed?)"
 fi

 # Remove EXO directory if empty
 if [[ -d "/Library/Application Support/EXO" ]]; then
-  rmdir "/Library/Application Support/EXO" 2>/dev/null &&
-    echo_info "Removed EXO support directory" ||
-    echo_warn "EXO support directory not empty, leaving in place"
+    rmdir "/Library/Application Support/EXO" 2>/dev/null && \
+        echo_info "Removed EXO support directory" || \
+        echo_warn "EXO support directory not empty, leaving in place"
 fi

 # Remove log files
-if [[ -f $LOG_OUT ]] || [[ -f $LOG_ERR ]]; then
-  rm -f "$LOG_OUT" "$LOG_ERR"
-  echo_info "Removed log files"
+if [[ -f "$LOG_OUT" ]] || [[ -f "$LOG_ERR" ]]; then
+    rm -f "$LOG_OUT" "$LOG_ERR"
+    echo_info "Removed log files"
 else
-  echo_warn "Log files not found (already removed?)"
+    echo_warn "Log files not found (already removed?)"
 fi

 # Switch back to Automatic network location
 echo_info "Restoring network configuration..."
 if networksetup -listlocations | grep -q "^Automatic$"; then
-  networksetup -switchtolocation Automatic 2>/dev/null || true
-  echo_info "Switched to Automatic network location"
+    networksetup -switchtolocation Automatic 2>/dev/null || true
+    echo_info "Switched to Automatic network location"
 else
-  echo_warn "Automatic network location not found"
+    echo_warn "Automatic network location not found"
 fi

 # Delete the exo network location if it exists
 if networksetup -listlocations | grep -q "^exo$"; then
-  networksetup -deletelocation exo 2>/dev/null || true
-  echo_info "Deleted 'exo' network location"
+    networksetup -deletelocation exo 2>/dev/null || true
+    echo_info "Deleted 'exo' network location"
 else
-  echo_warn "'exo' network location not found (already removed?)"
+    echo_warn "'exo' network location not found (already removed?)"
 fi

 # Re-enable Thunderbolt Bridge if it exists
 if networksetup -listnetworkservices 2>/dev/null | grep -q "Thunderbolt Bridge"; then
-  networksetup -setnetworkserviceenabled "Thunderbolt Bridge" on 2>/dev/null || true
-  echo_info "Re-enabled Thunderbolt Bridge"
+    networksetup -setnetworkserviceenabled "Thunderbolt Bridge" on 2>/dev/null || true
+    echo_info "Re-enabled Thunderbolt Bridge"
 fi

 # Note about launch at login registration
@@ -124,14 +124,14 @@ echo_warn "  System Settings → General → Login Items → Remove EXO"
 # Check if EXO.app exists in common locations
 APP_FOUND=false
 for app_path in "/Applications/EXO.app" "$HOME/Applications/EXO.app"; do
-  if [[ -d $app_path ]]; then
-    if [[ $APP_FOUND == false ]]; then
-      echo ""
-      APP_FOUND=true
+    if [[ -d "$app_path" ]]; then
+        if [[ "$APP_FOUND" == false ]]; then
+            echo ""
+            APP_FOUND=true
+        fi
+        echo_warn "EXO.app found at: $app_path"
+        echo_warn "You may want to move it to Trash manually."
    fi
-    echo_warn "EXO.app found at: $app_path"
-    echo_warn "You may want to move it to Trash manually."
-  fi
 done

 echo ""
@@ -151,3 +151,4 @@ echo ""
 echo "Manual step required:"
 echo "  Remove EXO from Login Items in System Settings → General → Login Items"
 echo ""
+
--- a/bench/src/exo_bench/init.py
+++ b/bench/src/exo_bench/init.py
--- a/bench/eval_config.toml
+++ b/bench/eval_config.toml
@@ -0,0 +1,66 @@
+# exo-eval configuration file
+# See bench/exo_eval.py for usage
+
+[eval]
+# Eval framework type: "lm_eval" | "swe_bench" | "custom"
+type = "lm_eval"
+# Require HuggingFace token (default: true)
+# Set to false if using only public datasets
+require_hf_token = true
+
+# Instance/placement configuration
+# Controls how exo sets up the model instance before running evals
+[instance]
+# Placement strategy: "ring" | "jaccl" | "both"
+instance_meta = "jaccl"
+# Sharding strategy: "pipeline" | "tensor" | "both"
+sharding = "tensor"
+# Node constraints
+min_nodes = 2
+max_nodes = 2
+
+# lm_eval configuration (EleutherAI's lm-evaluation-harness)
+[lm_eval]
+# Tasks to run (list of task names)
+# NOTE: The chat completions API only supports generation-based tasks.
+# Loglikelihood tasks (mmlu, hellaswag, arc) require the /v1/completions endpoint.
+#
+# Generation-based tasks (work with chat completions):
+#   - mmlu_pro, mmlu_generative, mmlu_flan_cot_fewshot, mmlu_flan_cot_zeroshot
+#   - gsm8k, gsm8k_cot, gsm8k_cot_zeroshot
+#   - truthfulqa (uses generate_until for some subtasks)
+#   - humaneval, mbpp (code generation)
+#
+# Run `lm_eval --tasks list` to see all available tasks
+tasks = ["mmlu_pro"]
+# Number of few-shot examples (5 is standard for mmlu_pro CoT)
+num_fewshot = 5
+# Batch size (use 1 for API models, "auto" doesn't work)
+batch_size = 1
+# Number of concurrent requests (set > 1 to enable parallelism)
+# Higher values enable better batching throughput
+num_concurrent = 64
+# Apply chat template for instruct/chat models (default: true)
+apply_chat_template = true
+# Use fewshot examples as conversation turns (better for chat models)
+fewshot_as_multiturn = true
+# Optional: limit samples per task (omit or comment out for no limit)
+# limit = 100
+# Output path for results
+output_path = "bench/eval_results"
+
+# SWE-bench configuration (placeholder)
+[swe_bench]
+# SWE-bench dataset
+dataset = "princeton-nlp/SWE-bench_Lite"
+# Maximum workers for parallel execution
+max_workers = 8
+# Path for prediction outputs
+predictions_path = "bench/predictions"
+
+# Custom evaluation script configuration
+[custom]
+# Path to custom evaluation script
+script = "path/to/eval_script.py"
+# Arguments to pass to the script
+args = ["--arg1", "value1"]
--- a/bench/exo_bench.py
+++ b/bench/exo_bench.py
@@ -5,13 +5,10 @@ from __future__ import annotations
 import argparse
 import contextlib
 import http.client
-import itertools
 import json
 import os
-import sys
 import time
 from collections.abc import Callable
-from pathlib import Path
 from statistics import mean
 from typing import Any
 from urllib.parse import urlencode
@@ -19,84 +16,6 @@ from urllib.parse import urlencode
 from loguru import logger
 from transformers import AutoTokenizer

-# Monkey-patch for transformers 5.x compatibility
-# Kimi's tokenization_kimi.py imports bytes_to_unicode from the old location
-# which was moved in transformers 5.0.0rc2
-try:
-    import transformers.models.gpt2.tokenization_gpt2 as gpt2_tokenization
-    from transformers.convert_slow_tokenizer import bytes_to_unicode
-
-    if not hasattr(gpt2_tokenization, "bytes_to_unicode"):
-        gpt2_tokenization.bytes_to_unicode = bytes_to_unicode  # type: ignore[attr-defined]
-except ImportError:
-    pass  # transformers < 5.0 or bytes_to_unicode not available
-
-
-def load_tokenizer_for_bench(model_id: str) -> Any:
-    """
-    Load tokenizer for benchmarking, with special handling for Kimi models.
-
-    Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer.
-    This function replicates the logic from utils_mlx.py for bench compatibility.
-    """
-    model_id_lower = model_id.lower()
-
-    if "kimi-k2" in model_id_lower:
-        import importlib.util
-        import types
-
-        from huggingface_hub import snapshot_download
-
-        # Download/get the model path
-        model_path = Path(
-            snapshot_download(
-                model_id,
-                allow_patterns=["*.json", "*.py", "*.tiktoken"],
-            )
-        )
-
-        sys.path.insert(0, str(model_path))
-
-        # Load tool_declaration_ts first (tokenization_kimi imports it with relative import)
-        tool_decl_path = model_path / "tool_declaration_ts.py"
-        if tool_decl_path.exists():
-            spec = importlib.util.spec_from_file_location(
-                "tool_declaration_ts", tool_decl_path
-            )
-            if spec and spec.loader:
-                tool_decl_module = importlib.util.module_from_spec(spec)
-                sys.modules["tool_declaration_ts"] = tool_decl_module
-                spec.loader.exec_module(tool_decl_module)
-
-        # Load tokenization_kimi with patched source (convert relative to absolute import)
-        tok_path = model_path / "tokenization_kimi.py"
-        source = tok_path.read_text()
-        source = source.replace("from .tool_declaration_ts", "from tool_declaration_ts")
-        spec = importlib.util.spec_from_file_location("tokenization_kimi", tok_path)
-        if spec:
-            tok_module = types.ModuleType("tokenization_kimi")
-            tok_module.__file__ = str(tok_path)
-            sys.modules["tokenization_kimi"] = tok_module
-            exec(compile(source, tok_path, "exec"), tok_module.__dict__)  # noqa: S102
-            TikTokenTokenizer = tok_module.TikTokenTokenizer  # noqa: N806
-        else:
-            from tokenization_kimi import TikTokenTokenizer  # type: ignore[import-not-found]  # noqa: I001
-
-        hf_tokenizer: Any = TikTokenTokenizer.from_pretrained(model_path)
-
-        # Patch encode to use internal tiktoken model directly
-        # transformers 5.x has a bug in the encode->pad path for slow tokenizers
-        def _patched_encode(text: str, **kwargs: object) -> list[int]:
-            # Pass allowed_special="all" to handle special tokens like <|im_user|>
-            return list(hf_tokenizer.model.encode(text, allowed_special="all"))
-
-        hf_tokenizer.encode = _patched_encode
-
-        return hf_tokenizer
-
-    # Default: use AutoTokenizer
-    return AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-

 class ExoHttpError(RuntimeError):
    def __init__(self, status: int, reason: str, body_preview: str):
@@ -105,7 +24,7 @@ class ExoHttpError(RuntimeError):


 class ExoClient:
-    def __init__(self, host: str, port: int, timeout_s: float = 7200.0):
+    def __init__(self, host: str, port: int, timeout_s: float = 600.0):
        self.host = host
        self.port = port
        self.timeout_s = timeout_s
@@ -261,7 +180,14 @@ def parse_int_list(values: list[str]) -> list[int]:
            part = part.strip()
            if part:
                items.append(int(part))
-    return items
+
+    seen: set[int] = set()
+    out: list[int] = []
+    for x in items:
+        if x not in seen:
+            out.append(x)
+            seen.add(x)
+    return out


 def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]:
@@ -314,11 +240,7 @@ def run_one_completion(

    stats = out.get("generation_stats")

-    # Extract preview, handling None content (common for thinking models)
-    choices = out.get("choices") or [{}]
-    message = choices[0].get("message", {}) if choices else {}
-    content = message.get("content") or ""
-    preview = content[:200] if content else ""
+    preview = (((out.get("choices") or [{}])[0].get("message") or {}).get("content") or "")[:200]

    return {
        "elapsed_s": elapsed,
@@ -355,29 +277,12 @@ class PromptSizer:
                f"Target ({target}) is smaller than template overhead ({self.base_tokens})."
            )

-        # Estimate tokens per atom using a sample
-        sample_count = 100
-        sample_content = self.atom * sample_count
-        sample_tokens = self.count_fn(sample_content) - self.base_tokens
-        tokens_per_atom = sample_tokens / sample_count
-
-        # Estimate starting point
-        needed_tokens = target - self.base_tokens
-        estimated_atoms = int(needed_tokens / tokens_per_atom)
-
-        # Binary search to find exact atom count
-        low, high = 0, estimated_atoms * 2 + 100
-        while low < high:
-            mid = (low + high) // 2
-            tok = self.count_fn(self.atom * mid)
-            if tok < target:
-                low = mid + 1
-            else:
-                high = mid
-
-        content = self.atom * low
+        content = ""
        tok = self.count_fn(content)
-        logger.info(f"{tok=}")
+
+        while tok < target:
+            content += self.atom
+            tok = self.count_fn(content)

        if tok != target:
            raise RuntimeError(
@@ -431,12 +336,7 @@ def main() -> int:
    ap.add_argument(
        "--skip-pipeline-jaccl",
        action="store_true",
-        help="Skip pipeline+jaccl placements, as it's often pointless.",
-    )
-    ap.add_argument(
-        "--skip-tensor-ring",
-        action="store_true",
-        help="Skip tensor+ring placements, as it's so slow.",
+        help="Skip pipeline+jaccl placements, which are often pointless.",
    )
    ap.add_argument(
        "--repeat", type=int, default=1, help="Repetitions per (pp,tg) pair."
@@ -448,22 +348,16 @@ def main() -> int:
        help="Warmup runs per placement (uses first pp/tg).",
    )
    ap.add_argument(
-        "--timeout", type=float, default=7200.0, help="HTTP timeout (seconds)."
+        "--timeout", type=float, default=600.0, help="HTTP timeout (seconds)."
    )
    ap.add_argument(
        "--json-out",
        default="bench/results.json",
        help="Write raw per-run results JSON to this path.",
    )
-    ap.add_argument("--stdout", action="store_true", help="Write results to stdout")
    ap.add_argument(
        "--dry-run", action="store_true", help="List selected placements and exit."
    )
-    ap.add_argument(
-        "--all-combinations",
-        action="store_true",
-        help="Force all pp×tg combinations (cartesian product) even when lists have equal length.",
-    )
    args = ap.parse_args()

    pp_list = parse_int_list(args.pp)
@@ -475,15 +369,6 @@ def main() -> int:
        logger.error("--repeat must be >= 1")
        return 2

-    # Log pairing mode
-    use_combinations = args.all_combinations or len(pp_list) != len(tg_list)
-    if use_combinations:
-        logger.info(
-            f"pp/tg mode: combinations (product) - {len(pp_list) * len(tg_list)} pairs"
-        )
-    else:
-        logger.info(f"pp/tg mode: tandem (zip) - {len(pp_list)} pairs")
-
    client = ExoClient(args.host, args.port, timeout_s=args.timeout)
    short_id, full_model_id = resolve_model_short_id(client, args.model)

@@ -492,7 +377,10 @@ def main() -> int:
    )
    previews = previews_resp.get("previews") or []

-    tokenizer = load_tokenizer_for_bench(full_model_id)
+    tokenizer = AutoTokenizer.from_pretrained(
+        full_model_id,
+        trust_remote_code=True,
+    )
    if tokenizer is None:
        raise RuntimeError("[exo-bench] tokenizer load failed")

@@ -539,16 +427,6 @@ def main() -> int:
        ):
            continue

-        if (
-            args.skip_tensor_ring
-            and (
-                args.instance_meta == "both"
-                and "ring" in p.get("instance_meta", "").lower()
-            )
-            and (args.sharding == "both" and "tensor" in p.get("sharding", "").lower())
-        ):
-            continue
-
        if args.min_nodes <= n <= args.max_nodes:
            selected.append(p)

@@ -608,55 +486,60 @@ def main() -> int:
                )
                logger.debug(f"  warmup {i + 1}/{args.warmup} done")

-            # If pp and tg lists have same length, run in tandem (zip)
-            # Otherwise (or if --all-combinations), run all combinations (cartesian product)
-            if use_combinations:
-                pp_tg_pairs = list(itertools.product(pp_list, tg_list))
-            else:
-                pp_tg_pairs = list(zip(pp_list, tg_list, strict=True))
-
-            for pp, tg in pp_tg_pairs:
-                runs: list[dict[str, Any]] = []
-                for r in range(args.repeat):
-                    time.sleep(3)
-                    try:
-                        row, actual_pp_tokens = run_one_completion(
-                            client, full_model_id, pp, tg, prompt_sizer
+            for pp in pp_list:
+                # if (
+                #     pp * n_nodes > 2048
+                #     and "ring" in instance_meta.lower()
+                #     and "tensor" in sharding.lower()
+                # ):
+                #     model_card = MODEL_CARDS[short_id]
+                #     if model_card.metadata.storage_size > Memory.from_gb(10):
+                #         logger.info(
+                #             f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
+                #         )
+                #         continue
+                for tg in tg_list:
+                    runs: list[dict[str, Any]] = []
+                    for r in range(args.repeat):
+                        time.sleep(3)
+                        try:
+                            row, actual_pp_tokens = run_one_completion(
+                                client, full_model_id, pp, tg, prompt_sizer
+                            )
+                        except Exception as e:
+                            logger.error(e)
+                            continue
+                        row.update(
+                            {
+                                "model_short_id": short_id,
+                                "model_id": full_model_id,
+                                "placement_sharding": sharding,
+                                "placement_instance_meta": instance_meta,
+                                "placement_nodes": n_nodes,
+                                "instance_id": instance_id,
+                                "pp_tokens": actual_pp_tokens,
+                                "tg": tg,
+                                "repeat_index": r,
+                            }
                        )
-                    except Exception as e:
-                        logger.error(e)
-                        continue
-                    row.update(
-                        {
-                            "model_short_id": short_id,
-                            "model_id": full_model_id,
-                            "placement_sharding": sharding,
-                            "placement_instance_meta": instance_meta,
-                            "placement_nodes": n_nodes,
-                            "instance_id": instance_id,
-                            "pp_tokens": actual_pp_tokens,
-                            "tg": tg,
-                            "repeat_index": r,
-                        }
-                    )
-                    runs.append(row)
-                    all_rows.append(row)
+                        runs.append(row)
+                        all_rows.append(row)

-                if runs:
-                    prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
-                    gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
-                    ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
-                    gtok = mean(x["stats"]["generation_tokens"] for x in runs)
-                    peak = mean(
-                        x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
-                    )
+                    if runs:
+                        prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
+                        gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
+                        ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
+                        gtok = mean(x["stats"]["generation_tokens"] for x in runs)
+                        peak = mean(
+                            x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
+                        )

-                    logger.info(
-                        f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
-                        f"prompt_tokens={ptok} gen_tokens={gtok}    "
-                        f"peak_memory={format_peak_memory(peak)}\n"
-                    )
-                time.sleep(2)
+                        logger.info(
+                            f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
+                            f"prompt_tokens={ptok} gen_tokens={gtok}    "
+                            f"peak_memory={format_peak_memory(peak)}\n"
+                        )
+                    time.sleep(2)
        finally:
            try:
                client.request_json("DELETE", f"/instance/{instance_id}")
@@ -668,9 +551,7 @@ def main() -> int:

            time.sleep(5)

-    if args.stdout:
-        json.dump(all_rows, sys.stdout, indent=2, ensure_ascii=False)
-    elif args.json_out:
+    if args.json_out:
        with open(args.json_out, "w", encoding="utf-8") as f:
            json.dump(all_rows, f, indent=2, ensure_ascii=False)
        logger.debug(f"\nWrote results JSON: {args.json_out}")
--- a/bench/exo_eval.py
+++ b/bench/exo_eval.py
@@ -0,0 +1,679 @@
+#!/usr/bin/env python3
+# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
+"""
+exo-eval: Evaluation harness for exo inference system.
+
+Supports multiple evaluation frameworks via TOML configuration:
+- lm_eval: Language model evaluation using EleutherAI's lm-evaluation-harness
+- swe_bench: SWE-bench evaluation (placeholder for future implementation)
+- custom: Custom evaluation scripts
+
+Usage:
+    uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit
+    uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit --dry-run
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import json
+import os
+import subprocess
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Literal
+
+# Add parent directory to path for direct script execution
+if __name__ == "__main__" and __package__ is None:
+    sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+import tomlkit
+from huggingface_hub import get_token as get_hf_token
+from loguru import logger
+from tomlkit.exceptions import TOMLKitError
+
+from bench.exo_bench import (
+    ExoClient,
+    ExoHttpError,
+    instance_id_from_instance,
+    nodes_used_in_instance,
+    placement_filter,
+    resolve_model_short_id,
+    sharding_filter,
+    wait_for_instance_gone,
+    wait_for_instance_ready,
+)
+
+EvalType = Literal["lm_eval", "swe_bench", "custom"]
+
+
+def load_config(config_path: str) -> dict[str, Any]:
+    """Read the TOML file at ``config_path`` and return its top-level table as a dict.
+
+    Raises:
+        FileNotFoundError: if ``config_path`` does not exist.
+    """
+    cfg_file = Path(config_path)
+    if cfg_file.exists():
+        with cfg_file.open(encoding="utf-8") as handle:
+            return dict(tomlkit.load(handle))
+
+    raise FileNotFoundError(f"Config file not found: {config_path}")
+
+
+def get_eval_type(config: dict[str, Any]) -> EvalType:
+    """Return the ``type`` entry of the ``[eval]`` table, defaulting to ``"lm_eval"``.
+
+    Raises:
+        ValueError: if the configured type is not one of the supported kinds.
+    """
+    supported = ("lm_eval", "swe_bench", "custom")
+    chosen = config.get("eval", {}).get("type", "lm_eval")
+    if chosen in supported:
+        return chosen
+    raise ValueError(f"Unknown eval type: {chosen}")
+
+
+def check_hf_token(config: dict[str, Any]) -> bool:
+    """Verify that a HuggingFace token exists unless the config opts out.
+
+    The check is governed by ``require_hf_token`` in the ``[eval]`` table and is
+    enabled when that key is absent. Returns True when the check is disabled or
+    a token is found; logs an error and returns False when no token is found.
+    """
+    if not config.get("eval", {}).get("require_hf_token", True):
+        return True
+
+    have_token = get_hf_token() is not None
+    if have_token:
+        logger.info("HuggingFace token found")
+    else:
+        logger.error(
+            "HuggingFace token not found. "
+            "Set HF_TOKEN environment variable or run 'huggingface-cli login'. "
+            "To disable this check, set require_hf_token = false in [eval] config."
+        )
+    return have_token
+
+
+def select_placement(
+    client: ExoClient,
+    full_model_id: str,
+    config: dict[str, Any],
+) -> dict[str, Any] | None:
+    """Pick the placement to use for ``full_model_id`` from the ``[instance]`` config.
+
+    An explicit ``instance`` entry in the config is returned unchanged. Otherwise
+    the previews reported by the server are filtered by instance meta, sharding
+    and node-count bounds, and the best-ranked preview is returned (None when
+    nothing qualifies).
+    """
+    prefs = config.get("instance", {})
+
+    # An instance spelled out in the config bypasses preview selection entirely.
+    if "instance" in prefs:
+        return prefs["instance"]
+
+    want_meta = prefs.get("instance_meta", "ring")
+    want_sharding = prefs.get("sharding", "pipeline")
+    node_cap = prefs.get("max_nodes", 4)
+    node_floor = prefs.get("min_nodes", 1)
+
+    response = client.request_json(
+        "GET", "/instance/previews", params={"model_id": full_model_id}
+    )
+
+    def qualifies(p: dict[str, Any]) -> bool:
+        # Checks run in this order and short-circuit on the first failure.
+        return bool(
+            p.get("error") is None
+            and placement_filter(str(p.get("instance_meta", "")), want_meta)
+            and sharding_filter(str(p.get("sharding", "")), want_sharding)
+            and isinstance(p.get("instance"), dict)
+            and node_floor <= nodes_used_in_instance(p["instance"]) <= node_cap
+        )
+
+    def rank(p: dict[str, Any]) -> tuple[int, int, int]:
+        # Higher is better: meta substring match, sharding substring match,
+        # then the number of nodes used.
+        return (
+            int(want_meta in str(p.get("instance_meta", "")).lower()),
+            int(want_sharding in str(p.get("sharding", "")).lower()),
+            nodes_used_in_instance(p["instance"]),
+        )
+
+    candidates: list[dict[str, Any]] = [
+        p for p in (response.get("previews") or []) if qualifies(p)
+    ]
+
+    # max() yields the first top-ranked candidate, matching a stable
+    # descending sort followed by taking element 0.
+    return max(candidates, key=rank, default=None)
+
+
+def setup_instance(
+    client: ExoClient,
+    full_model_id: str,
+    config: dict[str, Any],
+    dry_run: bool,
+) -> tuple[str | None, dict[str, Any] | None]:
+    """Select a placement, create its instance and block until it is ready.
+
+    Returns ``(instance_id, preview)`` on success (and in dry-run mode, where
+    nothing is created), or ``(None, None)`` when no placement matches or the
+    instance fails to become ready.
+    """
+    preview = select_placement(client, full_model_id, config)
+    if preview is None:
+        logger.error("No valid placement found matching config preferences")
+        return None, None
+
+    # When the selection carries no dict under "instance", the selection itself
+    # is used as the instance payload.
+    nested = preview.get("instance")
+    instance: dict[str, Any] = nested if isinstance(nested, dict) else preview
+    instance_id = instance_id_from_instance(instance)
+
+    n_nodes = nodes_used_in_instance(instance)
+    instance_meta = str(preview.get("instance_meta", "unknown"))
+    sharding = str(preview.get("sharding", "unknown"))
+
+    logger.info(f"Selected placement: {sharding} / {instance_meta} / nodes={n_nodes}")
+    logger.info(f"Instance ID: {instance_id}")
+
+    if dry_run:
+        logger.info("[dry-run] Would create instance and wait for ready")
+        return instance_id, preview
+
+    client.request_json("POST", "/instance", body={"instance": instance})
+
+    try:
+        wait_for_instance_ready(client, instance_id)
+        logger.info("Instance is ready")
+        # Short settle time once the instance reports ready.
+        time.sleep(1)
+    except (RuntimeError, TimeoutError) as e:
+        logger.error(f"Failed to initialize instance: {e}")
+        # Best-effort cleanup of the half-created instance.
+        with contextlib.suppress(ExoHttpError):
+            client.request_json("DELETE", f"/instance/{instance_id}")
+        return None, None
+
+    return instance_id, preview
+
+
+def teardown_instance(client: ExoClient, instance_id: str) -> None:
+    """Request deletion of ``instance_id`` and wait until it has disappeared.
+
+    A 404 on delete is tolerated; any other ``ExoHttpError`` is re-raised.
+    Connection-level failures are logged as warnings and end the teardown.
+    """
+    try:
+        client.request_json("DELETE", f"/instance/{instance_id}")
+    except ExoHttpError as err:
+        already_gone = err.status == 404
+        if not already_gone:
+            raise
+    except OSError:
+        # ConnectionRefusedError is an OSError subclass, so this covers both.
+        logger.warning(
+            f"Could not connect to exo to delete instance {instance_id} (server may be down)"
+        )
+        return
+
+    try:
+        wait_for_instance_gone(client, instance_id)
+    except OSError:
+        # Also covers ConnectionRefusedError and the built-in TimeoutError.
+        logger.warning("Could not verify instance deletion (server may be down)")
+    else:
+        logger.info(f"Instance {instance_id} deleted")
+
+
+def build_lm_eval_args(
+    config: dict[str, Any],
+    base_url: str,
+    model: str,
+    output_path: str | None,
+    limit: int | None,
+    use_completions: bool,
+) -> list[str]:
+    """Assemble the argv that launches ``bench.lm_eval_patched``.
+
+    Options are read from the ``[lm_eval]`` config table; ``limit`` and
+    ``output_path`` take precedence over their config counterparts when given.
+    ``use_completions`` selects the completions backend and endpoint instead of
+    the chat-completions ones, and suppresses the chat-only flags.
+    """
+    opts = config.get("lm_eval", {})
+
+    # Backend name and endpoint go together.
+    if not use_completions:
+        model_type = "local-chat-completions"
+        endpoint_url = f"{base_url}/v1/chat/completions"
+    else:
+        model_type = "local-completions"
+        endpoint_url = f"{base_url}/v1/completions"
+
+    # model_args is a comma-separated key=value string.
+    model_arg_items = [f"model={model}", f"base_url={endpoint_url}"]
+    concurrency = opts.get("num_concurrent")
+    if concurrency is not None and concurrency > 1:
+        model_arg_items.append(f"num_concurrent={concurrency}")
+    # Default timeout is one week (604800 s) so long request queues survive.
+    request_timeout = opts.get("timeout", 604800)
+    model_arg_items.append(f"timeout={request_timeout}")
+
+    argv = [
+        sys.executable,
+        "-m",
+        "bench.lm_eval_patched",
+        "--model",
+        model_type,
+        "--model_args",
+        ",".join(model_arg_items),
+        "--verbosity",
+        "WARNING",
+    ]
+
+    # Tasks: a list is comma-joined, anything else is stringified as-is.
+    task_spec = opts.get("tasks", ["mmlu"])
+    argv += [
+        "--tasks",
+        ",".join(task_spec) if isinstance(task_spec, list) else str(task_spec),
+    ]
+
+    shots = opts.get("num_fewshot")
+    if shots is not None:
+        argv += ["--num_fewshot", str(shots)]
+
+    # Batch size defaults to 1.
+    argv += ["--batch_size", str(opts.get("batch_size", 1))]
+
+    # Chat-template flags are only emitted for the chat-completions backend.
+    if not use_completions:
+        if opts.get("apply_chat_template", True):
+            argv.append("--apply_chat_template")
+        if opts.get("fewshot_as_multiturn", False):
+            argv.append("--fewshot_as_multiturn")
+
+    # An explicit limit argument wins over the configured one.
+    sample_limit = opts.get("limit") if limit is None else limit
+    if sample_limit is not None:
+        argv += ["--limit", str(sample_limit)]
+
+    # Samples are logged whenever results are written somewhere.
+    destination = output_path or opts.get("output_path")
+    if destination:
+        argv += ["--output_path", destination, "--log_samples"]
+
+    return argv
+
+
+def _log_token_usage(exo_base_url: str) -> None:
+    """Best-effort: fetch ``/v1/usage`` from exo and log the token counters."""
+    try:
+        import httpx
+
+        usage_resp = httpx.get(f"{exo_base_url}/v1/usage", timeout=5)
+        if usage_resp.status_code != 200:
+            return
+
+        usage = usage_resp.json()
+        logger.info("--- Token Usage (Total) ---")
+        logger.info(f"  Requests:          {usage.get('total_requests', 0)}")
+        logger.info(f"  Prompt tokens:     {usage.get('total_prompt_tokens', 0)}")
+        logger.info(
+            f"  Completion tokens: {usage.get('total_completion_tokens', 0)}"
+        )
+        logger.info(f"  Reasoning tokens:  {usage.get('total_reasoning_tokens', 0)}")
+        logger.info(f"  Total tokens:      {usage.get('total_tokens', 0)}")
+
+        for model_name, counters in (usage.get("by_model") or {}).items():
+            logger.info(f"--- Token Usage ({model_name}) ---")
+            logger.info(f"  Requests:          {counters.get('requests', 0)}")
+            logger.info(f"  Prompt tokens:     {counters.get('prompt_tokens', 0)}")
+            logger.info(
+                f"  Completion tokens: {counters.get('completion_tokens', 0)}"
+            )
+            logger.info(
+                f"  Reasoning tokens:  {counters.get('reasoning_tokens', 0)}"
+            )
+    except Exception as e:
+        # The summary is informational only; never fail the eval over it, but
+        # leave a trace so a missing/broken usage endpoint can be diagnosed.
+        logger.debug(f"Token usage summary unavailable: {e}")
+
+
+def run_lm_eval(
+    config: dict[str, Any],
+    host: str,
+    port: int,
+    model: str,
+    output_path: str | None,
+    limit: int | None,
+    dry_run: bool,
+) -> int:
+    """Run lm_eval against the exo API and return the subprocess exit code.
+
+    The command is built by ``build_lm_eval_args`` for the chat-completions
+    endpoint. In dry-run mode the command is only logged and 0 is returned.
+    After a real run a token-usage summary is logged when exo provides one.
+    Returns 1 if the subprocess cannot be started (``FileNotFoundError``).
+    """
+    exo_base_url = f"http://{host}:{port}"
+
+    # Always target the chat-completions endpoint.
+    args = build_lm_eval_args(
+        config, exo_base_url, model, output_path, limit, use_completions=False
+    )
+    logger.info(f"lm_eval command: {' '.join(args)}")
+
+    if dry_run:
+        logger.info("[dry-run] Would execute the above command")
+        return 0
+
+    try:
+        # check=False: a non-zero exit is reported through the return code.
+        result = subprocess.run(args, check=False)
+    except FileNotFoundError:
+        logger.error("lm_eval not found. Install with: uv sync --extra eval")
+        return 1
+
+    _log_token_usage(exo_base_url)
+    return result.returncode
+
+
+def run_swe_bench(
+    config: dict[str, Any],
+    host: str,
+    port: int,
+    model: str,
+    output_path: str | None,
+    dry_run: bool,
+) -> int:
+    """Placeholder SWE-bench runner: logs the resolved settings and returns 0.
+
+    Settings come from the ``[swe_bench]`` config table; ``output_path``
+    overrides the configured predictions path. No evaluation is executed.
+    """
+    settings = config.get("swe_bench", {})
+
+    predictions_path = output_path or settings.get(
+        "predictions_path", "bench/predictions"
+    )
+    max_workers = settings.get("max_workers", 8)
+    dataset = settings.get("dataset", "princeton-nlp/SWE-bench_Lite")
+
+    summary = (
+        "SWE-bench evaluation configuration:",
+        f"  Dataset: {dataset}",
+        f"  Model: {model}",
+        f"  API endpoint: http://{host}:{port}/v1",
+        f"  Max workers: {max_workers}",
+        f"  Predictions path: {predictions_path}",
+    )
+    for line in summary:
+        logger.info(line)
+
+    if not dry_run:
+        logger.warning(
+            "SWE-bench integration is a placeholder. "
+            "Implement swebench inference and evaluation logic as needed."
+        )
+    else:
+        logger.info("[dry-run] SWE-bench evaluation would be executed")
+    return 0
+
+
+def run_custom_eval(
+    config: dict[str, Any],
+    host: str,
+    port: int,
+    model: str,
+    output_path: str | None,
+    dry_run: bool,
+) -> int:
+    """Run the script named in the ``[custom]`` config table with this interpreter.
+
+    Connection details are handed to the script through the ``EXO_HOST``,
+    ``EXO_PORT``, ``EXO_MODEL`` and (when given) ``EXO_OUTPUT_PATH`` environment
+    variables. Returns the script's exit code, 1 when the script is missing or
+    unspecified, and 0 in dry-run mode.
+    """
+    section = config.get("custom", {})
+
+    script = section.get("script")
+    if not script:
+        logger.error("No script specified in [custom] config section")
+        return 1
+
+    script_path = Path(script)
+    if not script_path.exists():
+        logger.error(f"Custom script not found: {script}")
+        return 1
+
+    # A scalar "args" value is passed as a single stringified argument.
+    extra_args = section.get("args", [])
+    if not isinstance(extra_args, list):
+        extra_args = [str(extra_args)]
+
+    cmd = [sys.executable, str(script_path), *extra_args]
+    logger.info(f"Custom eval command: {' '.join(cmd)}")
+
+    if dry_run:
+        logger.info("[dry-run] Would execute the above command")
+        return 0
+
+    # Child environment: the current one plus the exo connection info.
+    env = os.environ.copy()
+    env.update({"EXO_HOST": host, "EXO_PORT": str(port), "EXO_MODEL": model})
+    if output_path:
+        env["EXO_OUTPUT_PATH"] = output_path
+
+    return subprocess.run(cmd, env=env, check=False).returncode
+
+
+def write_results_metadata(
+    output_path: str,
+    config: dict[str, Any],
+    host: str,
+    port: int,
+    model: str,
+    eval_type: EvalType,
+    return_code: int,
+    preview: dict[str, Any] | None,
+) -> None:
+    """Write ``eval_metadata.json`` describing this run into ``output_path``.
+
+    ``output_path`` is treated as a directory and created if needed. Placement
+    details are included only when ``preview`` is non-empty.
+    """
+    record: dict[str, Any] = {
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "eval_type": eval_type,
+        "model": model,
+        "api_endpoint": f"http://{host}:{port}/v1",
+        "config": config,
+        "return_code": return_code,
+    }
+
+    if preview:
+        placement_id = None
+        if "instance" in preview:
+            placement_id = instance_id_from_instance(preview["instance"])
+        record["placement"] = {
+            "sharding": preview.get("sharding"),
+            "instance_meta": preview.get("instance_meta"),
+            "instance_id": placement_id,
+        }
+
+    target_dir = Path(output_path)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    metadata_path = target_dir / "eval_metadata.json"
+
+    # default=str stringifies any value json cannot serialise natively.
+    with metadata_path.open("w", encoding="utf-8") as sink:
+        json.dump(record, sink, indent=2, ensure_ascii=False, default=str)
+
+    logger.info(f"Wrote evaluation metadata to: {metadata_path}")
+
+
+def main() -> int:
+    """Main entry point for exo-eval.
+
+    Parses the CLI, loads the TOML config, applies CLI overrides, optionally
+    creates an exo instance, runs the configured evaluation, writes run
+    metadata when an output path is known, and tears the instance down again.
+
+    Returns:
+        The evaluation's exit code, or 1 on configuration/setup failure.
+    """
+    ap = argparse.ArgumentParser(
+        prog="exo-eval",
+        description="Evaluation harness for exo inference system.",
+    )
+    ap.add_argument(
+        "--config",
+        required=True,
+        help="Path to TOML configuration file",
+    )
+    ap.add_argument(
+        "--host",
+        default=os.environ.get("EXO_HOST", "localhost"),
+        help="exo API host (default: localhost or EXO_HOST env var)",
+    )
+    ap.add_argument(
+        "--port",
+        type=int,
+        default=int(os.environ.get("EXO_PORT", "52415")),
+        help="exo API port (default: 52415 or EXO_PORT env var)",
+    )
+    ap.add_argument(
+        "--model",
+        required=True,
+        help="Model name/ID to evaluate",
+    )
+    ap.add_argument(
+        "--output",
+        default=None,
+        help="Output path for results (overrides config)",
+    )
+    ap.add_argument(
+        "--limit",
+        type=int,
+        default=None,
+        help="Limit samples per task (overrides config, lm_eval only)",
+    )
+    ap.add_argument(
+        "--timeout",
+        type=float,
+        default=604800.0,
+        help="HTTP timeout in seconds (default: 604800 = 1 week)",
+    )
+    ap.add_argument(
+        "--skip-instance-setup",
+        action="store_true",
+        help="Skip instance creation (assume instance already running)",
+    )
+    ap.add_argument(
+        "--pipeline",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Use pipeline sharding with exactly N nodes (overrides config)",
+    )
+    ap.add_argument(
+        "--instance-meta",
+        choices=["ring", "jaccl", "both"],
+        default=None,
+        help="Instance meta preference (overrides config)",
+    )
+    ap.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print commands without executing",
+    )
+    args = ap.parse_args()
+
+    logger.info(f"exo-eval starting with config: {args.config}")
+
+    try:
+        config = load_config(args.config)
+    except FileNotFoundError as e:
+        logger.error(str(e))
+        return 1
+    except TOMLKitError as e:
+        logger.error(f"Failed to parse config: {e}")
+        return 1
+
+    # An unsupported [eval] type is a config error like the ones above: report
+    # it and exit with 1 instead of letting the ValueError escape as a traceback.
+    try:
+        eval_type = get_eval_type(config)
+    except ValueError as e:
+        logger.error(str(e))
+        return 1
+    logger.info(f"Evaluation type: {eval_type}")
+    logger.info(f"Model: {args.model}")
+    logger.info(f"API endpoint: http://{args.host}:{args.port}/v1")
+
+    # Apply CLI overrides to instance config
+    if args.pipeline is not None or args.instance_meta is not None:
+        instance_config = config.setdefault("instance", {})
+        if args.pipeline is not None:
+            # --pipeline N pins both node bounds to exactly N.
+            instance_config["sharding"] = "pipeline"
+            instance_config["min_nodes"] = args.pipeline
+            instance_config["max_nodes"] = args.pipeline
+            logger.info(f"CLI override: pipeline={args.pipeline} nodes")
+            # Limit concurrency for pipeline to avoid GPU timeouts
+            if args.pipeline >= 2:
+                lm_eval_config = config.setdefault("lm_eval", {})
+                lm_eval_config["num_concurrent"] = 4
+                logger.info("CLI override: num_concurrent=4 (pipeline>=2)")
+        if args.instance_meta is not None:
+            instance_config["instance_meta"] = args.instance_meta
+            logger.info(f"CLI override: instance_meta={args.instance_meta}")
+
+    # Check HuggingFace token if required
+    if not check_hf_token(config):
+        return 1
+
+    # Setup instance and resolve model
+    instance_id: str | None = None
+    preview: dict[str, Any] | None = None
+    client: ExoClient | None = None
+
+    if args.skip_instance_setup:
+        # Use model name as-is when skipping instance setup
+        full_model_id = args.model
+        logger.info(f"Using model: {full_model_id} (instance setup skipped)")
+    else:
+        client = ExoClient(args.host, args.port, timeout_s=args.timeout)
+
+        # Resolve model
+        try:
+            short_id, full_model_id = resolve_model_short_id(client, args.model)
+            logger.info(f"Resolved model: {short_id} -> {full_model_id}")
+        except Exception as e:
+            logger.error(f"Failed to resolve model: {e}")
+            return 1
+
+        instance_id, preview = setup_instance(
+            client, full_model_id, config, args.dry_run
+        )
+        # In dry-run mode a missing placement is not fatal; the commands are
+        # still printed below.
+        if instance_id is None and not args.dry_run:
+            return 1
+
+    try:
+        # Run evaluation
+        if eval_type == "lm_eval":
+            return_code = run_lm_eval(
+                config,
+                args.host,
+                args.port,
+                full_model_id,
+                args.output,
+                args.limit,
+                args.dry_run,
+            )
+        elif eval_type == "swe_bench":
+            return_code = run_swe_bench(
+                config,
+                args.host,
+                args.port,
+                full_model_id,
+                args.output,
+                args.dry_run,
+            )
+        elif eval_type == "custom":
+            return_code = run_custom_eval(
+                config,
+                args.host,
+                args.port,
+                full_model_id,
+                args.output,
+                args.dry_run,
+            )
+        else:
+            # Defensive: get_eval_type already rejects anything else.
+            logger.error(f"Unknown eval type: {eval_type}")
+            return 1
+
+        # Write metadata if output path specified and not dry-run
+        output_path = args.output or config.get(eval_type, {}).get("output_path")
+        if output_path and not args.dry_run:
+            write_results_metadata(
+                output_path,
+                config,
+                args.host,
+                args.port,
+                full_model_id,
+                eval_type,
+                return_code,
+                preview,
+            )
+
+        return return_code
+
+    finally:
+        # Teardown instance (runs on every exit path of the try block above)
+        if instance_id and client and not args.skip_instance_setup and not args.dry_run:
+            teardown_instance(client, instance_id)
+
+
+# Script entry point: main()'s integer result becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/bench/lm_eval_patched.py
+++ b/bench/lm_eval_patched.py
@@ -0,0 +1,145 @@
+"""Patched lm_eval runner that fixes bugs in the upstream library.
+
+Fixes:
+- UnboundLocalError on `outputs` in TemplateAPI.amodel_call when API returns error
+- Prevents eval crash on transient API failures (returns empty-string results instead of raising)
+- Compatibility with transformers 5.x (missing AutoModelForVision2Seq)
+- sock_read timeout causing connection drops with large request queues
+
+Usage: python -m bench.lm_eval_patched [lm_eval args...]
+"""
+
+# ruff: noqa: I001, E402
+# pyright: reportMissingTypeStubs=false, reportUnknownVariableType=false
+# pyright: reportUnknownMemberType=false, reportAny=false, reportUnknownArgumentType=false
+# pyright: reportPrivateUsage=false, reportUnknownLambdaType=false
+
+# MUST patch transformers BEFORE any lm_eval imports
+# AutoModelForVision2Seq/AutoModelForImageTextToText were removed in transformers 5.0
+# Patch the lazy module's __getattr__ to return stubs for missing classes
+from transformers.utils import import_utils
+
+_original_getattr = import_utils._LazyModule.__getattr__
+
+
+def _patched_getattr(self: object, name: str) -> object:
+    """Attribute hook that fabricates empty stub classes for two removed names.
+
+    Every other attribute lookup is delegated to the original ``__getattr__``.
+    """
+    removed_classes = ("AutoModelForVision2Seq", "AutoModelForImageTextToText")
+    if name not in removed_classes:
+        return _original_getattr(self, name)  # type: ignore
+    # Hand back an empty placeholder class carrying the requested name.
+    return type(name, (), {})
+
+
+import_utils._LazyModule.__getattr__ = _patched_getattr
+
+import functools
+from typing import Any
+
+
+def _patch_amodel_call() -> None:
+    """Monkey-patch TemplateAPI.amodel_call so a failed call cannot crash the eval.
+
+    The wrapper awaits the original coroutine and, if it raises any
+    ``Exception`` (including the ``UnboundLocalError`` on ``outputs``), logs a
+    warning and returns one empty string per message in the batch instead.
+    """
+    import logging
+
+    from lm_eval.models.api_models import TemplateAPI
+
+    eval_logger = logging.getLogger("lm_eval.models.api_models")
+    original: Any = TemplateAPI.amodel_call
+
+    @functools.wraps(original)
+    async def patched_amodel_call(self: Any, *args: Any, **kwargs: Any) -> Any:
+        try:
+            return await original(self, *args, **kwargs)
+        except Exception as exc:
+            # UnboundLocalError is an Exception subclass, so one clause covers
+            # both. Log the failure so dropped batches are visible in the run.
+            eval_logger.warning(
+                f"amodel_call failed ({type(exc).__name__}: {exc}); "
+                "returning empty results for this batch"
+            )
+            # Return one empty-string result per request in the batch so the
+            # reorderer doesn't assert on missing coverage. `messages` is read
+            # from the keyword argument, else from the third positional one.
+            messages = kwargs.get("messages") or (args[2] if len(args) > 2 else [])
+            return [""] * max(len(messages), 1)
+
+    TemplateAPI.amodel_call = patched_amodel_call
+
+
+def _patch_client_timeout() -> None:
+    """Patch TemplateAPI.get_batched_requests to disable sock_read timeout.
+
+    By default, aiohttp's ClientTimeout can have a sock_read timeout that causes
+    connections to drop if no data is received for a while. With large request
+    queues, requests may wait a long time before processing starts, causing
+    spurious connection drops and retries that pile up requests.
+
+    The replacement does not delegate to the original method: it opens its own
+    ClientSession with the adjusted timeout and re-implements the batching and
+    retry logic in ``_run_batched_requests_with_session``.
+    """
+    from aiohttp import ClientSession, ClientTimeout, TCPConnector
+
+    from lm_eval.models.api_models import TemplateAPI
+
+    # Kept only so functools.wraps can copy the original's metadata; it is
+    # never called.
+    original_get_batched: Any = TemplateAPI.get_batched_requests
+
+    @functools.wraps(original_get_batched)
+    async def patched_get_batched_requests(self: Any, *args: Any, **kwargs: Any) -> Any:
+        # Override the timeout to explicitly disable sock_read timeout
+        # This prevents connection drops when requests are queued for a long time
+        # Falls back to 604800 s (one week) when the instance has no `timeout`.
+        original_timeout = getattr(self, "timeout", 604800)
+        conn = TCPConnector(limit=self._concurrent, ssl=self.verify_certificate)
+        timeout = ClientTimeout(
+            total=original_timeout, sock_read=None, sock_connect=None
+        )
+
+        async with ClientSession(connector=conn, timeout=timeout) as session:
+            # Call the internal async logic with our session
+            return await _run_batched_requests_with_session(
+                self, session, *args, **kwargs
+            )
+
+    async def _run_batched_requests_with_session(
+        self: Any,
+        session: ClientSession,
+        requests: Any,
+        cache_keys: Any = None,
+        ctxlens: Any = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Issue ``requests`` in batches over ``session`` and gather the results.
+
+        Requests, cache keys and context lengths are chunked by
+        ``self._batch_size``; each chunk becomes one retried ``amodel_call``
+        task, with concurrency bounded by ``self._concurrent``.
+        """
+        import asyncio
+        import copy
+        import logging
+
+        from tqdm.asyncio import tqdm_asyncio
+        from tenacity import retry, stop_after_attempt, wait_exponential
+        from lm_eval.models.utils import chunks
+
+        eval_logger = logging.getLogger("lm_eval.models.api_models")
+        # Pad with None so ctxlens lines up one-to-one with requests.
+        ctxlens = ctxlens if ctxlens else [None] * len(requests)
+        sem = asyncio.Semaphore(self._concurrent)
+
+        # Wrap amodel_call with exponential-backoff retries (1-10 s waits),
+        # re-raising the last error once max_retries attempts are exhausted.
+        retry_: Any = retry(
+            stop=stop_after_attempt(self.max_retries),
+            wait=wait_exponential(multiplier=0.5, min=1, max=10),
+            reraise=True,
+            before_sleep=lambda retry_state: eval_logger.info(
+                f"Retry attempt {retry_state.attempt_number}"
+            ),
+        )(self.amodel_call)
+
+        # One task per batch. gen_kwargs is deep-copied per task so tasks do
+        # not share a mutable object; all other kwargs are forwarded as-is.
+        # NOTE(review): cache_keys defaults to None but is passed to chunks()
+        # unconditionally - presumably callers always supply it; confirm.
+        tasks = [
+            asyncio.create_task(
+                retry_(
+                    session=session,
+                    sem=sem,
+                    messages=message,
+                    cache_keys=cache_key,
+                    ctxlens=ctxlen,
+                    gen_kwargs=copy.deepcopy(kwargs.get("gen_kwargs")),
+                    **{k: v for k, v in kwargs.items() if k != "gen_kwargs"},
+                )
+            )
+            for message, cache_key, ctxlen in zip(
+                chunks(requests, n=self._batch_size),
+                chunks(cache_keys, n=self._batch_size),
+                chunks(ctxlens, n=self._batch_size),
+                strict=True,
+            )
+        ]
+
+        return await tqdm_asyncio.gather(*tasks, desc="Requesting API")
+
+    TemplateAPI.get_batched_requests = patched_get_batched_requests
+
+
+if __name__ == "__main__":
+    # Install both monkey-patches first, then import and run the lm_eval CLI.
+    _patch_amodel_call()
+    _patch_client_timeout()
+    from lm_eval.__main__ import cli_evaluate
+
+    cli_evaluate()
--- a/bench/pyproject.toml
+++ b/bench/pyproject.toml
@@ -1,16 +0,0 @@
-[project]
-name = "exo-bench"
-version = "0.1.0"
-description = "Benchmarking tool for exo distributed inference"
-requires-python = ">=3.13"
-dependencies = [
-    "loguru>=0.7.3",
-    "transformers>=5.0.0",
-    "huggingface-hub>=0.33.4",
-    "tiktoken>=0.12.0",
-    "jinja2>=3.1.0",
-]
-
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
--- a/bench/stats_dashboard.html
+++ b/bench/stats_dashboard.html
@@ -0,0 +1,290 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>exo Usage Stats</title>
+<style>
+  * { margin: 0; padding: 0; box-sizing: border-box; }
+  body {
+    font-family: -apple-system, BlinkMacSystemFont, 'SF Mono', 'Menlo', monospace;
+    background: #1a1a2e;
+    color: #e0e0e0;
+    padding: 24px;
+    min-height: 100vh;
+  }
+  .header {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    margin-bottom: 24px;
+    padding-bottom: 16px;
+    border-bottom: 1px solid #333;
+  }
+  .header h1 {
+    font-size: 20px;
+    font-weight: 600;
+    color: #fff;
+  }
+  .status {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    font-size: 13px;
+    color: #888;
+  }
+  .status-dot {
+    width: 8px;
+    height: 8px;
+    border-radius: 50%;
+    background: #666;
+  }
+  .status-dot.connected { background: #4caf50; }
+  .status-dot.error { background: #f44336; }
+  .config {
+    margin-bottom: 24px;
+    display: flex;
+    align-items: center;
+    gap: 8px;
+  }
+  .config label {
+    font-size: 12px;
+    color: #888;
+  }
+  .config input {
+    background: #252540;
+    border: 1px solid #444;
+    border-radius: 4px;
+    color: #e0e0e0;
+    padding: 4px 8px;
+    font-size: 13px;
+    font-family: inherit;
+    width: 280px;
+  }
+  .section {
+    background: #252540;
+    border-radius: 8px;
+    padding: 20px;
+    margin-bottom: 16px;
+  }
+  .section h2 {
+    font-size: 14px;
+    font-weight: 600;
+    color: #aaa;
+    text-transform: uppercase;
+    letter-spacing: 0.5px;
+    margin-bottom: 16px;
+  }
+  .stat-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+    gap: 16px;
+  }
+  .stat-card {
+    background: #1a1a2e;
+    border-radius: 6px;
+    padding: 16px;
+  }
+  .stat-label {
+    font-size: 11px;
+    color: #888;
+    text-transform: uppercase;
+    letter-spacing: 0.5px;
+    margin-bottom: 4px;
+  }
+  .stat-value {
+    font-size: 28px;
+    font-weight: 700;
+    color: #fff;
+  }
+  .stat-rate {
+    font-size: 12px;
+    color: #4caf50;
+    margin-top: 4px;
+  }
+  table {
+    width: 100%;
+    border-collapse: collapse;
+    font-size: 13px;
+  }
+  th {
+    text-align: left;
+    padding: 8px 12px;
+    color: #888;
+    font-weight: 500;
+    border-bottom: 1px solid #333;
+    font-size: 11px;
+    text-transform: uppercase;
+    letter-spacing: 0.5px;
+  }
+  td {
+    padding: 8px 12px;
+    border-bottom: 1px solid #2a2a45;
+  }
+  td.num {
+    text-align: right;
+    font-variant-numeric: tabular-nums;
+  }
+  .model-name {
+    color: #7c9eff;
+    max-width: 300px;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+  }
+  .empty-state {
+    color: #666;
+    font-style: italic;
+    padding: 16px 0;
+  }
+</style>
+</head>
+<body>
+  <div class="header">
+    <h1>exo Usage Stats</h1>
+    <div class="status">
+      <div class="status-dot" id="statusDot"></div>
+      <span id="statusText">connecting...</span>
+    </div>
+  </div>
+
+  <div class="config">
+    <label for="baseUrl">Base URL:</label>
+    <input type="text" id="baseUrl" value="http://mac8-1:52415">
+  </div>
+
+  <div class="section">
+    <h2>Totals</h2>
+    <div class="stat-grid">
+      <div class="stat-card">
+        <div class="stat-label">Requests</div>
+        <div class="stat-value" id="totalRequests">0</div>
+      </div>
+      <div class="stat-card">
+        <div class="stat-label">Prompt Tokens</div>
+        <div class="stat-value" id="totalPrompt">0</div>
+        <div class="stat-rate" id="promptRate"></div>
+      </div>
+      <div class="stat-card">
+        <div class="stat-label">Completion Tokens</div>
+        <div class="stat-value" id="totalCompletion">0</div>
+        <div class="stat-rate" id="completionRate"></div>
+      </div>
+      <div class="stat-card">
+        <div class="stat-label">Reasoning Tokens</div>
+        <div class="stat-value" id="totalReasoning">0</div>
+      </div>
+      <div class="stat-card">
+        <div class="stat-label">Total Tokens</div>
+        <div class="stat-value" id="totalTokens">0</div>
+        <div class="stat-rate" id="totalRate"></div>
+      </div>
+    </div>
+  </div>
+
+  <div class="section">
+    <h2>Per-Model Breakdown</h2>
+    <div id="modelTable">
+      <div class="empty-state">No data yet</div>
+    </div>
+  </div>
+
+<script>
+
+  function fmt(n) {  // Display helper: callers in this page pass numeric counters.
+    return n.toLocaleString();
+  }
+
+  // Baseline: time and token counts of the first non-zero reading, used for the overall average rate
+  let firstSeenTime = null;
+  let firstSeenTokens = { prompt: 0, completion: 0, total: 0 };
+
+  // Show the average tokens/sec gained since the baseline reading; blank when it cannot be computed.
+  function setRate(id, currentTokens, tokenType) {
+    const target = document.getElementById(id);
+    const baseline = firstSeenTokens[tokenType];
+    const seconds = firstSeenTime === null ? 0 : performance.now() / 1000 - firstSeenTime;
+    if (firstSeenTime === null || currentTokens <= baseline || seconds <= 0) {
+      target.textContent = '';
+      return;
+    }
+    const perSecond = Math.round((currentTokens - baseline) / seconds);
+    target.textContent = fmt(perSecond) + ' tok/s avg';
+  }
+
+  // Rebuild the per-model table from byModel (model name -> counters object).
+  // Names come from the server response, so they are HTML-escaped before being
+  // interpolated into the markup assigned to innerHTML.
+  function renderModelTable(byModel) {
+    const container = document.getElementById('modelTable');
+    const models = Object.entries(byModel);
+    if (models.length === 0) {
+      container.innerHTML = '<div class="empty-state">No data yet</div>';
+      return;
+    }
+    // Minimal HTML escaping, valid for text nodes and quoted attribute values.
+    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
+    const esc = (s) => String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
+    const num = (v) => `<td class="num">${fmt(v)}</td>`;
+    const heads = ['Requests', 'Prompt', 'Completion', 'Reasoning', 'Total']
+      .map((h) => `<th style="text-align:right">${h}</th>`).join('');
+    let html = `<table><thead><tr><th>Model</th>${heads}</tr></thead><tbody>`;
+    for (const [name, counters] of models) {
+      const total = (counters.prompt_tokens || 0) + (counters.completion_tokens || 0);
+      const safe = esc(name);
+      html += `<tr><td class="model-name" title="${safe}">${safe}</td>`;
+      html += num(counters.requests || 0) + num(counters.prompt_tokens || 0);
+      html += num(counters.completion_tokens || 0) + num(counters.reasoning_tokens || 0);
+      html += num(total) + '</tr>';
+    }
+    html += '</tbody></table>';
+    container.innerHTML = html;
+  }
+
+  // Fetch /v1/usage from the configured base URL and refresh every widget on the page.
+  async function poll() {
+    const byId = (id) => document.getElementById(id);
+    const baseUrl = byId('baseUrl').value.replace(/\/+$/, '');
+    const dot = byId('statusDot');
+    const text = byId('statusText');
+
+    try {
+      const resp = await fetch(baseUrl + '/v1/usage');
+      if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
+      const data = await resp.json();
+      const promptTokens = data.total_prompt_tokens || 0;
+      const completionTokens = data.total_completion_tokens || 0;
+      const totalTokens = data.total_tokens || 0;
+
+      dot.className = 'status-dot connected';
+      text.textContent = 'connected';
+
+      byId('totalRequests').textContent = fmt(data.total_requests || 0);
+      byId('totalPrompt').textContent = fmt(promptTokens);
+      byId('totalCompletion').textContent = fmt(completionTokens);
+      byId('totalReasoning').textContent = fmt(data.total_reasoning_tokens || 0);
+      byId('totalTokens').textContent = fmt(totalTokens);
+
+      // The first reading with a non-zero total becomes the baseline for average rates.
+      if (firstSeenTime === null && totalTokens > 0) {
+        firstSeenTime = performance.now() / 1000;
+        firstSeenTokens = {
+          prompt: promptTokens, completion: completionTokens, total: totalTokens,
+        };
+      }
+
+      setRate('promptRate', promptTokens, 'prompt');
+      setRate('completionRate', completionTokens, 'completion');
+      setRate('totalRate', totalTokens, 'total');
+      renderModelTable(data.by_model || {});
+    } catch (e) {
+      dot.className = 'status-dot error';
+      text.textContent = e.message || 'error';
+    }
+  }
+
+  poll();  // first refresh immediately on load
+  setInterval(poll, 1000);  // then once per second; polls are not awaited, so slow responses may overlap
+</script>
+</body>
+</html>
--- a/dashboard/package-lock.json
+++ b/dashboard/package-lock.json
@@ -865,6 +865,7 @@
 			"integrity": "sha512-oH8tXw7EZnie8FdOWYrF7Yn4IKrqTFHhXvl8YxXxbKwTMcD/5NNCryUSEXRk2ZR4ojnub0P8rNrsVGHXWqIDtA==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@standard-schema/spec": "^1.0.0",
 				"@sveltejs/acorn-typescript": "^1.0.5",
@@ -904,6 +905,7 @@
 			"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
 				"debug": "^4.4.1",
@@ -1520,6 +1522,7 @@
 			"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"undici-types": "~6.21.0"
 			}
@@ -1529,6 +1532,7 @@
 			"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
 			"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
 			"license": "MIT",
+			"peer": true,
 			"bin": {
 				"acorn": "bin/acorn"
 			},
@@ -1941,6 +1945,7 @@
 			"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
 			"dev": true,
 			"license": "ISC",
+			"peer": true,
 			"engines": {
 				"node": ">=12"
 			}
@@ -2648,6 +2653,7 @@
 			"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"engines": {
 				"node": ">=12"
 			},
@@ -2690,6 +2696,7 @@
 			"integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"bin": {
 				"prettier": "bin/prettier.cjs"
 			},
@@ -2862,6 +2869,7 @@
 			"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.45.3.tgz",
 			"integrity": "sha512-ngKXNhNvwPzF43QqEhDOue7TQTrG09em1sd4HBxVF0Wr2gopAmdEWan+rgbdgK4fhBtSOTJO8bYU4chUG7VXZQ==",
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@jridgewell/remapping": "^2.3.4",
 				"@jridgewell/sourcemap-codec": "^1.5.0",
@@ -3006,6 +3014,7 @@
 			"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
 			"dev": true,
 			"license": "Apache-2.0",
+			"peer": true,
 			"bin": {
 				"tsc": "bin/tsc",
 				"tsserver": "bin/tsserver"
@@ -3027,6 +3036,7 @@
 			"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"esbuild": "^0.25.0",
 				"fdir": "^6.4.4",
--- a/dashboard/src/lib/components/ChatForm.svelte
+++ b/dashboard/src/lib/components/ChatForm.svelte
@@ -599,8 +599,9 @@
            : isImageModel()
              ? "Describe the image you want to generate..."
              : placeholder}
+        disabled={loading}
        rows={1}
-        class="flex-1 resize-none bg-transparent text-foreground placeholder:text-exo-light-gray/60 placeholder:text-sm placeholder:tracking-[0.15em] placeholder:leading-7 focus:outline-none focus:ring-0 focus:border-none text-sm leading-7 font-mono"
+        class="flex-1 resize-none bg-transparent text-foreground placeholder:text-exo-light-gray/60 placeholder:text-sm placeholder:tracking-[0.15em] placeholder:leading-7 focus:outline-none focus:ring-0 focus:border-none disabled:opacity-50 text-sm leading-7 font-mono"
        style="min-height: 28px; max-height: 150px;"
      ></textarea>

--- a/dashboard/src/lib/components/ChatMessages.svelte
+++ b/dashboard/src/lib/components/ChatMessages.svelte
@@ -6,14 +6,11 @@
    deleteMessage,
    editAndRegenerate,
    regenerateLastResponse,
-    regenerateFromToken,
    setEditingImage,
  } from "$lib/stores/app.svelte";
  import type { Message } from "$lib/stores/app.svelte";
  import type { MessageAttachment } from "$lib/stores/app.svelte";
  import MarkdownContent from "./MarkdownContent.svelte";
-  import TokenHeatmap from "./TokenHeatmap.svelte";
-  import ImageLightbox from "./ImageLightbox.svelte";

  interface Props {
    class?: string;
@@ -102,26 +99,6 @@
  let copiedMessageId = $state<string | null>(null);
  let expandedThinkingMessageIds = $state<Set<string>>(new Set());

-  // Lightbox state
-  let expandedImageSrc = $state<string | null>(null);
-
-  // Uncertainty heatmap toggle
-  let heatmapMessageIds = $state<Set<string>>(new Set());
-
-  function toggleHeatmap(messageId: string) {
-    const next = new Set(heatmapMessageIds);
-    if (next.has(messageId)) {
-      next.delete(messageId);
-    } else {
-      next.add(messageId);
-    }
-    heatmapMessageIds = next;
-  }
-
-  function isHeatmapVisible(messageId: string): boolean {
-    return heatmapMessageIds.has(messageId);
-  }
-
  function formatTimestamp(timestamp: number): string {
    return new Date(timestamp).toLocaleTimeString("en-US", {
      hour12: false,
@@ -393,15 +370,10 @@
                        class="flex items-center gap-2 bg-exo-dark-gray/60 border border-exo-yellow/20 rounded px-2 py-1 text-xs font-mono"
                      >
                        {#if attachment.type === "image" && attachment.preview}
-                          <!-- svelte-ignore a11y_no_noninteractive_element_interactions, a11y_click_events_have_key_events -->
                          <img
                            src={attachment.preview}
                            alt={attachment.name}
-                            class="w-12 h-12 object-cover rounded border border-exo-yellow/20 cursor-pointer hover:border-exo-yellow/50 transition-colors"
-                            onclick={() => {
-                              if (attachment.preview)
-                                expandedImageSrc = attachment.preview;
-                            }}
+                            class="w-12 h-12 object-cover rounded border border-exo-yellow/20"
                          />
                        {:else}
                          <span>{getAttachmentIcon(attachment)}</span>
@@ -475,44 +447,15 @@
                  <div class="mb-3">
                    {#each message.attachments.filter((a) => a.type === "generated-image") as attachment}
                      <div class="relative group/img inline-block">
-                        <!-- svelte-ignore a11y_no_noninteractive_element_interactions, a11y_click_events_have_key_events -->
                        <img
                          src={attachment.preview}
                          alt=""
-                          class="max-w-full max-h-[512px] rounded-lg border border-exo-yellow/20 shadow-lg shadow-black/20 cursor-pointer"
-                          onclick={() => {
-                            if (attachment.preview)
-                              expandedImageSrc = attachment.preview;
-                          }}
+                          class="max-w-full max-h-[512px] rounded-lg border border-exo-yellow/20 shadow-lg shadow-black/20"
                        />
                        <!-- Button overlay -->
                        <div
                          class="absolute top-2 right-2 flex gap-1 opacity-0 group-hover/img:opacity-100 transition-opacity"
                        >
-                          <!-- Expand button -->
-                          <button
-                            type="button"
-                            class="p-2 rounded-lg bg-exo-dark-gray/80 border border-exo-yellow/30 text-exo-yellow hover:bg-exo-dark-gray hover:border-exo-yellow/50 cursor-pointer"
-                            onclick={() => {
-                              if (attachment.preview)
-                                expandedImageSrc = attachment.preview;
-                            }}
-                            title="Expand image"
-                          >
-                            <svg
-                              class="w-4 h-4"
-                              fill="none"
-                              viewBox="0 0 24 24"
-                              stroke="currentColor"
-                              stroke-width="2"
-                            >
-                              <path
-                                stroke-linecap="round"
-                                stroke-linejoin="round"
-                                d="M4 8V4m0 0h4M4 4l5 5m11-1V4m0 0h-4m4 0l-5 5M4 16v4m0 0h4m-4 0l5-5m11 5l-5-5m5 5v-4m0 4h-4"
-                              />
-                            </svg>
-                          </button>
                          <!-- Edit button -->
                          <button
                            type="button"
@@ -605,23 +548,13 @@
                      >
                    </div>
                  {:else if message.content || (loading && !message.attachments?.some((a) => a.type === "generated-image"))}
-                    {#if isHeatmapVisible(message.id) && message.tokens && message.tokens.length > 0}
-                      <TokenHeatmap
-                        tokens={message.tokens}
-                        isGenerating={loading &&
-                          isLastAssistantMessage(message.id)}
-                        onRegenerateFrom={(tokenIndex) =>
-                          regenerateFromToken(message.id, tokenIndex)}
-                      />
-                    {:else}
-                      <MarkdownContent
-                        content={message.content || (loading ? response : "")}
-                      />
-                      {#if loading && !message.content}
-                        <span
-                          class="inline-block w-2 h-4 bg-exo-yellow/70 ml-1 cursor-blink"
-                        ></span>
-                      {/if}
+                    <MarkdownContent
+                      content={message.content || (loading ? response : "")}
+                    />
+                    {#if loading && !message.content}
+                      <span
+                        class="inline-block w-2 h-4 bg-exo-yellow/70 ml-1 cursor-blink"
+                      ></span>
                    {/if}
                  {/if}
                </div>
@@ -696,35 +629,6 @@
              </button>
            {/if}

-            <!-- Uncertainty heatmap toggle (assistant messages with tokens) -->
-            {#if message.role === "assistant" && message.tokens && message.tokens.length > 0}
-              <button
-                onclick={() => toggleHeatmap(message.id)}
-                class="p-1.5 transition-colors rounded cursor-pointer {isHeatmapVisible(
-                  message.id,
-                )
-                  ? 'text-exo-yellow'
-                  : 'text-exo-light-gray hover:text-exo-yellow'}"
-                title={isHeatmapVisible(message.id)
-                  ? "Hide uncertainty heatmap"
-                  : "Show uncertainty heatmap"}
-              >
-                <svg
-                  class="w-3.5 h-3.5"
-                  fill="none"
-                  viewBox="0 0 24 24"
-                  stroke="currentColor"
-                >
-                  <path
-                    stroke-linecap="round"
-                    stroke-linejoin="round"
-                    stroke-width="2"
-                    d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"
-                  />
-                </svg>
-              </button>
-            {/if}
-
            <!-- Regenerate button (last assistant message only) -->
            {#if message.role === "assistant" && isLastAssistantMessage(message.id) && !loading}
              <button
@@ -827,8 +731,3 @@
    </button>
  {/if}
 </div>
-
-<ImageLightbox
-  src={expandedImageSrc}
-  onclose={() => (expandedImageSrc = null)}
-/>
--- a/dashboard/src/lib/components/FamilyLogos.svelte
+++ b/dashboard/src/lib/components/FamilyLogos.svelte
@@ -1,85 +0,0 @@
-<script lang="ts">
-  type FamilyLogoProps = {
-    family: string;
-    class?: string;
-  };
-
-  let { family, class: className = "" }: FamilyLogoProps = $props();
-</script>
-
-{#if family === "favorites"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
-    />
-  </svg>
-{:else if family === "llama" || family === "meta"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M6.915 4.03c-1.968 0-3.683 1.28-4.871 3.113C.704 9.208 0 11.883 0 14.449c0 .706.07 1.369.21 1.973a6.624 6.624 0 0 0 .265.86 5.297 5.297 0 0 0 .371.761c.696 1.159 1.818 1.927 3.593 1.927 1.497 0 2.633-.671 3.965-2.444.76-1.012 1.144-1.626 2.663-4.32l.756-1.339.186-.325c.061.1.121.196.183.3l2.152 3.595c.724 1.21 1.665 2.556 2.47 3.314 1.046.987 1.992 1.22 3.06 1.22 1.075 0 1.876-.355 2.455-.843a3.743 3.743 0 0 0 .81-.973c.542-.939.861-2.127.861-3.745 0-2.72-.681-5.357-2.084-7.45-1.282-1.912-2.957-2.93-4.716-2.93-1.047 0-2.088.467-3.053 1.308-.652.57-1.257 1.29-1.82 2.05-.69-.875-1.335-1.547-1.958-2.056-1.182-.966-2.315-1.303-3.454-1.303zm10.16 2.053c1.147 0 2.188.758 2.992 1.999 1.132 1.748 1.647 4.195 1.647 6.4 0 1.548-.368 2.9-1.839 2.9-.58 0-1.027-.23-1.664-1.004-.496-.601-1.343-1.878-2.832-4.358l-.617-1.028a44.908 44.908 0 0 0-1.255-1.98c.07-.109.141-.224.211-.327 1.12-1.667 2.118-2.602 3.358-2.602zm-10.201.553c1.265 0 2.058.791 2.675 1.446.307.327.737.871 1.234 1.579l-1.02 1.566c-.757 1.163-1.882 3.017-2.837 4.338-1.191 1.649-1.81 1.817-2.486 1.817-.524 0-1.038-.237-1.383-.794-.263-.426-.464-1.13-.464-2.046 0-2.221.63-4.535 1.66-6.088.454-.687.964-1.226 1.533-1.533a2.264 2.264 0 0 1 1.088-.285z"
-    />
-  </svg>
-{:else if family === "qwen"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12.604 1.34c.393.69.784 1.382 1.174 2.075a.18.18 0 00.157.091h5.552c.174 0 .322.11.446.327l1.454 2.57c.19.337.24.478.024.837-.26.43-.513.864-.76 1.3l-.367.658c-.106.196-.223.28-.04.512l2.652 4.637c.172.301.111.494-.043.77-.437.785-.882 1.564-1.335 2.34-.159.272-.352.375-.68.37-.777-.016-1.552-.01-2.327.016a.099.099 0 00-.081.05 575.097 575.097 0 01-2.705 4.74c-.169.293-.38.363-.725.364-.997.003-2.002.004-3.017.002a.537.537 0 01-.465-.271l-1.335-2.323a.09.09 0 00-.083-.049H4.982c-.285.03-.553-.001-.805-.092l-1.603-2.77a.543.543 0 01-.002-.54l1.207-2.12a.198.198 0 000-.197 550.951 550.951 0 01-1.875-3.272l-.79-1.395c-.16-.31-.173-.496.095-.965.465-.813.927-1.625 1.387-2.436.132-.234.304-.334.584-.335a338.3 338.3 0 012.589-.001.124.124 0 00.107-.063l2.806-4.895a.488.488 0 01.422-.246c.524-.001 1.053 0 1.583-.006L11.704 1c.341-.003.724.032.9.34zm-3.432.403a.06.06 0 00-.052.03L6.254 6.788a.157.157 0 01-.135.078H3.253c-.056 0-.07.025-.041.074l5.81 10.156c.025.042.013.062-.034.063l-2.795.015a.218.218 0 00-.2.116l-1.32 2.31c-.044.078-.021.118.068.118l5.716.008c.046 0 .08.02.104.061l1.403 2.454c.046.081.092.082.139 0l5.006-8.76.783-1.382a.055.055 0 01.096 0l1.424 2.53a.122.122 0 00.107.062l2.763-.02a.04.04 0 00.035-.02.041.041 0 000-.04l-2.9-5.086a.108.108 0 010-.113l.293-.507 1.12-1.977c.024-.041.012-.062-.035-.062H9.2c-.059 0-.073-.026-.043-.077l1.434-2.505a.107.107 0 000-.114L9.225 1.774a.06.06 0 00-.053-.031zm6.29 8.02c.046 0 .058.02.034.06l-.832 1.465-2.613 4.585a.056.056 0 01-.05.029.058.058 0 01-.05-.029L8.498 9.841c-.02-.034-.01-.052.028-.054l.216-.012 6.722-.012z"
-    />
-  </svg>
-{:else if family === "deepseek"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M23.748 4.482c-.254-.124-.364.113-.512.234-.051.039-.094.09-.137.136-.372.397-.806.657-1.373.626-.829-.046-1.537.214-2.163.848-.133-.782-.575-1.248-1.247-1.548-.352-.156-.708-.311-.955-.65-.172-.241-.219-.51-.305-.774-.055-.16-.11-.323-.293-.35-.2-.031-.278.136-.356.276-.313.572-.434 1.202-.422 1.84.027 1.436.633 2.58 1.838 3.393.137.093.172.187.129.323-.082.28-.18.552-.266.833-.055.179-.137.217-.329.14a5.526 5.526 0 01-1.736-1.18c-.857-.828-1.631-1.742-2.597-2.458a11.365 11.365 0 00-.689-.471c-.985-.957.13-1.743.388-1.836.27-.098.093-.432-.779-.428-.872.004-1.67.295-2.687.684a3.055 3.055 0 01-.465.137 9.597 9.597 0 00-2.883-.102c-1.885.21-3.39 1.102-4.497 2.623C.082 8.606-.231 10.684.152 12.85c.403 2.284 1.569 4.175 3.36 5.653 1.858 1.533 3.997 2.284 6.438 2.14 1.482-.085 3.133-.284 4.994-1.86.47.234.962.327 1.78.397.63.059 1.236-.03 1.705-.128.735-.156.684-.837.419-.961-2.155-1.004-1.682-.595-2.113-.926 1.096-1.296 2.746-2.642 3.392-7.003.05-.347.007-.565 0-.845-.004-.17.035-.237.23-.256a4.173 4.173 0 001.545-.475c1.396-.763 1.96-2.015 2.093-3.517.02-.23-.004-.467-.247-.588zM11.581 18c-2.089-1.642-3.102-2.183-3.52-2.16-.392.024-.321.471-.235.763.09.288.207.486.371.739.114.167.192.416-.113.603-.673.416-1.842-.14-1.897-.167-1.361-.802-2.5-1.86-3.301-3.307-.774-1.393-1.224-2.887-1.298-4.482-.02-.386.093-.522.477-.592a4.696 4.696 0 011.529-.039c2.132.312 3.946 1.265 5.468 2.774.868.86 1.525 1.887 2.202 2.891.72 1.066 1.494 2.082 2.48 2.914.348.292.625.514.891.677-.802.09-2.14.11-3.054-.614zm1-6.44a.306.306 0 01.415-.287.302.302 0 01.2.288.306.306 0 01-.31.307.303.303 0 01-.304-.308zm3.11 1.596c-.2.081-.399.151-.59.16a1.245 1.245 0 01-.798-.254c-.274-.23-.47-.358-.552-.758a1.73 1.73 0 01.016-.588c.07-.327-.008-.537-.239-.727-.187-.156-.426-.199-.688-.199a.559.559 0 01-.254-.078c-.11-.054-.2-.19-.114-.358.028-.054.16-.186.192-.21.356-.202.767-.136 1.146.016.352.144.618.408 1.001.782.391.451.462.576.685.914.176.265.336.537.445.848.067.195-.019.354-.25.452z"
-    />
-  </svg>
-{:else if family === "openai" || family === "gpt-oss"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z"
-    />
-  </svg>
-{:else if family === "glm"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M11.991 23.503a.24.24 0 00-.244.248.24.24 0 00.244.249.24.24 0 00.245-.249.24.24 0 00-.22-.247l-.025-.001zM9.671 5.365a1.697 1.697 0 011.099 2.132l-.071.172-.016.04-.018.054c-.07.16-.104.32-.104.498-.035.71.47 1.279 1.186 1.314h.366c1.309.053 2.338 1.173 2.286 2.523-.052 1.332-1.152 2.38-2.478 2.327h-.174c-.715.018-1.274.64-1.239 1.368 0 .124.018.23.053.337.209.373.54.658.96.8.75.23 1.517-.125 1.9-.782l.018-.035c.402-.64 1.17-.96 1.92-.711.854.284 1.378 1.226 1.099 2.167a1.661 1.661 0 01-2.077 1.102 1.711 1.711 0 01-.907-.711l-.017-.035c-.2-.323-.463-.58-.851-.711l-.056-.018a1.646 1.646 0 00-1.954.746 1.66 1.66 0 01-1.065.764 1.677 1.677 0 01-1.989-1.279c-.209-.906.332-1.83 1.257-2.043a1.51 1.51 0 01.296-.035h.018c.68-.071 1.151-.622 1.116-1.333a1.307 1.307 0 00-.227-.693 2.515 2.515 0 01-.366-1.403 2.39 2.39 0 01.366-1.208c.14-.195.21-.444.227-.693.018-.71-.506-1.261-1.186-1.332l-.07-.018a1.43 1.43 0 01-.299-.07l-.05-.019a1.7 1.7 0 01-1.047-2.114 1.68 1.68 0 012.094-1.101zm-5.575 10.11c.26-.264.639-.367.994-.27.355.096.633.379.728.74.095.362-.007.748-.267 1.013-.402.41-1.053.41-1.455 0a1.062 1.062 0 010-1.482zm14.845-.294c.359-.09.738.024.992.297.254.274.344.665.237 1.025-.107.36-.396.634-.756.718-.551.128-1.1-.22-1.23-.781a1.05 1.05 0 01.757-1.26zm-.064-4.39c.314.32.49.753.49 1.206 0 .452-.176.886-.49 1.206-.315.32-.74.5-1.185.5-.444 0-.87-.18-1.184-.5a1.727 1.727 0 010-2.412 1.654 1.654 0 012.369 0zm-11.243.163c.364.484.447 1.128.218 1.691a1.665 1.665 0 01-2.188.923c-.855-.36-1.26-1.358-.907-2.228a1.68 1.68 0 011.33-1.038c.593-.08 1.183.169 1.547.652zm11.545-4.221c.368 0 .708.2.892.524.184.324.184.724 0 1.048a1.026 1.026 0 01-.892.524c-.568 0-1.03-.47-1.03-1.048 0-.579.462-1.048 1.03-1.048zm-14.358 0c.368 0 .707.2.891.524.184.324.184.724 0 1.048a1.026 1.026 0 01-.891.524c-.569 0-1.03-.47-1.03-1.048 0-.579.461-1.048 1.03-1.048zm10.031-1.475c.925 0 1.675.764 1.675 1.706s-.75 1.705-1.675 1.705-1.674-.763-1.674-1.705c0-.942.75-1.706 
1.674-1.706zm-2.626-.684c.362-.082.653-.356.761-.718a1.062 1.062 0 00-.238-1.028 1.017 1.017 0 00-.996-.294c-.547.14-.881.7-.752 1.257.13.558.675.907 1.225.783zm0 16.876c.359-.087.644-.36.75-.72a1.062 1.062 0 00-.237-1.019 1.018 1.018 0 00-.985-.301 1.037 1.037 0 00-.762.717c-.108.361-.017.754.239 1.028.245.263.606.377.953.305l.043-.01zM17.19 3.5a.631.631 0 00.628-.64c0-.355-.279-.64-.628-.64a.631.631 0 00-.628.64c0 .355.28.64.628.64zm-10.38 0a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64a.631.631 0 00-.628.64c0 .355.279.64.628.64zm-5.182 7.852a.631.631 0 00-.628.64c0 .354.28.639.628.639a.63.63 0 00.627-.606l.001-.034a.62.62 0 00-.628-.64zm5.182 9.13a.631.631 0 00-.628.64c0 .355.279.64.628.64a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64zm10.38.018a.631.631 0 00-.628.64c0 .355.28.64.628.64a.631.631 0 00.628-.64c0-.355-.279-.64-.628-.64zm5.182-9.148a.631.631 0 00-.628.64c0 .354.279.639.628.639a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64zm-.384-4.992a.24.24 0 00.244-.249.24.24 0 00-.244-.249.24.24 0 00-.244.249c0 .142.122.249.244.249zM11.991.497a.24.24 0 00.245-.248A.24.24 0 0011.99 0a.24.24 0 00-.244.249c0 .133.108.236.223.247l.021.001zM2.011 6.36a.24.24 0 00.245-.249.24.24 0 00-.244-.249.24.24 0 00-.244.249.24.24 0 00.244.249zm0 11.263a.24.24 0 00-.243.248.24.24 0 00.244.249.24.24 0 00.244-.249.252.252 0 00-.244-.248zm19.995-.018a.24.24 0 00-.245.248.24.24 0 00.245.25.24.24 0 00.244-.25.252.252 0 00-.244-.248z"
-    />
-  </svg>
-{:else if family === "minimax"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M16.278 2c1.156 0 2.093.927 2.093 2.07v12.501a.74.74 0 00.744.709.74.74 0 00.743-.709V9.099a2.06 2.06 0 012.071-2.049A2.06 2.06 0 0124 9.1v6.561a.649.649 0 01-.652.645.649.649 0 01-.653-.645V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v7.472a2.037 2.037 0 01-2.048 2.026 2.037 2.037 0 01-2.048-2.026v-12.5a.785.785 0 00-.788-.753.785.785 0 00-.789.752l-.001 15.904A2.037 2.037 0 0113.441 22a2.037 2.037 0 01-2.048-2.026V18.04c0-.356.292-.645.652-.645.36 0 .652.289.652.645v1.934c0 .263.142.506.372.638.23.131.514.131.744 0a.734.734 0 00.372-.638V4.07c0-1.143.937-2.07 2.093-2.07zm-5.674 0c1.156 0 2.093.927 2.093 2.07v11.523a.648.648 0 01-.652.645.648.648 0 01-.652-.645V4.07a.785.785 0 00-.789-.78.785.785 0 00-.789.78v14.013a2.06 2.06 0 01-2.07 2.048 2.06 2.06 0 01-2.071-2.048V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v3.8a2.06 2.06 0 01-2.071 2.049A2.06 2.06 0 010 12.9v-1.378c0-.357.292-.646.652-.646.36 0 .653.29.653.646V12.9c0 .418.343.757.766.757s.766-.339.766-.757V9.099a2.06 2.06 0 012.07-2.048 2.06 2.06 0 012.071 2.048v8.984c0 .419.343.758.767.758.423 0 .766-.339.766-.758V4.07c0-1.143.937-2.07 2.093-2.07z"
-    />
-  </svg>
-{:else if family === "kimi"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M19.738 5.776c.163-.209.306-.4.457-.585.07-.087.064-.153-.004-.244-.655-.861-.717-1.817-.34-2.787.283-.73.909-1.072 1.674-1.145.477-.045.945.004 1.379.236.57.305.902.77 1.01 1.412.086.512.07 1.012-.075 1.508-.257.878-.888 1.333-1.753 1.448-.718.096-1.446.108-2.17.157-.056.004-.113 0-.178 0z"
-    />
-    <path
-      d="M17.962 1.844h-4.326l-3.425 7.81H5.369V1.878H1.5V22h3.87v-8.477h6.824a3.025 3.025 0 002.743-1.75V22h3.87v-8.477a3.87 3.87 0 00-3.588-3.86v-.01h-2.125a3.94 3.94 0 002.323-2.12l2.545-5.689z"
-    />
-  </svg>
-{:else if family === "flux"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12 2L2 19h7.5l2.5-4.5L14.5 19H22L12 2zm0 4.5L16.5 17h-3l-1.5-2.7L10.5 17h-3L12 6.5z"
-    />
-  </svg>
-{:else if family === "qwen-image"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12.604 1.34c.393.69.784 1.382 1.174 2.075a.18.18 0 00.157.091h5.552c.174 0 .322.11.446.327l1.454 2.57c.19.337.24.478.024.837-.26.43-.513.864-.76 1.3l-.367.658c-.106.196-.223.28-.04.512l2.652 4.637c.172.301.111.494-.043.77-.437.785-.882 1.564-1.335 2.34-.159.272-.352.375-.68.37-.777-.016-1.552-.01-2.327.016a.099.099 0 00-.081.05 575.097 575.097 0 01-2.705 4.74c-.169.293-.38.363-.725.364-.997.003-2.002.004-3.017.002a.537.537 0 01-.465-.271l-1.335-2.323a.09.09 0 00-.083-.049H4.982c-.285.03-.553-.001-.805-.092l-1.603-2.77a.543.543 0 01-.002-.54l1.207-2.12a.198.198 0 000-.197 550.951 550.951 0 01-1.875-3.272l-.79-1.395c-.16-.31-.173-.496.095-.965.465-.813.927-1.625 1.387-2.436.132-.234.304-.334.584-.335a338.3 338.3 0 012.589-.001.124.124 0 00.107-.063l2.806-4.895a.488.488 0 01.422-.246c.524-.001 1.053 0 1.583-.006L11.704 1c.341-.003.724.032.9.34zm-3.432.403a.06.06 0 00-.052.03L6.254 6.788a.157.157 0 01-.135.078H3.253c-.056 0-.07.025-.041.074l5.81 10.156c.025.042.013.062-.034.063l-2.795.015a.218.218 0 00-.2.116l-1.32 2.31c-.044.078-.021.118.068.118l5.716.008c.046 0 .08.02.104.061l1.403 2.454c.046.081.092.082.139 0l5.006-8.76.783-1.382a.055.055 0 01.096 0l1.424 2.53a.122.122 0 00.107.062l2.763-.02a.04.04 0 00.035-.02.041.041 0 000-.04l-2.9-5.086a.108.108 0 010-.113l.293-.507 1.12-1.977c.024-.041.012-.062-.035-.062H9.2c-.059 0-.073-.026-.043-.077l1.434-2.505a.107.107 0 000-.114L9.225 1.774a.06.06 0 00-.053-.031zm6.29 8.02c.046 0 .058.02.034.06l-.832 1.465-2.613 4.585a.056.056 0 01-.05.029.058.058 0 01-.05-.029L8.498 9.841c-.02-.034-.01-.052.028-.054l.216-.012 6.722-.012z"
-    />
-  </svg>
-{:else if family === "huggingface"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12.025 1.13c-5.77 0-10.449 4.647-10.449 10.378 0 1.112.178 2.181.503 3.185.064-.222.203-.444.416-.577a.96.96 0 0 1 .524-.15c.293 0 .584.124.84.284.278.173.48.408.71.694.226.282.458.611.684.951v-.014c.017-.324.106-.622.264-.874s.403-.487.762-.543c.3-.047.596.06.787.203s.31.313.4.467c.15.257.212.468.233.542.01.026.653 1.552 1.657 2.54.616.605 1.01 1.223 1.082 1.912.055.537-.096 1.059-.38 1.572.637.121 1.294.187 1.967.187.657 0 1.298-.063 1.921-.178-.287-.517-.44-1.041-.384-1.581.07-.69.465-1.307 1.081-1.913 1.004-.987 1.647-2.513 1.657-2.539.021-.074.083-.285.233-.542.09-.154.208-.323.4-.467a1.08 1.08 0 0 1 .787-.203c.359.056.604.29.762.543s.247.55.265.874v.015c.225-.34.457-.67.683-.952.23-.286.432-.52.71-.694.257-.16.547-.284.84-.285a.97.97 0 0 1 .524.151c.228.143.373.388.43.625l.006.04a10.3 10.3 0 0 0 .534-3.273c0-5.731-4.678-10.378-10.449-10.378M8.327 6.583a1.5 1.5 0 0 1 .713.174 1.487 1.487 0 0 1 .617 2.013c-.183.343-.762-.214-1.102-.094-.38.134-.532.914-.917.71a1.487 1.487 0 0 1 .69-2.803m7.486 0a1.487 1.487 0 0 1 .689 2.803c-.385.204-.536-.576-.916-.71-.34-.12-.92.437-1.103.094a1.487 1.487 0 0 1 .617-2.013 1.5 1.5 0 0 1 .713-.174m-10.68 1.55a.96.96 0 1 1 0 1.921.96.96 0 0 1 0-1.92m13.838 0a.96.96 0 1 1 0 1.92.96.96 0 0 1 0-1.92M8.489 11.458c.588.01 1.965 1.157 3.572 1.164 1.607-.007 2.984-1.155 3.572-1.164.196-.003.305.12.305.454 0 .886-.424 2.328-1.563 3.202-.22-.756-1.396-1.366-1.63-1.32q-.011.001-.02.006l-.044.026-.01.008-.03.024q-.018.017-.035.036l-.032.04a1 1 0 0 0-.058.09l-.014.025q-.049.088-.11.19a1 1 0 0 1-.083.116 1.2 1.2 0 0 1-.173.18q-.035.029-.075.058a1.3 1.3 0 0 1-.251-.243 1 1 0 0 1-.076-.107c-.124-.193-.177-.363-.337-.444-.034-.016-.104-.008-.2.022q-.094.03-.216.087-.06.028-.125.063l-.13.074q-.067.04-.136.086a3 3 0 0 0-.135.096 3 3 0 0 0-.26.219 2 2 0 0 0-.12.121 2 2 0 0 0-.106.128l-.002.002a2 2 0 0 0-.09.132l-.001.001a1.2 1.2 0 0 0-.105.212q-.013.036-.024.073c-1.139-.875-1.563-2.317-1.563-3.203 0-.334.109-.457.305-.454m.836 
10.354c.824-1.19.766-2.082-.365-3.194-1.13-1.112-1.789-2.738-1.789-2.738s-.246-.945-.806-.858-.97 1.499.202 2.362c1.173.864-.233 1.45-.685.64-.45-.812-1.683-2.896-2.322-3.295s-1.089-.175-.938.647 2.822 2.813 2.562 3.244-1.176-.506-1.176-.506-2.866-2.567-3.49-1.898.473 1.23 2.037 2.16c1.564.932 1.686 1.178 1.464 1.53s-3.675-2.511-4-1.297c-.323 1.214 3.524 1.567 3.287 2.405-.238.839-2.71-1.587-3.216-.642-.506.946 3.49 2.056 3.522 2.064 1.29.33 4.568 1.028 5.713-.624m5.349 0c-.824-1.19-.766-2.082.365-3.194 1.13-1.112 1.789-2.738 1.789-2.738s.246-.945.806-.858.97 1.499-.202 2.362c-1.173.864.233 1.45.685.64.451-.812 1.683-2.896 2.322-3.295s1.089-.175.938.647-2.822 2.813-2.562 3.244 1.176-.506 1.176-.506 2.866-2.567 3.49-1.898-.473 1.23-2.037 2.16c-1.564.932-1.686 1.178-1.464 1.53s3.675-2.511 4-1.297c.323 1.214-3.524 1.567-3.287 2.405.238.839 2.71-1.587 3.216-.642.506.946-3.49 2.056-3.522 2.064-1.29.33-4.568 1.028-5.713-.624"
-    />
-  </svg>
-{:else}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"
-    />
-  </svg>
-{/if}
--- a/dashboard/src/lib/components/FamilySidebar.svelte
+++ b/dashboard/src/lib/components/FamilySidebar.svelte
@@ -1,144 +0,0 @@
-<script lang="ts">
-  import FamilyLogos from "./FamilyLogos.svelte";
-
-  type FamilySidebarProps = {
-    families: string[];
-    selectedFamily: string | null;
-    hasFavorites: boolean;
-    onSelect: (family: string | null) => void;
-  };
-
-  let { families, selectedFamily, hasFavorites, onSelect }: FamilySidebarProps =
-    $props();
-
-  // Family display names
-  const familyNames: Record<string, string> = {
-    favorites: "Favorites",
-    huggingface: "Hub",
-    llama: "Meta",
-    qwen: "Qwen",
-    deepseek: "DeepSeek",
-    "gpt-oss": "OpenAI",
-    glm: "GLM",
-    minimax: "MiniMax",
-    kimi: "Kimi",
-    flux: "FLUX",
-    "qwen-image": "Qwen Img",
-  };
-
-  function getFamilyName(family: string): string {
-    return (
-      familyNames[family] || family.charAt(0).toUpperCase() + family.slice(1)
-    );
-  }
-</script>
-
-<div
-  class="flex flex-col gap-1 py-2 px-1 border-r border-exo-yellow/10 bg-exo-medium-gray/30 min-w-[64px] overflow-y-auto scrollbar-hide"
->
-  <!-- All models (no filter) -->
-  <button
-    type="button"
-    onclick={() => onSelect(null)}
-    class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-    null
-      ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
-      : 'hover:bg-white/5 border-l-2 border-transparent'}"
-    title="All models"
-  >
-    <svg
-      class="w-5 h-5 {selectedFamily === null
-        ? 'text-exo-yellow'
-        : 'text-white/50 group-hover:text-white/70'}"
-      viewBox="0 0 24 24"
-      fill="currentColor"
-    >
-      <path
-        d="M4 8h4V4H4v4zm6 12h4v-4h-4v4zm-6 0h4v-4H4v4zm0-6h4v-4H4v4zm6 0h4v-4h-4v4zm6-10v4h4V4h-4zm-6 4h4V4h-4v4zm6 6h4v-4h-4v4zm0 6h4v-4h-4v4z"
-      />
-    </svg>
-    <span
-      class="text-[9px] font-mono mt-0.5 {selectedFamily === null
-        ? 'text-exo-yellow'
-        : 'text-white/40 group-hover:text-white/60'}">All</span
-    >
-  </button>
-
-  <!-- Favorites (only show if has favorites) -->
-  {#if hasFavorites}
-    <button
-      type="button"
-      onclick={() => onSelect("favorites")}
-      class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-      'favorites'
-        ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
-        : 'hover:bg-white/5 border-l-2 border-transparent'}"
-      title="Show favorited models"
-    >
-      <FamilyLogos
-        family="favorites"
-        class={selectedFamily === "favorites"
-          ? "text-amber-400"
-          : "text-white/50 group-hover:text-amber-400/70"}
-      />
-      <span
-        class="text-[9px] font-mono mt-0.5 {selectedFamily === 'favorites'
-          ? 'text-amber-400'
-          : 'text-white/40 group-hover:text-white/60'}">Faves</span
-      >
-    </button>
-  {/if}
-
-  <!-- HuggingFace Hub -->
-  <button
-    type="button"
-    onclick={() => onSelect("huggingface")}
-    class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-    'huggingface'
-      ? 'bg-orange-500/20 border-l-2 border-orange-400'
-      : 'hover:bg-white/5 border-l-2 border-transparent'}"
-    title="Browse and add models from Hugging Face"
-  >
-    <FamilyLogos
-      family="huggingface"
-      class={selectedFamily === "huggingface"
-        ? "text-orange-400"
-        : "text-white/50 group-hover:text-orange-400/70"}
-    />
-    <span
-      class="text-[9px] font-mono mt-0.5 {selectedFamily === 'huggingface'
-        ? 'text-orange-400'
-        : 'text-white/40 group-hover:text-white/60'}">Hub</span
-    >
-  </button>
-
-  <div class="h-px bg-exo-yellow/10 my-1"></div>
-
-  <!-- Model families -->
-  {#each families as family}
-    <button
-      type="button"
-      onclick={() => onSelect(family)}
-      class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-      family
-        ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
-        : 'hover:bg-white/5 border-l-2 border-transparent'}"
-      title={getFamilyName(family)}
-    >
-      <FamilyLogos
-        {family}
-        class={selectedFamily === family
-          ? "text-exo-yellow"
-          : "text-white/50 group-hover:text-white/70"}
-      />
-      <span
-        class="text-[9px] font-mono mt-0.5 truncate max-w-full {selectedFamily ===
-        family
-          ? 'text-exo-yellow'
-          : 'text-white/40 group-hover:text-white/60'}"
-      >
-        {getFamilyName(family)}
-      </span>
-    </button>
-  {/each}
-</div>
--- a/dashboard/src/lib/components/HuggingFaceResultItem.svelte
+++ b/dashboard/src/lib/components/HuggingFaceResultItem.svelte
@@ -1,151 +0,0 @@
-<script lang="ts">
-  interface HuggingFaceModel {
-    id: string;
-    author: string;
-    downloads: number;
-    likes: number;
-    last_modified: string;
-    tags: string[];
-  }
-
-  type HuggingFaceResultItemProps = {
-    model: HuggingFaceModel;
-    isAdded: boolean;
-    isAdding: boolean;
-    onAdd: () => void;
-    onSelect: () => void;
-    downloadedOnNodes?: string[];
-  };
-
-  let {
-    model,
-    isAdded,
-    isAdding,
-    onAdd,
-    onSelect,
-    downloadedOnNodes = [],
-  }: HuggingFaceResultItemProps = $props();
-
-  function formatNumber(num: number): string {
-    if (num >= 1000000) {
-      return `${(num / 1000000).toFixed(1)}M`;
-    } else if (num >= 1000) {
-      return `${(num / 1000).toFixed(1)}k`;
-    }
-    return num.toString();
-  }
-
-  // Extract model name from full ID (e.g., "mlx-community/Llama-3.2-1B" -> "Llama-3.2-1B")
-  const modelName = $derived(model.id.split("/").pop() || model.id);
-</script>
-
-<div
-  class="flex items-center justify-between gap-3 px-3 py-2.5 hover:bg-white/5 transition-colors border-b border-white/5 last:border-b-0"
->
-  <div class="flex-1 min-w-0">
-    <div class="flex items-center gap-2">
-      <span class="text-sm font-mono text-white truncate" title={model.id}
-        >{modelName}</span
-      >
-      {#if downloadedOnNodes.length > 0}
-        <span
-          class="flex-shrink-0"
-          title={`Downloaded on ${downloadedOnNodes.join(", ")}`}
-        >
-          <svg
-            class="w-4 h-4"
-            viewBox="0 0 24 24"
-            fill="none"
-            stroke="currentColor"
-            stroke-width="2"
-            stroke-linecap="round"
-            stroke-linejoin="round"
-          >
-            <path
-              class="text-white/40"
-              d="M20 20a2 2 0 0 0 2-2V8a2 2 0 0 0-2-2h-7.9a2 2 0 0 1-1.69-.9L9.6 3.9A2 2 0 0 0 7.93 3H4a2 2 0 0 0-2 2v13a2 2 0 0 0 2 2Z"
-            />
-            <path class="text-green-400" d="m9 13 2 2 4-4" />
-          </svg>
-        </span>
-      {/if}
-      {#if isAdded}
-        <span
-          class="px-1.5 py-0.5 text-[10px] font-mono bg-green-500/20 text-green-400 rounded"
-          >Added</span
-        >
-      {/if}
-    </div>
-    <div class="flex items-center gap-3 mt-0.5 text-xs text-white/40">
-      <span class="truncate">{model.author}</span>
-      <span
-        class="flex items-center gap-1 shrink-0"
-        title="Downloads in the last 30 days"
-      >
-        <svg
-          class="w-3 h-3"
-          fill="none"
-          stroke="currentColor"
-          viewBox="0 0 24 24"
-        >
-          <path
-            stroke-linecap="round"
-            stroke-linejoin="round"
-            stroke-width="2"
-            d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"
-          />
-        </svg>
-        {formatNumber(model.downloads)}
-      </span>
-      <span
-        class="flex items-center gap-1 shrink-0"
-        title="Community likes on Hugging Face"
-      >
-        <svg
-          class="w-3 h-3"
-          fill="none"
-          stroke="currentColor"
-          viewBox="0 0 24 24"
-        >
-          <path
-            stroke-linecap="round"
-            stroke-linejoin="round"
-            stroke-width="2"
-            d="M4.318 6.318a4.5 4.5 0 000 6.364L12 20.364l7.682-7.682a4.5 4.5 0 00-6.364-6.364L12 7.636l-1.318-1.318a4.5 4.5 0 00-6.364 0z"
-          />
-        </svg>
-        {formatNumber(model.likes)}
-      </span>
-    </div>
-  </div>
-
-  <div class="flex items-center gap-2 shrink-0">
-    {#if isAdded}
-      <button
-        type="button"
-        onclick={onSelect}
-        class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-yellow/10 text-exo-yellow border border-exo-yellow/30 hover:bg-exo-yellow/20 transition-colors rounded cursor-pointer"
-      >
-        Select
-      </button>
-    {:else}
-      <button
-        type="button"
-        onclick={onAdd}
-        disabled={isAdding}
-        class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-orange-500/10 text-orange-400 border border-orange-400/30 hover:bg-orange-500/20 transition-colors rounded cursor-pointer disabled:opacity-50 disabled:cursor-not-allowed"
-      >
-        {#if isAdding}
-          <span class="flex items-center gap-1.5">
-            <span
-              class="w-3 h-3 border-2 border-orange-400 border-t-transparent rounded-full animate-spin"
-            ></span>
-            Adding...
-          </span>
-        {:else}
-          + Add
-        {/if}
-      </button>
-    {/if}
-  </div>
-</div>
--- a/dashboard/src/lib/components/ImageLightbox.svelte
+++ b/dashboard/src/lib/components/ImageLightbox.svelte
@@ -1,96 +0,0 @@
-<script lang="ts">
-  import { fade, fly } from "svelte/transition";
-  import { cubicOut } from "svelte/easing";
-
-  interface Props {
-    src: string | null;
-    onclose: () => void;
-  }
-
-  let { src, onclose }: Props = $props();
-
-  function handleKeydown(e: KeyboardEvent) {
-    if (e.key === "Escape") {
-      onclose();
-    }
-  }
-
-  function extensionFromSrc(dataSrc: string): string {
-    const match = dataSrc.match(/^data:image\/(\w+)/);
-    if (match) return match[1] === "jpeg" ? "jpg" : match[1];
-    const urlMatch = dataSrc.match(/\.(\w+)(?:\?|$)/);
-    if (urlMatch) return urlMatch[1];
-    return "png";
-  }
-
-  function handleDownload(e: MouseEvent) {
-    e.stopPropagation();
-    if (!src) return;
-    const link = document.createElement("a");
-    link.href = src;
-    link.download = `image-${Date.now()}.${extensionFromSrc(src)}`;
-    link.click();
-  }
-
-  function handleClose(e: MouseEvent) {
-    e.stopPropagation();
-    onclose();
-  }
-</script>
-
-<svelte:window onkeydown={src ? handleKeydown : undefined} />
-
-{#if src}
-  <div
-    class="fixed inset-0 z-50 bg-black/90 backdrop-blur-sm flex items-center justify-center"
-    transition:fade={{ duration: 200 }}
-    onclick={onclose}
-    role="presentation"
-    onintrostart={() => (document.body.style.overflow = "hidden")}
-    onoutroend={() => (document.body.style.overflow = "")}
-  >
-    <div class="absolute top-4 right-4 flex gap-2 z-10">
-      <button
-        type="button"
-        class="p-2 rounded-lg bg-exo-dark-gray/80 border border-exo-yellow/30 text-exo-yellow hover:bg-exo-dark-gray hover:border-exo-yellow/50 cursor-pointer transition-colors"
-        onclick={handleDownload}
-        title="Download image"
-      >
-        <svg
-          class="w-5 h-5"
-          fill="none"
-          viewBox="0 0 24 24"
-          stroke="currentColor"
-          stroke-width="2"
-        >
-          <path
-            stroke-linecap="round"
-            stroke-linejoin="round"
-            d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"
-          />
-        </svg>
-      </button>
-      <button
-        type="button"
-        class="p-2 rounded-lg bg-exo-dark-gray/80 border border-exo-yellow/30 text-exo-yellow hover:bg-exo-dark-gray hover:border-exo-yellow/50 cursor-pointer transition-colors"
-        onclick={handleClose}
-        title="Close"
-      >
-        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
-          <path
-            d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
-          />
-        </svg>
-      </button>
-    </div>
-
-    <!-- svelte-ignore a11y_no_noninteractive_element_interactions, a11y_click_events_have_key_events -->
-    <img
-      {src}
-      alt=""
-      class="max-w-[90vw] max-h-[90vh] object-contain rounded-lg shadow-2xl"
-      transition:fly={{ y: 20, duration: 300, easing: cubicOut }}
-      onclick={(e) => e.stopPropagation()}
-    />
-  </div>
-{/if}
--- a/dashboard/src/lib/components/ImageParamsPanel.svelte
+++ b/dashboard/src/lib/components/ImageParamsPanel.svelte
@@ -64,8 +64,6 @@
    "1024x1024",
    "1024x768",
    "768x1024",
-    "1024x1365",
-    "1365x1024",
  ];

  const qualityOptions: ImageGenerationParams["quality"][] = [
@@ -150,15 +148,6 @@
    setImageGenerationParams({ guidance: null });
  }

-  function handleNumSyncStepsChange(event: Event) {
-    const value = parseInt((event.target as HTMLInputElement).value, 10);
-    setImageGenerationParams({ numSyncSteps: value });
-  }
-
-  function clearNumSyncSteps() {
-    setImageGenerationParams({ numSyncSteps: null });
-  }
-
  function handleReset() {
    resetImageGenerationParams();
    showAdvanced = false;
@@ -168,8 +157,7 @@
    params.seed !== null ||
      params.numInferenceSteps !== null ||
      params.guidance !== null ||
-      (params.negativePrompt !== null && params.negativePrompt.trim() !== "") ||
-      params.numSyncSteps !== null,
+      (params.negativePrompt !== null && params.negativePrompt.trim() !== ""),
  );
 </script>

@@ -590,50 +578,7 @@
        </div>
      </div>

-      <!-- Row 3: Sync Steps -->
-      <div class="flex items-center gap-1.5">
-        <span
-          class="text-xs text-exo-light-gray uppercase tracking-wider whitespace-nowrap"
-          >SYNC STEPS:</span
-        >
-        <div class="flex items-center gap-2 flex-1 max-w-xs">
-          <input
-            type="range"
-            min="1"
-            max="100"
-            value={params.numSyncSteps ?? 1}
-            oninput={handleNumSyncStepsChange}
-            class="flex-1 h-1 bg-exo-medium-gray/50 rounded appearance-none cursor-pointer accent-exo-yellow"
-          />
-          <span class="text-xs font-mono text-exo-yellow w-8 text-right">
-            {params.numSyncSteps ?? "--"}
-          </span>
-          {#if params.numSyncSteps !== null}
-            <button
-              type="button"
-              onclick={clearNumSyncSteps}
-              class="text-exo-light-gray hover:text-exo-yellow transition-colors"
-              title="Clear"
-            >
-              <svg
-                class="w-3 h-3"
-                fill="none"
-                viewBox="0 0 24 24"
-                stroke="currentColor"
-              >
-                <path
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                  stroke-width="2"
-                  d="M6 18L18 6M6 6l12 12"
-                />
-              </svg>
-            </button>
-          {/if}
-        </div>
-      </div>
-
-      <!-- Row 4: Negative Prompt -->
+      <!-- Row 3: Negative Prompt -->
      <div class="flex flex-col gap-1.5">
        <span class="text-xs text-exo-light-gray uppercase tracking-wider"
          >NEGATIVE PROMPT:</span
--- a/dashboard/src/lib/components/ModelFilterPopover.svelte
+++ b/dashboard/src/lib/components/ModelFilterPopover.svelte
@@ -1,254 +0,0 @@
-<script lang="ts">
-  import { fly } from "svelte/transition";
-  import { cubicOut } from "svelte/easing";
-
-  interface FilterState {
-    capabilities: string[];
-    sizeRange: { min: number; max: number } | null;
-    downloadedOnly: boolean;
-  }
-
-  type ModelFilterPopoverProps = {
-    filters: FilterState;
-    onChange: (filters: FilterState) => void;
-    onClear: () => void;
-    onClose: () => void;
-  };
-
-  let { filters, onChange, onClear, onClose }: ModelFilterPopoverProps =
-    $props();
-
-  // Available capabilities
-  const availableCapabilities = [
-    { id: "text", label: "Text" },
-    { id: "thinking", label: "Thinking" },
-    { id: "code", label: "Code" },
-    { id: "vision", label: "Vision" },
-    { id: "image_gen", label: "Image Gen" },
-    { id: "image_edit", label: "Image Edit" },
-  ];
-
-  // Size ranges
-  const sizeRanges = [
-    { label: "< 10GB", min: 0, max: 10 },
-    { label: "10-50GB", min: 10, max: 50 },
-    { label: "50-200GB", min: 50, max: 200 },
-    { label: "> 200GB", min: 200, max: 10000 },
-  ];
-
-  function toggleCapability(cap: string) {
-    const next = filters.capabilities.includes(cap)
-      ? filters.capabilities.filter((c) => c !== cap)
-      : [...filters.capabilities, cap];
-    onChange({ ...filters, capabilities: next });
-  }
-
-  function selectSizeRange(range: { min: number; max: number } | null) {
-    // Toggle off if same range is clicked
-    if (
-      filters.sizeRange &&
-      range &&
-      filters.sizeRange.min === range.min &&
-      filters.sizeRange.max === range.max
-    ) {
-      onChange({ ...filters, sizeRange: null });
-    } else {
-      onChange({ ...filters, sizeRange: range });
-    }
-  }
-
-  function handleClickOutside(e: MouseEvent) {
-    const target = e.target as HTMLElement;
-    if (
-      !target.closest(".filter-popover") &&
-      !target.closest(".filter-toggle")
-    ) {
-      onClose();
-    }
-  }
-</script>
-
-<svelte:window onclick={handleClickOutside} />
-
-<!-- svelte-ignore a11y_no_static_element_interactions -->
-<div
-  class="filter-popover absolute right-0 top-full mt-2 w-64 bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-xl z-10"
-  transition:fly={{ y: -10, duration: 200, easing: cubicOut }}
-  onclick={(e) => e.stopPropagation()}
-  role="dialog"
-  aria-label="Filter options"
->
-  <div class="p-3 space-y-4">
-    <!-- Capabilities -->
-    <div>
-      <h4 class="text-xs font-mono text-white/50 mb-2">Capabilities</h4>
-      <div class="flex flex-wrap gap-1.5">
-        {#each availableCapabilities as cap}
-          {@const isSelected = filters.capabilities.includes(cap.id)}
-          <button
-            type="button"
-            class="px-2 py-1 text-xs font-mono rounded transition-colors {isSelected
-              ? 'bg-exo-yellow/20 text-exo-yellow border border-exo-yellow/30'
-              : 'bg-white/5 text-white/60 hover:bg-white/10 border border-transparent'}"
-            onclick={() => toggleCapability(cap.id)}
-          >
-            {#if cap.id === "text"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {:else if cap.id === "thinking"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {:else if cap.id === "code"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M16 18l6-6-6-6M8 6l-6 6 6 6"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {:else if cap.id === "vision"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /><circle cx="12" cy="12" r="3" /></svg
-              >
-            {:else if cap.id === "image_gen"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><rect
-                  x="3"
-                  y="3"
-                  width="18"
-                  height="18"
-                  rx="2"
-                  ry="2"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /><circle cx="8.5" cy="8.5" r="1.5" /><path
-                  d="M21 15l-5-5L5 21"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {:else if cap.id === "image_edit"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /><path
-                  d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {/if}
-            <span class="ml-1">{cap.label}</span>
-          </button>
-        {/each}
-      </div>
-    </div>
-
-    <!-- Downloaded only -->
-    <div>
-      <h4 class="text-xs font-mono text-white/50 mb-2">Availability</h4>
-      <button
-        type="button"
-        class="px-2 py-1 text-xs font-mono rounded transition-colors {filters.downloadedOnly
-          ? 'bg-green-500/20 text-green-400 border border-green-500/30'
-          : 'bg-white/5 text-white/60 hover:bg-white/10 border border-transparent'}"
-        onclick={() =>
-          onChange({ ...filters, downloadedOnly: !filters.downloadedOnly })}
-      >
-        <svg
-          class="w-3.5 h-3.5 inline-block"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-          stroke-linecap="round"
-          stroke-linejoin="round"
-        >
-          <path
-            class="text-white/40"
-            d="M20 20a2 2 0 0 0 2-2V8a2 2 0 0 0-2-2h-7.9a2 2 0 0 1-1.69-.9L9.6 3.9A2 2 0 0 0 7.93 3H4a2 2 0 0 0-2 2v13a2 2 0 0 0 2 2Z"
-          />
-          <path class="text-green-400" d="m9 13 2 2 4-4" />
-        </svg>
-        <span class="ml-1">Downloaded</span>
-      </button>
-    </div>
-
-    <!-- Size range -->
-    <div>
-      <h4 class="text-xs font-mono text-white/50 mb-2">Model Size</h4>
-      <div class="flex flex-wrap gap-1.5">
-        {#each sizeRanges as range}
-          {@const isSelected =
-            filters.sizeRange &&
-            filters.sizeRange.min === range.min &&
-            filters.sizeRange.max === range.max}
-          <button
-            type="button"
-            class="px-2 py-1 text-xs font-mono rounded transition-colors {isSelected
-              ? 'bg-exo-yellow/20 text-exo-yellow border border-exo-yellow/30'
-              : 'bg-white/5 text-white/60 hover:bg-white/10 border border-transparent'}"
-            onclick={() => selectSizeRange(range)}
-          >
-            {range.label}
-          </button>
-        {/each}
-      </div>
-    </div>
-
-    <!-- Clear button -->
-    <button
-      type="button"
-      class="w-full py-1.5 text-xs font-mono text-white/50 hover:text-white/70 hover:bg-white/5 rounded transition-colors"
-      onclick={onClear}
-    >
-      Clear all filters
-    </button>
-  </div>
-</div>
--- a/dashboard/src/lib/components/ModelPickerGroup.svelte
+++ b/dashboard/src/lib/components/ModelPickerGroup.svelte
@@ -1,477 +0,0 @@
-<script lang="ts">
-  interface ModelInfo {
-    id: string;
-    name?: string;
-    storage_size_megabytes?: number;
-    base_model?: string;
-    quantization?: string;
-    supports_tensor?: boolean;
-    capabilities?: string[];
-    family?: string;
-    is_custom?: boolean;
-  }
-
-  interface ModelGroup {
-    id: string;
-    name: string;
-    capabilities: string[];
-    family: string;
-    variants: ModelInfo[];
-    smallestVariant: ModelInfo;
-    hasMultipleVariants: boolean;
-  }
-
-  type DownloadAvailability = {
-    available: boolean;
-    nodeNames: string[];
-    nodeIds: string[];
-  };
-  type ModelFitStatus = "fits_now" | "fits_cluster_capacity" | "too_large";
-
-  type ModelPickerGroupProps = {
-    group: ModelGroup;
-    isExpanded: boolean;
-    isFavorite: boolean;
-    selectedModelId: string | null;
-    canModelFit: (id: string) => boolean;
-    getModelFitStatus: (id: string) => ModelFitStatus;
-    onToggleExpand: () => void;
-    onSelectModel: (modelId: string) => void;
-    onToggleFavorite: (baseModelId: string) => void;
-    onShowInfo: (group: ModelGroup) => void;
-    downloadStatusMap?: Map<string, DownloadAvailability>;
-  };
-
-  let {
-    group,
-    isExpanded,
-    isFavorite,
-    selectedModelId,
-    canModelFit,
-    getModelFitStatus,
-    onToggleExpand,
-    onSelectModel,
-    onToggleFavorite,
-    onShowInfo,
-    downloadStatusMap,
-  }: ModelPickerGroupProps = $props();
-
-  // Group-level download status: show if any variant is downloaded
-  const groupDownloadStatus = $derived.by(() => {
-    if (!downloadStatusMap || downloadStatusMap.size === 0) return undefined;
-    // Return the first available entry (prefer "available" ones)
-    for (const avail of downloadStatusMap.values()) {
-      if (avail.available) return avail;
-    }
-    return downloadStatusMap.values().next().value;
-  });
-
-  // Format storage size
-  function formatSize(mb: number | undefined): string {
-    if (!mb) return "";
-    if (mb >= 1024) {
-      return `${(mb / 1024).toFixed(0)}GB`;
-    }
-    return `${mb}MB`;
-  }
-
-  // Check if any variant can fit
-  const anyVariantFits = $derived(
-    group.variants.some((v) => canModelFit(v.id)),
-  );
-  const groupFitStatus = $derived.by((): ModelFitStatus => {
-    let hasClusterCapacityOnly = false;
-    for (const variant of group.variants) {
-      const fitStatus = getModelFitStatus(variant.id);
-      if (fitStatus === "fits_now") {
-        return "fits_now";
-      }
-      if (fitStatus === "fits_cluster_capacity") {
-        hasClusterCapacityOnly = true;
-      }
-    }
-    return hasClusterCapacityOnly ? "fits_cluster_capacity" : "too_large";
-  });
-
-  function getSizeClassForFitStatus(fitStatus: ModelFitStatus): string {
-    switch (fitStatus) {
-      case "fits_now":
-        return "text-white/40";
-      case "fits_cluster_capacity":
-        return "text-orange-400/80";
-      case "too_large":
-        return "text-red-400/70";
-    }
-  }
-
-  // Check if this group's model is currently selected (for single-variant groups)
-  const isMainSelected = $derived(
-    !group.hasMultipleVariants &&
-      group.variants.some((v) => v.id === selectedModelId),
-  );
-</script>
-
-<div
-  class="border-b border-white/5 last:border-b-0 {!anyVariantFits
-    ? 'opacity-50'
-    : ''}"
->
-  <!-- Main row -->
-  <div
-    class="flex items-center gap-2 px-3 py-2.5 transition-colors {anyVariantFits
-      ? 'hover:bg-white/5 cursor-pointer'
-      : 'cursor-not-allowed'} {isMainSelected
-      ? 'bg-exo-yellow/10 border-l-2 border-exo-yellow'
-      : 'border-l-2 border-transparent'}"
-    onclick={() => {
-      if (group.hasMultipleVariants) {
-        onToggleExpand();
-      } else {
-        const modelId = group.variants[0]?.id;
-        if (modelId && canModelFit(modelId)) {
-          onSelectModel(modelId);
-        }
-      }
-    }}
-    role="button"
-    tabindex="0"
-    onkeydown={(e) => {
-      if (e.key === "Enter" || e.key === " ") {
-        e.preventDefault();
-        if (group.hasMultipleVariants) {
-          onToggleExpand();
-        } else {
-          const modelId = group.variants[0]?.id;
-          if (modelId && canModelFit(modelId)) {
-            onSelectModel(modelId);
-          }
-        }
-      }
-    }}
-  >
-    <!-- Expand/collapse chevron (for groups with variants) -->
-    {#if group.hasMultipleVariants}
-      <svg
-        class="w-4 h-4 text-white/40 transition-transform duration-200 flex-shrink-0 {isExpanded
-          ? 'rotate-90'
-          : ''}"
-        viewBox="0 0 24 24"
-        fill="currentColor"
-      >
-        <path d="M8.59 16.59L13.17 12 8.59 7.41 10 6l6 6-6 6-1.41-1.41z" />
-      </svg>
-    {:else}
-      <div class="w-4 flex-shrink-0"></div>
-    {/if}
-
-    <!-- Model name -->
-    <div class="flex-1 min-w-0">
-      <div class="flex items-center gap-2">
-        <span class="font-mono text-sm text-white truncate">
-          {group.name}
-        </span>
-        <!-- Capability icons -->
-        {#each group.capabilities.filter((c) => c !== "text") as cap}
-          {#if cap === "thinking"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports Thinking"
-            >
-              <path
-                d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-            </svg>
-          {:else if cap === "code"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports code generation"
-            >
-              <path
-                d="M16 18l6-6-6-6M8 6l-6 6 6 6"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-            </svg>
-          {:else if cap === "vision"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports image input"
-            >
-              <path
-                d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-              <circle cx="12" cy="12" r="3" />
-            </svg>
-          {:else if cap === "image_gen"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports image generation"
-            >
-              <rect
-                x="3"
-                y="3"
-                width="18"
-                height="18"
-                rx="2"
-                ry="2"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-              <circle cx="8.5" cy="8.5" r="1.5" />
-              <path
-                d="M21 15l-5-5L5 21"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-            </svg>
-          {:else if cap === "image_edit"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports image editing"
-            >
-              <path
-                d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-              <path
-                d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-            </svg>
-          {/if}
-        {/each}
-      </div>
-    </div>
-
-    <!-- Size indicator (smallest variant) -->
-    {#if !group.hasMultipleVariants && group.smallestVariant?.storage_size_megabytes}
-      {@const singleVariantFitStatus = getModelFitStatus(
-        group.smallestVariant.id,
-      )}
-      <span
-        class="text-xs font-mono flex-shrink-0 {getSizeClassForFitStatus(
-          singleVariantFitStatus,
-        )}"
-      >
-        {formatSize(group.smallestVariant.storage_size_megabytes)}
-      </span>
-    {/if}
-
-    <!-- Variant count with size range -->
-    {#if group.hasMultipleVariants}
-      {@const sizes = group.variants
-        .map((v) => v.storage_size_megabytes || 0)
-        .filter((s) => s > 0)
-        .sort((a, b) => a - b)}
-      <span
-        class="text-xs font-mono flex-shrink-0 {getSizeClassForFitStatus(
-          groupFitStatus,
-        )}"
-      >
-        {group.variants.length} variants{#if sizes.length >= 2}{" "}({formatSize(
-            sizes[0],
-          )}-{formatSize(sizes[sizes.length - 1])}){/if}
-      </span>
-    {/if}
-
-    <!-- Download availability indicator -->
-    {#if groupDownloadStatus && groupDownloadStatus.nodeIds.length > 0}
-      <span
-        class="flex-shrink-0"
-        title={groupDownloadStatus.available
-          ? `Ready — downloaded on ${groupDownloadStatus.nodeNames.join(", ")}`
-          : `Downloaded on ${groupDownloadStatus.nodeNames.join(", ")} (may need more nodes)`}
-      >
-        <svg
-          class="w-4 h-4"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-          stroke-linecap="round"
-          stroke-linejoin="round"
-        >
-          <path
-            class="text-white/40"
-            d="M20 20a2 2 0 0 0 2-2V8a2 2 0 0 0-2-2h-7.9a2 2 0 0 1-1.69-.9L9.6 3.9A2 2 0 0 0 7.93 3H4a2 2 0 0 0-2 2v13a2 2 0 0 0 2 2Z"
-          />
-          <path class="text-green-400" d="m9 13 2 2 4-4" />
-        </svg>
-      </span>
-    {/if}
-
-    <!-- Check mark if selected (single-variant) -->
-    {#if isMainSelected}
-      <svg
-        class="w-4 h-4 text-exo-yellow flex-shrink-0"
-        viewBox="0 0 24 24"
-        fill="currentColor"
-      >
-        <path d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41L9 16.17z" />
-      </svg>
-    {/if}
-
-    <!-- Favorite star -->
-    <button
-      type="button"
-      class="p-1 rounded hover:bg-white/10 transition-colors flex-shrink-0"
-      onclick={(e) => {
-        e.stopPropagation();
-        onToggleFavorite(group.id);
-      }}
-      title={isFavorite ? "Remove from favorites" : "Add to favorites"}
-    >
-      {#if isFavorite}
-        <svg
-          class="w-4 h-4 text-amber-400"
-          viewBox="0 0 24 24"
-          fill="currentColor"
-        >
-          <path
-            d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
-          />
-        </svg>
-      {:else}
-        <svg
-          class="w-4 h-4 text-white/30 hover:text-white/50"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-        >
-          <path
-            d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
-          />
-        </svg>
-      {/if}
-    </button>
-
-    <!-- Info button -->
-    <button
-      type="button"
-      class="p-1 rounded hover:bg-white/10 transition-colors flex-shrink-0"
-      onclick={(e) => {
-        e.stopPropagation();
-        onShowInfo(group);
-      }}
-      title="View model details"
-    >
-      <svg
-        class="w-4 h-4 text-white/30 hover:text-white/50"
-        viewBox="0 0 24 24"
-        fill="currentColor"
-      >
-        <path
-          d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 15h-2v-6h2v6zm0-8h-2V7h2v2z"
-        />
-      </svg>
-    </button>
-  </div>
-
-  <!-- Expanded variants -->
-  {#if isExpanded && group.hasMultipleVariants}
-    <div class="bg-black/20 border-t border-white/5">
-      {#each group.variants as variant}
-        {@const fitStatus = getModelFitStatus(variant.id)}
-        {@const modelCanFit = canModelFit(variant.id)}
-        {@const isSelected = selectedModelId === variant.id}
-        <button
-          type="button"
-          class="w-full flex items-center gap-3 px-3 py-2 pl-10 hover:bg-white/5 transition-colors text-left {!modelCanFit
-            ? 'opacity-50 cursor-not-allowed'
-            : 'cursor-pointer'} {isSelected
-            ? 'bg-exo-yellow/10 border-l-2 border-exo-yellow'
-            : 'border-l-2 border-transparent'}"
-          disabled={!modelCanFit}
-          onclick={() => {
-            if (modelCanFit) {
-              onSelectModel(variant.id);
-            }
-          }}
-        >
-          <!-- Quantization badge -->
-          <span
-            class="text-xs font-mono px-1.5 py-0.5 rounded bg-white/10 text-white/70 flex-shrink-0"
-          >
-            {variant.quantization || "default"}
-          </span>
-
-          <!-- Size -->
-          <span
-            class="text-xs font-mono flex-1 {getSizeClassForFitStatus(
-              fitStatus,
-            )}"
-          >
-            {formatSize(variant.storage_size_megabytes)}
-          </span>
-
-          <!-- Download indicator for this variant -->
-          {#if downloadStatusMap?.get(variant.id)}
-            {@const variantDl = downloadStatusMap.get(variant.id)}
-            {#if variantDl}
-              <span
-                class="flex-shrink-0"
-                title={`Downloaded on ${variantDl.nodeNames.join(", ")}`}
-              >
-                <svg
-                  class="w-3.5 h-3.5"
-                  viewBox="0 0 24 24"
-                  fill="none"
-                  stroke="currentColor"
-                  stroke-width="2"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                >
-                  <path
-                    class="text-white/40"
-                    d="M20 20a2 2 0 0 0 2-2V8a2 2 0 0 0-2-2h-7.9a2 2 0 0 1-1.69-.9L9.6 3.9A2 2 0 0 0 7.93 3H4a2 2 0 0 0-2 2v13a2 2 0 0 0 2 2Z"
-                  />
-                  <path class="text-green-400" d="m9 13 2 2 4-4" />
-                </svg>
-              </span>
-            {/if}
-          {/if}
-
-          <!-- Check mark if selected -->
-          {#if isSelected}
-            <svg
-              class="w-4 h-4 text-exo-yellow"
-              viewBox="0 0 24 24"
-              fill="currentColor"
-            >
-              <path
-                d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41L9 16.17z"
-              />
-            </svg>
-          {/if}
-        </button>
-      {/each}
-    </div>
-  {/if}
-</div>
--- a/dashboard/src/lib/components/ModelPickerModal.svelte
+++ b/dashboard/src/lib/components/ModelPickerModal.svelte
@@ -1,899 +0,0 @@
-<script lang="ts">
-  import { fade, fly } from "svelte/transition";
-  import { cubicOut } from "svelte/easing";
-  import FamilySidebar from "./FamilySidebar.svelte";
-  import ModelPickerGroup from "./ModelPickerGroup.svelte";
-  import ModelFilterPopover from "./ModelFilterPopover.svelte";
-  import HuggingFaceResultItem from "./HuggingFaceResultItem.svelte";
-  import { getNodesWithModelDownloaded } from "$lib/utils/downloads";
-
-  interface ModelInfo {
-    id: string;
-    name?: string;
-    storage_size_megabytes?: number;
-    base_model?: string;
-    quantization?: string;
-    supports_tensor?: boolean;
-    capabilities?: string[];
-    family?: string;
-    is_custom?: boolean;
-    tasks?: string[];
-    hugging_face_id?: string;
-  }
-
-  interface ModelGroup {
-    id: string;
-    name: string;
-    capabilities: string[];
-    family: string;
-    variants: ModelInfo[];
-    smallestVariant: ModelInfo;
-    hasMultipleVariants: boolean;
-  }
-
-  interface FilterState {
-    capabilities: string[];
-    sizeRange: { min: number; max: number } | null;
-    downloadedOnly: boolean;
-  }
-
-  interface HuggingFaceModel {
-    id: string;
-    author: string;
-    downloads: number;
-    likes: number;
-    last_modified: string;
-    tags: string[];
-  }
-
-  type ModelFitStatus = "fits_now" | "fits_cluster_capacity" | "too_large";
-
-  type ModelPickerModalProps = {
-    isOpen: boolean;
-    models: ModelInfo[];
-    selectedModelId: string | null;
-    favorites: Set<string>;
-    existingModelIds: Set<string>;
-    canModelFit: (modelId: string) => boolean;
-    getModelFitStatus: (modelId: string) => ModelFitStatus;
-    onSelect: (modelId: string) => void;
-    onClose: () => void;
-    onToggleFavorite: (baseModelId: string) => void;
-    onAddModel: (modelId: string) => Promise<void>;
-    onDeleteModel: (modelId: string) => Promise<void>;
-    totalMemoryGB: number;
-    usedMemoryGB: number;
-    downloadsData?: Record<string, unknown[]>;
-    topologyNodes?: Record<
-      string,
-      {
-        friendly_name?: string;
-        system_info?: { model_id?: string };
-        macmon_info?: { memory?: { ram_total?: number } };
-      }
-    >;
-  };
-
-  let {
-    isOpen,
-    models,
-    selectedModelId,
-    favorites,
-    existingModelIds,
-    canModelFit,
-    getModelFitStatus,
-    onSelect,
-    onClose,
-    onToggleFavorite,
-    onAddModel,
-    onDeleteModel,
-    totalMemoryGB,
-    usedMemoryGB,
-    downloadsData,
-    topologyNodes,
-  }: ModelPickerModalProps = $props();
-
-  // Local state
-  let searchQuery = $state("");
-  let selectedFamily = $state<string | null>(null);
-  let expandedGroups = $state<Set<string>>(new Set());
-  let showFilters = $state(false);
-  let filters = $state<FilterState>({
-    capabilities: [],
-    sizeRange: null,
-    downloadedOnly: false,
-  });
-  let infoGroup = $state<ModelGroup | null>(null);
-
-  // Download availability per model group
-  type DownloadAvailability = {
-    available: boolean;
-    nodeNames: string[];
-    nodeIds: string[];
-  };
-
-  function getNodeName(nodeId: string): string {
-    const node = topologyNodes?.[nodeId];
-    return (
-      node?.friendly_name || node?.system_info?.model_id || nodeId.slice(0, 8)
-    );
-  }
-
-  const modelDownloadAvailability = $derived.by(() => {
-    const result = new Map<string, DownloadAvailability>();
-    if (!downloadsData || !topologyNodes) return result;
-
-    for (const model of models) {
-      const nodeIds = getNodesWithModelDownloaded(downloadsData, model.id);
-      if (nodeIds.length === 0) continue;
-
-      // Sum total RAM across nodes that have the model
-      let totalRamBytes = 0;
-      for (const nodeId of nodeIds) {
-        const ramTotal = topologyNodes[nodeId]?.macmon_info?.memory?.ram_total;
-        if (typeof ramTotal === "number") totalRamBytes += ramTotal;
-      }
-
-      const modelSizeBytes = (model.storage_size_megabytes || 0) * 1024 * 1024;
-      result.set(model.id, {
-        available: modelSizeBytes > 0 && totalRamBytes >= modelSizeBytes,
-        nodeNames: nodeIds.map(getNodeName),
-        nodeIds,
-      });
-    }
-    return result;
-  });
-
-  // Aggregate download availability per group (available if ANY variant is available)
-  function getGroupDownloadAvailability(
-    group: ModelGroup,
-  ): DownloadAvailability | undefined {
-    for (const variant of group.variants) {
-      const avail = modelDownloadAvailability.get(variant.id);
-      if (avail && avail.nodeIds.length > 0) return avail;
-    }
-    return undefined;
-  }
-
-  // Get per-variant download map for a group
-  function getVariantDownloadMap(
-    group: ModelGroup,
-  ): Map<string, DownloadAvailability> {
-    const map = new Map<string, DownloadAvailability>();
-    for (const variant of group.variants) {
-      const avail = modelDownloadAvailability.get(variant.id);
-      if (avail && avail.nodeIds.length > 0) map.set(variant.id, avail);
-    }
-    return map;
-  }
-
-  // HuggingFace Hub state
-  let hfSearchQuery = $state("");
-  let hfSearchResults = $state<HuggingFaceModel[]>([]);
-  let hfTrendingModels = $state<HuggingFaceModel[]>([]);
-  let hfIsSearching = $state(false);
-  let hfIsLoadingTrending = $state(false);
-  let addingModelId = $state<string | null>(null);
-  let hfSearchDebounceTimer: ReturnType<typeof setTimeout> | null = null;
-  let manualModelId = $state("");
-  let addModelError = $state<string | null>(null);
-
-  // Reset transient state when modal opens, but preserve tab selection
-  $effect(() => {
-    if (isOpen) {
-      searchQuery = "";
-      expandedGroups = new Set();
-      showFilters = false;
-      manualModelId = "";
-      addModelError = null;
-    }
-  });
-
-  // Fetch trending models when HuggingFace is selected
-  $effect(() => {
-    if (
-      selectedFamily === "huggingface" &&
-      hfTrendingModels.length === 0 &&
-      !hfIsLoadingTrending
-    ) {
-      fetchTrendingModels();
-    }
-  });
-
-  async function fetchTrendingModels() {
-    hfIsLoadingTrending = true;
-    try {
-      const response = await fetch("/models/search?query=&limit=20");
-      if (response.ok) {
-        hfTrendingModels = await response.json();
-      }
-    } catch (error) {
-      console.error("Failed to fetch trending models:", error);
-    } finally {
-      hfIsLoadingTrending = false;
-    }
-  }
-
-  async function searchHuggingFace(query: string) {
-    if (query.length < 2) {
-      hfSearchResults = [];
-      return;
-    }
-
-    hfIsSearching = true;
-    try {
-      const response = await fetch(
-        `/models/search?query=${encodeURIComponent(query)}&limit=20`,
-      );
-      if (response.ok) {
-        hfSearchResults = await response.json();
-      } else {
-        hfSearchResults = [];
-      }
-    } catch (error) {
-      console.error("Failed to search models:", error);
-      hfSearchResults = [];
-    } finally {
-      hfIsSearching = false;
-    }
-  }
-
-  function handleHfSearchInput(query: string) {
-    hfSearchQuery = query;
-    addModelError = null;
-
-    if (hfSearchDebounceTimer) {
-      clearTimeout(hfSearchDebounceTimer);
-    }
-
-    if (query.length >= 2) {
-      hfSearchDebounceTimer = setTimeout(() => {
-        searchHuggingFace(query);
-      }, 300);
-    } else {
-      hfSearchResults = [];
-    }
-  }
-
-  async function handleAddModel(modelId: string) {
-    addingModelId = modelId;
-    addModelError = null;
-    try {
-      await onAddModel(modelId);
-    } catch (error) {
-      addModelError =
-        error instanceof Error ? error.message : "Failed to add model";
-    } finally {
-      addingModelId = null;
-    }
-  }
-
-  async function handleAddManualModel() {
-    if (!manualModelId.trim()) return;
-    await handleAddModel(manualModelId.trim());
-    if (!addModelError) {
-      manualModelId = "";
-    }
-  }
-
-  function handleSelectHfModel(modelId: string) {
-    onSelect(modelId);
-    onClose();
-  }
-
-  // Models to display in HuggingFace view
-  const hfDisplayModels = $derived.by((): HuggingFaceModel[] => {
-    if (hfSearchQuery.length >= 2) {
-      return hfSearchResults;
-    }
-    return hfTrendingModels;
-  });
-
-  // Group models by base_model
-  const groupedModels = $derived.by((): ModelGroup[] => {
-    const groups = new Map<string, ModelGroup>();
-
-    for (const model of models) {
-      const groupId = model.base_model || model.id;
-      const groupName = model.base_model || model.name || model.id;
-
-      if (!groups.has(groupId)) {
-        groups.set(groupId, {
-          id: groupId,
-          name: groupName,
-          capabilities: model.capabilities || ["text"],
-          family: model.family || "",
-          variants: [],
-          smallestVariant: model,
-          hasMultipleVariants: false,
-        });
-      }
-
-      const group = groups.get(groupId)!;
-      group.variants.push(model);
-
-      // Track smallest variant
-      if (
-        (model.storage_size_megabytes || 0) <
-        (group.smallestVariant.storage_size_megabytes || Infinity)
-      ) {
-        group.smallestVariant = model;
-      }
-
-      // Update capabilities if not set
-      if (
-        group.capabilities.length <= 1 &&
-        model.capabilities &&
-        model.capabilities.length > 1
-      ) {
-        group.capabilities = model.capabilities;
-      }
-      if (!group.family && model.family) {
-        group.family = model.family;
-      }
-    }
-
-    // Sort variants within each group by size
-    for (const group of groups.values()) {
-      group.variants.sort(
-        (a, b) =>
-          (a.storage_size_megabytes || 0) - (b.storage_size_megabytes || 0),
-      );
-      group.hasMultipleVariants = group.variants.length > 1;
-    }
-
-    // Convert to array and sort by smallest variant size (biggest first)
-    return Array.from(groups.values()).sort((a, b) => {
-      return (
-        (b.smallestVariant.storage_size_megabytes || 0) -
-        (a.smallestVariant.storage_size_megabytes || 0)
-      );
-    });
-  });
-
-  // Get unique families
-  const uniqueFamilies = $derived.by((): string[] => {
-    const families = new Set<string>();
-    for (const group of groupedModels) {
-      if (group.family) {
-        families.add(group.family);
-      }
-    }
-    const familyOrder = [
-      "kimi",
-      "qwen",
-      "glm",
-      "minimax",
-      "deepseek",
-      "gpt-oss",
-      "llama",
-      "flux",
-      "qwen-image",
-    ];
-    return Array.from(families).sort((a, b) => {
-      const aIdx = familyOrder.indexOf(a);
-      const bIdx = familyOrder.indexOf(b);
-      if (aIdx === -1 && bIdx === -1) return a.localeCompare(b);
-      if (aIdx === -1) return 1;
-      if (bIdx === -1) return -1;
-      return aIdx - bIdx;
-    });
-  });
-
-  // Filter models based on search, family, and filters
-  const filteredGroups = $derived.by((): ModelGroup[] => {
-    let result: ModelGroup[] = [...groupedModels];
-
-    // Filter by family
-    if (selectedFamily === "favorites") {
-      result = result.filter((g) => favorites.has(g.id));
-    } else if (selectedFamily && selectedFamily !== "huggingface") {
-      result = result.filter((g) => g.family === selectedFamily);
-    }
-
-    // Filter by search query
-    if (searchQuery.trim()) {
-      const query = searchQuery.toLowerCase().trim();
-      result = result.filter(
-        (g) =>
-          g.name.toLowerCase().includes(query) ||
-          g.variants.some(
-            (v) =>
-              v.id.toLowerCase().includes(query) ||
-              (v.name || "").toLowerCase().includes(query),
-          ),
-      );
-    }
-
-    // Filter by capabilities
-    if (filters.capabilities.length > 0) {
-      result = result.filter((g) =>
-        filters.capabilities.every((cap) => g.capabilities.includes(cap)),
-      );
-    }
-
-    // Filter by size range
-    if (filters.sizeRange) {
-      const { min, max } = filters.sizeRange;
-      result = result.filter((g) => {
-        const sizeGB = (g.smallestVariant.storage_size_megabytes || 0) / 1024;
-        return sizeGB >= min && sizeGB <= max;
-      });
-    }
-
-    // Filter to downloaded models only
-    if (filters.downloadedOnly) {
-      result = result.filter((g) =>
-        g.variants.some((v) => {
-          const avail = modelDownloadAvailability.get(v.id);
-          return avail && avail.nodeIds.length > 0;
-        }),
-      );
-    }
-
-    // Sort: fits-now first, then fits-cluster-capacity, then too-large
-    result.sort((a, b) => {
-      const getGroupFitRank = (group: ModelGroup): number => {
-        let hasClusterCapacityOnly = false;
-        for (const variant of group.variants) {
-          const fitStatus = getModelFitStatus(variant.id);
-          if (fitStatus === "fits_now") return 0;
-          if (fitStatus === "fits_cluster_capacity") {
-            hasClusterCapacityOnly = true;
-          }
-        }
-        return hasClusterCapacityOnly ? 1 : 2;
-      };
-
-      const aRank = getGroupFitRank(a);
-      const bRank = getGroupFitRank(b);
-      if (aRank !== bRank) return aRank - bRank;
-
-      return (
-        (b.smallestVariant.storage_size_megabytes || 0) -
-        (a.smallestVariant.storage_size_megabytes || 0)
-      );
-    });
-
-    return result;
-  });
-
-  // Check if any favorites exist
-  const hasFavorites = $derived(favorites.size > 0);
-
-  function toggleGroupExpanded(groupId: string) {
-    const next = new Set(expandedGroups);
-    if (next.has(groupId)) {
-      next.delete(groupId);
-    } else {
-      next.add(groupId);
-    }
-    expandedGroups = next;
-  }
-
-  function handleSelect(modelId: string) {
-    onSelect(modelId);
-    onClose();
-  }
-
-  function handleKeydown(e: KeyboardEvent) {
-    if (e.key === "Escape") {
-      onClose();
-    }
-  }
-
-  function handleFiltersChange(newFilters: FilterState) {
-    filters = newFilters;
-  }
-
-  function clearFilters() {
-    filters = { capabilities: [], sizeRange: null, downloadedOnly: false };
-  }
-
-  const hasActiveFilters = $derived(
-    filters.capabilities.length > 0 ||
-      filters.sizeRange !== null ||
-      filters.downloadedOnly,
-  );
-</script>
-
-<svelte:window onkeydown={handleKeydown} />
-
-{#if isOpen}
-  <!-- Backdrop -->
-  <div
-    class="fixed inset-0 z-50 bg-black/80 backdrop-blur-sm"
-    transition:fade={{ duration: 200 }}
-    onclick={onClose}
-    role="presentation"
-  ></div>
-
-  <!-- Modal -->
-  <div
-    class="fixed z-50 top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[min(90vw,600px)] h-[min(80vh,700px)] bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-2xl overflow-hidden flex flex-col"
-    transition:fly={{ y: 20, duration: 300, easing: cubicOut }}
-    role="dialog"
-    aria-modal="true"
-    aria-label="Select a model"
-  >
-    <!-- Header with search -->
-    <div
-      class="flex items-center gap-2 p-3 border-b border-exo-yellow/10 bg-exo-medium-gray/30"
-    >
-      {#if selectedFamily === "huggingface"}
-        <!-- HuggingFace search -->
-        <svg
-          class="w-5 h-5 text-orange-400/60 flex-shrink-0"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-        >
-          <circle cx="11" cy="11" r="8" />
-          <path d="M21 21l-4.35-4.35" />
-        </svg>
-        <input
-          type="search"
-          class="flex-1 bg-transparent border-none outline-none text-sm font-mono text-white placeholder-white/40"
-          placeholder="Search mlx-community models..."
-          value={hfSearchQuery}
-          oninput={(e) => handleHfSearchInput(e.currentTarget.value)}
-        />
-        {#if hfIsSearching}
-          <div class="flex-shrink-0">
-            <span
-              class="w-4 h-4 border-2 border-orange-400 border-t-transparent rounded-full animate-spin block"
-            ></span>
-          </div>
-        {/if}
-      {:else}
-        <!-- Normal model search -->
-        <svg
-          class="w-5 h-5 text-white/40 flex-shrink-0"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-        >
-          <circle cx="11" cy="11" r="8" />
-          <path d="M21 21l-4.35-4.35" />
-        </svg>
-        <input
-          type="search"
-          class="flex-1 bg-transparent border-none outline-none text-sm font-mono text-white placeholder-white/40"
-          placeholder="Search models..."
-          bind:value={searchQuery}
-        />
-        <!-- Cluster memory -->
-        <span
-          class="text-xs font-mono flex-shrink-0"
-          title="Cluster memory usage"
-          ><span class="text-exo-yellow">{Math.round(usedMemoryGB)}GB</span
-          ><span class="text-white/40">/{Math.round(totalMemoryGB)}GB</span
-          ></span
-        >
-        <!-- Filter button -->
-        <div class="relative filter-toggle">
-          <button
-            type="button"
-            class="p-1.5 rounded hover:bg-white/10 transition-colors {hasActiveFilters
-              ? 'text-exo-yellow'
-              : 'text-white/50'}"
-            onclick={() => (showFilters = !showFilters)}
-            title="Filter by capability or size"
-          >
-            <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
-              <path d="M10 18h4v-2h-4v2zM3 6v2h18V6H3zm3 7h12v-2H6v2z" />
-            </svg>
-          </button>
-          {#if showFilters}
-            <ModelFilterPopover
-              {filters}
-              onChange={handleFiltersChange}
-              onClear={clearFilters}
-              onClose={() => (showFilters = false)}
-            />
-          {/if}
-        </div>
-      {/if}
-      <!-- Close button -->
-      <button
-        type="button"
-        class="p-1.5 rounded hover:bg-white/10 transition-colors text-white/50 hover:text-white/70"
-        onclick={onClose}
-        title="Close model picker"
-      >
-        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
-          <path
-            d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
-          />
-        </svg>
-      </button>
-    </div>
-
-    <!-- Body -->
-    <div class="flex flex-1 overflow-hidden">
-      <!-- Family sidebar -->
-      <FamilySidebar
-        families={uniqueFamilies}
-        {selectedFamily}
-        {hasFavorites}
-        onSelect={(family) => (selectedFamily = family)}
-      />
-
-      <!-- Model list -->
-      <div class="flex-1 overflow-y-auto scrollbar-hide flex flex-col">
-        {#if selectedFamily === "huggingface"}
-          <!-- HuggingFace Hub view -->
-          <div class="flex-1 flex flex-col min-h-0">
-            <!-- Section header -->
-            <div
-              class="sticky top-0 z-10 px-3 py-2 bg-exo-dark-gray/95 border-b border-exo-yellow/10"
-            >
-              <span class="text-xs font-mono text-white/40">
-                {#if hfSearchQuery.length >= 2}
-                  Search results for "{hfSearchQuery}"
-                {:else}
-                  Trending on mlx-community
-                {/if}
-              </span>
-            </div>
-
-            <!-- Results list -->
-            <div class="flex-1 overflow-y-auto scrollbar-hide">
-              {#if hfIsLoadingTrending && hfTrendingModels.length === 0}
-                <div
-                  class="flex items-center justify-center py-12 text-white/40"
-                >
-                  <span
-                    class="w-5 h-5 border-2 border-orange-400 border-t-transparent rounded-full animate-spin mr-2"
-                  ></span>
-                  <span class="font-mono text-sm"
-                    >Loading trending models...</span
-                  >
-                </div>
-              {:else if hfDisplayModels.length === 0}
-                <div
-                  class="flex flex-col items-center justify-center py-12 text-white/40"
-                >
-                  <svg
-                    class="w-10 h-10 mb-2"
-                    viewBox="0 0 24 24"
-                    fill="currentColor"
-                  >
-                    <path
-                      d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 13.5c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5zm4 0c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5zm2-4.5H8c0-2.21 1.79-4 4-4s4 1.79 4 4z"
-                    />
-                  </svg>
-                  <p class="font-mono text-sm">No models found</p>
-                  {#if hfSearchQuery}
-                    <p class="font-mono text-xs mt-1">
-                      Try a different search term
-                    </p>
-                  {/if}
-                </div>
-              {:else}
-                {#each hfDisplayModels as model}
-                  <HuggingFaceResultItem
-                    {model}
-                    isAdded={existingModelIds.has(model.id)}
-                    isAdding={addingModelId === model.id}
-                    onAdd={() => handleAddModel(model.id)}
-                    onSelect={() => handleSelectHfModel(model.id)}
-                    downloadedOnNodes={downloadsData
-                      ? getNodesWithModelDownloaded(
-                          downloadsData,
-                          model.id,
-                        ).map(getNodeName)
-                      : []}
-                  />
-                {/each}
-              {/if}
-            </div>
-
-            <!-- Manual input footer -->
-            <div
-              class="sticky bottom-0 border-t border-exo-yellow/10 bg-exo-dark-gray p-3"
-            >
-              {#if addModelError}
-                <div
-                  class="bg-red-500/10 border border-red-500/30 rounded px-3 py-2 mb-2"
-                >
-                  <p class="text-red-400 text-xs font-mono break-words">
-                    {addModelError}
-                  </p>
-                </div>
-              {/if}
-              <div class="flex gap-2">
-                <input
-                  type="text"
-                  class="flex-1 bg-exo-black/60 border border-exo-yellow/30 rounded px-3 py-1.5 text-xs font-mono text-white placeholder-white/30 focus:outline-none focus:border-exo-yellow/50"
-                  placeholder="Or paste model ID directly..."
-                  bind:value={manualModelId}
-                  onkeydown={(e) => {
-                    if (e.key === "Enter") handleAddManualModel();
-                  }}
-                />
-                <button
-                  type="button"
-                  onclick={handleAddManualModel}
-                  disabled={!manualModelId.trim() || addingModelId !== null}
-                  class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-orange-500/10 text-orange-400 border border-orange-400/30 hover:bg-orange-500/20 transition-colors rounded disabled:opacity-50 disabled:cursor-not-allowed"
-                >
-                  Add
-                </button>
-              </div>
-            </div>
-          </div>
-        {:else if filteredGroups.length === 0}
-          <div
-            class="flex flex-col items-center justify-center h-full text-white/40 p-8"
-          >
-            <svg class="w-12 h-12 mb-3" viewBox="0 0 24 24" fill="currentColor">
-              <path
-                d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"
-              />
-            </svg>
-            <p class="font-mono text-sm">No models found</p>
-            {#if hasActiveFilters || searchQuery}
-              <button
-                type="button"
-                class="mt-2 text-xs text-exo-yellow hover:underline"
-                onclick={() => {
-                  searchQuery = "";
-                  clearFilters();
-                }}
-              >
-                Clear filters
-              </button>
-            {/if}
-          </div>
-        {:else}
-          {#each filteredGroups as group}
-            <ModelPickerGroup
-              {group}
-              isExpanded={expandedGroups.has(group.id)}
-              isFavorite={favorites.has(group.id)}
-              {selectedModelId}
-              {canModelFit}
-              {getModelFitStatus}
-              onToggleExpand={() => toggleGroupExpanded(group.id)}
-              onSelectModel={handleSelect}
-              {onToggleFavorite}
-              onShowInfo={(g) => (infoGroup = g)}
-              downloadStatusMap={getVariantDownloadMap(group)}
-            />
-          {/each}
-        {/if}
-      </div>
-    </div>
-
-    <!-- Footer with active filters indicator -->
-    {#if hasActiveFilters}
-      <div
-        class="flex items-center gap-2 px-3 py-2 border-t border-exo-yellow/10 bg-exo-medium-gray/20 text-xs font-mono text-white/50"
-      >
-        <span>Filters:</span>
-        {#each filters.capabilities as cap}
-          <span class="px-1.5 py-0.5 bg-exo-yellow/20 text-exo-yellow rounded"
-            >{cap}</span
-          >
-        {/each}
-        {#if filters.downloadedOnly}
-          <span class="px-1.5 py-0.5 bg-green-500/20 text-green-400 rounded"
-            >Downloaded</span
-          >
-        {/if}
-        {#if filters.sizeRange}
-          <span class="px-1.5 py-0.5 bg-exo-yellow/20 text-exo-yellow rounded">
-            {filters.sizeRange.min}GB - {filters.sizeRange.max}GB
-          </span>
-        {/if}
-        <button
-          type="button"
-          class="ml-auto text-white/40 hover:text-white/60"
-          onclick={clearFilters}
-        >
-          Clear all
-        </button>
-      </div>
-    {/if}
-  </div>
-
-  <!-- Info modal -->
-  {#if infoGroup}
-    <div
-      class="fixed inset-0 z-[60] bg-black/60"
-      transition:fade={{ duration: 150 }}
-      onclick={() => (infoGroup = null)}
-      role="presentation"
-    ></div>
-    <div
-      class="fixed z-[60] top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[min(80vw,400px)] bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-2xl p-4"
-      transition:fly={{ y: 10, duration: 200, easing: cubicOut }}
-      role="dialog"
-      aria-modal="true"
-    >
-      <div class="flex items-start justify-between mb-3">
-        <h3 class="font-mono text-lg text-white">{infoGroup.name}</h3>
-        <button
-          type="button"
-          class="p-1 rounded hover:bg-white/10 transition-colors text-white/50"
-          onclick={() => (infoGroup = null)}
-          title="Close model details"
-          aria-label="Close info dialog"
-        >
-          <svg class="w-4 h-4" viewBox="0 0 24 24" fill="currentColor">
-            <path
-              d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
-            />
-          </svg>
-        </button>
-      </div>
-      <div class="space-y-2 text-xs font-mono">
-        <div class="flex items-center gap-2">
-          <span class="text-white/40">Family:</span>
-          <span class="text-white/70">{infoGroup.family || "Unknown"}</span>
-        </div>
-        <div class="flex items-center gap-2">
-          <span class="text-white/40">Capabilities:</span>
-          <span class="text-white/70">{infoGroup.capabilities.join(", ")}</span>
-        </div>
-        <div class="flex items-center gap-2">
-          <span class="text-white/40">Variants:</span>
-          <span class="text-white/70">{infoGroup.variants.length}</span>
-        </div>
-        {#if infoGroup.variants.length > 0}
-          <div class="mt-3 pt-3 border-t border-exo-yellow/10">
-            <span class="text-white/40">Available quantizations:</span>
-            <div class="flex flex-wrap gap-1 mt-1">
-              {#each infoGroup.variants as variant}
-                <span
-                  class="px-1.5 py-0.5 bg-white/10 text-white/60 rounded text-[10px]"
-                >
-                  {variant.quantization || "default"} ({Math.round(
-                    (variant.storage_size_megabytes || 0) / 1024,
-                  )}GB)
-                </span>
-              {/each}
-            </div>
-          </div>
-        {/if}
-        {#if getGroupDownloadAvailability(infoGroup)?.nodeNames?.length}
-          {@const infoDownload = getGroupDownloadAvailability(infoGroup)}
-          {#if infoDownload}
-            <div class="mt-3 pt-3 border-t border-exo-yellow/10">
-              <div class="flex items-center gap-2 mb-1">
-                <svg
-                  class="w-3.5 h-3.5"
-                  viewBox="0 0 24 24"
-                  fill="none"
-                  stroke="currentColor"
-                  stroke-width="2"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                >
-                  <path
-                    class="text-white/40"
-                    d="M20 20a2 2 0 0 0 2-2V8a2 2 0 0 0-2-2h-7.9a2 2 0 0 1-1.69-.9L9.6 3.9A2 2 0 0 0 7.93 3H4a2 2 0 0 0-2 2v13a2 2 0 0 0 2 2Z"
-                  />
-                  <path class="text-green-400" d="m9 13 2 2 4-4" />
-                </svg>
-                <span class="text-white/40">Downloaded on:</span>
-              </div>
-              <div class="flex flex-wrap gap-1 mt-1">
-                {#each infoDownload.nodeNames as nodeName}
-                  <span
-                    class="px-1.5 py-0.5 bg-green-500/10 text-green-400/80 border border-green-500/20 rounded text-[10px]"
-                  >
-                    {nodeName}
-                  </span>
-                {/each}
-              </div>
-            </div>
-          {/if}
-        {/if}
-      </div>
-    </div>
-  {/if}
-{/if}
--- a/dashboard/src/lib/components/TokenHeatmap.svelte
+++ b/dashboard/src/lib/components/TokenHeatmap.svelte
@@ -1,236 +0,0 @@
-<script lang="ts">
-  import type { TokenData } from "$lib/stores/app.svelte";
-
-  interface Props {
-    tokens: TokenData[];
-    class?: string;
-    isGenerating?: boolean;
-    onRegenerateFrom?: (tokenIndex: number) => void;
-  }
-
-  let {
-    tokens,
-    class: className = "",
-    isGenerating = false,
-    onRegenerateFrom,
-  }: Props = $props();
-
-  // Tooltip state - track both token data and index
-  let hoveredTokenIndex = $state<number | null>(null);
-  let hoveredPosition = $state<{ x: number; y: number } | null>(null);
-  let isTooltipHovered = $state(false);
-  let hideTimeoutId: ReturnType<typeof setTimeout> | null = null;
-
-  // Derive the hovered token from the index (stable across re-renders)
-  const hoveredToken = $derived(
-    hoveredTokenIndex !== null && hoveredPosition && tokens[hoveredTokenIndex]
-      ? {
-          token: tokens[hoveredTokenIndex],
-          index: hoveredTokenIndex,
-          ...hoveredPosition,
-        }
-      : null,
-  );
-
-  /**
-   * Get confidence styling based on probability.
-   * Following Apple design principles: high confidence tokens blend in,
-   * only uncertainty draws attention.
-   */
-  function getConfidenceClass(probability: number): string {
-    if (probability > 0.8) return "text-inherit"; // Expected tokens - blend in
-    if (probability > 0.5) return "bg-gray-500/10 text-inherit"; // Slight hint
-    if (probability > 0.2) return "bg-amber-500/15 text-amber-200/90"; // Subtle warmth
-    return "bg-red-500/20 text-red-200/90"; // Draws attention
-  }
-
-  /**
-   * Get border/underline styling for uncertain tokens
-   */
-  function getBorderClass(probability: number): string {
-    if (probability > 0.8) return "border-transparent"; // No border for expected
-    if (probability > 0.5) return "border-gray-500/20";
-    if (probability > 0.2) return "border-amber-500/30";
-    return "border-red-500/40";
-  }
-
-  function clearHideTimeout() {
-    if (hideTimeoutId) {
-      clearTimeout(hideTimeoutId);
-      hideTimeoutId = null;
-    }
-  }
-
-  function handleMouseEnter(
-    event: MouseEvent,
-    token: TokenData,
-    index: number,
-  ) {
-    clearHideTimeout();
-    const rects = (event.target as HTMLElement).getClientRects();
-    let rect = rects[0];
-    for (let j = 0; j < rects.length; j++) {
-      if (event.clientY >= rects[j].top && event.clientY <= rects[j].bottom) {
-        rect = rects[j];
-        break;
-      }
-    }
-    hoveredTokenIndex = index;
-    hoveredPosition = {
-      x: rect.left + rect.width / 2,
-      y: rect.top - 10,
-    };
-  }
-
-  function handleMouseLeave() {
-    clearHideTimeout();
-    // Use longer delay during generation to account for re-renders
-    const delay = isGenerating ? 300 : 200;
-    hideTimeoutId = setTimeout(() => {
-      if (!isTooltipHovered) {
-        hoveredTokenIndex = null;
-        hoveredPosition = null;
-      }
-    }, delay);
-  }
-
-  function handleTooltipEnter() {
-    clearHideTimeout();
-    isTooltipHovered = true;
-  }
-
-  function handleTooltipLeave() {
-    isTooltipHovered = false;
-    hoveredTokenIndex = null;
-    hoveredPosition = null;
-  }
-
-  function handleRegenerate() {
-    if (hoveredToken && onRegenerateFrom) {
-      const indexToRegenerate = hoveredToken.index;
-      // Clear hover state immediately
-      hoveredTokenIndex = null;
-      hoveredPosition = null;
-      isTooltipHovered = false;
-      // Call regenerate
-      onRegenerateFrom(indexToRegenerate);
-    }
-  }
-
-  function formatProbability(prob: number): string {
-    return (prob * 100).toFixed(1) + "%";
-  }
-
-  function formatLogprob(logprob: number): string {
-    return logprob.toFixed(3);
-  }
-
-  function getProbabilityColor(probability: number): string {
-    if (probability > 0.8) return "text-gray-300";
-    if (probability > 0.5) return "text-gray-400";
-    if (probability > 0.2) return "text-amber-400";
-    return "text-red-400";
-  }
-</script>
-
-<div class="token-heatmap leading-relaxed {className}">
-  {#each tokens as tokenData, i (i)}
-    <span
-      role="button"
-      tabindex="0"
-      class="token-span inline rounded px-0.5 py-0.5 cursor-pointer transition-all duration-150 border {getConfidenceClass(
-        tokenData.probability,
-      )} {getBorderClass(tokenData.probability)} hover:opacity-80"
-      onmouseenter={(e) => handleMouseEnter(e, tokenData, i)}
-      onmouseleave={handleMouseLeave}>{tokenData.token}</span
-    >
-  {/each}
-</div>
-
-<!-- Tooltip -->
-{#if hoveredToken}
-  <div
-    class="fixed z-50 pb-2"
-    style="left: {hoveredToken.x}px; top: {hoveredToken.y}px; transform: translate(-50%, -100%);"
-    onmouseenter={handleTooltipEnter}
-    onmouseleave={handleTooltipLeave}
-  >
-    <div
-      class="bg-gray-900/95 backdrop-blur-sm border border-gray-700/50 rounded-xl shadow-xl p-3 text-sm min-w-48"
-    >
-      <!-- Token info -->
-      <div class="mb-2">
-        <span class="text-gray-500 text-xs">Token:</span>
-        <span class="text-white font-mono ml-1"
-          >"{hoveredToken.token.token}"</span
-        >
-        <span class="{getProbabilityColor(hoveredToken.token.probability)} ml-2"
-          >{formatProbability(hoveredToken.token.probability)}</span
-        >
-      </div>
-
-      <div class="text-gray-400 text-xs mb-1">
-        logprob: <span class="text-gray-300 font-mono"
-          >{formatLogprob(hoveredToken.token.logprob)}</span
-        >
-      </div>
-
-      <!-- Top alternatives -->
-      {#if hoveredToken.token.topLogprobs.length > 0}
-        <div class="border-t border-gray-700/50 mt-2 pt-2">
-          <div class="text-gray-500 text-xs mb-1">Alternatives:</div>
-          {#each hoveredToken.token.topLogprobs.slice(0, 5) as alt, idx (idx)}
-            {@const altProb = Math.exp(alt.logprob)}
-            <div class="flex justify-between items-center text-xs py-0.5">
-              <span class="text-gray-300 font-mono truncate max-w-24"
-                >"{alt.token}"</span
-              >
-              <span class="text-gray-400 ml-2"
-                >{formatProbability(altProb)}</span
-              >
-            </div>
-          {/each}
-        </div>
-      {/if}
-
-      <!-- Regenerate button -->
-      {#if onRegenerateFrom}
-        <button
-          onclick={handleRegenerate}
-          class="w-full mt-2 pt-2 border-t border-gray-700/50 flex items-center justify-center gap-1.5 text-xs text-gray-400 hover:text-white transition-colors cursor-pointer"
-        >
-          <svg
-            class="w-3 h-3"
-            fill="none"
-            viewBox="0 0 24 24"
-            stroke="currentColor"
-          >
-            <path
-              stroke-linecap="round"
-              stroke-linejoin="round"
-              stroke-width="2"
-              d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15"
-            />
-          </svg>
-          Regenerate from here
-        </button>
-      {/if}
-    </div>
-    <!-- Arrow -->
-    <div class="absolute left-1/2 -translate-x-1/2 top-full">
-      <div class="border-8 border-transparent border-t-gray-900"></div>
-    </div>
-  </div>
-{/if}
-
-<style>
-  .token-heatmap {
-    word-wrap: break-word;
-    white-space: pre-wrap;
-  }
-
-  .token-span {
-    margin: 0;
-    border-width: 1px;
-  }
-</style>
--- a/dashboard/src/lib/components/index.ts
+++ b/dashboard/src/lib/components/index.ts
@@ -6,9 +6,3 @@ export { default as ChatSidebar } from "./ChatSidebar.svelte";
 export { default as ModelCard } from "./ModelCard.svelte";
 export { default as MarkdownContent } from "./MarkdownContent.svelte";
 export { default as ImageParamsPanel } from "./ImageParamsPanel.svelte";
-export { default as FamilyLogos } from "./FamilyLogos.svelte";
-export { default as FamilySidebar } from "./FamilySidebar.svelte";
-export { default as HuggingFaceResultItem } from "./HuggingFaceResultItem.svelte";
-export { default as ModelFilterPopover } from "./ModelFilterPopover.svelte";
-export { default as ModelPickerGroup } from "./ModelPickerGroup.svelte";
-export { default as ModelPickerModal } from "./ModelPickerModal.svelte";
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
--- a/dashboard/src/lib/stores/favorites.svelte.ts
+++ b/dashboard/src/lib/stores/favorites.svelte.ts
@@ -1,97 +0,0 @@
-/**
- * FavoritesStore - Manages favorite models with localStorage persistence
- */
-
-import { browser } from "$app/environment";
-
-const FAVORITES_KEY = "exo-favorite-models";
-
-class FavoritesStore {
-  favorites = $state<Set<string>>(new Set());
-
-  constructor() {
-    if (browser) {
-      this.loadFromStorage();
-    }
-  }
-
-  private loadFromStorage() {
-    try {
-      const stored = localStorage.getItem(FAVORITES_KEY);
-      if (stored) {
-        const parsed = JSON.parse(stored) as string[];
-        this.favorites = new Set(parsed);
-      }
-    } catch (error) {
-      console.error("Failed to load favorites:", error);
-    }
-  }
-
-  private saveToStorage() {
-    try {
-      const array = Array.from(this.favorites);
-      localStorage.setItem(FAVORITES_KEY, JSON.stringify(array));
-    } catch (error) {
-      console.error("Failed to save favorites:", error);
-    }
-  }
-
-  add(baseModelId: string) {
-    const next = new Set(this.favorites);
-    next.add(baseModelId);
-    this.favorites = next;
-    this.saveToStorage();
-  }
-
-  remove(baseModelId: string) {
-    const next = new Set(this.favorites);
-    next.delete(baseModelId);
-    this.favorites = next;
-    this.saveToStorage();
-  }
-
-  toggle(baseModelId: string) {
-    if (this.favorites.has(baseModelId)) {
-      this.remove(baseModelId);
-    } else {
-      this.add(baseModelId);
-    }
-  }
-
-  isFavorite(baseModelId: string): boolean {
-    return this.favorites.has(baseModelId);
-  }
-
-  getAll(): string[] {
-    return Array.from(this.favorites);
-  }
-
-  getSet(): Set<string> {
-    return new Set(this.favorites);
-  }
-
-  hasAny(): boolean {
-    return this.favorites.size > 0;
-  }
-
-  clearAll() {
-    this.favorites = new Set();
-    this.saveToStorage();
-  }
-}
-
-export const favoritesStore = new FavoritesStore();
-
-export const favorites = () => favoritesStore.favorites;
-export const hasFavorites = () => favoritesStore.hasAny();
-export const isFavorite = (baseModelId: string) =>
-  favoritesStore.isFavorite(baseModelId);
-export const toggleFavorite = (baseModelId: string) =>
-  favoritesStore.toggle(baseModelId);
-export const addFavorite = (baseModelId: string) =>
-  favoritesStore.add(baseModelId);
-export const removeFavorite = (baseModelId: string) =>
-  favoritesStore.remove(baseModelId);
-export const getFavorites = () => favoritesStore.getAll();
-export const getFavoritesSet = () => favoritesStore.getSet();
-export const clearFavorites = () => favoritesStore.clearAll();
--- a/dashboard/src/lib/utils/downloads.ts
+++ b/dashboard/src/lib/utils/downloads.ts
@@ -1,152 +0,0 @@
-/**
- * Shared utilities for parsing and querying download state.
- *
- * The download state from `/state` is shaped as:
- *   Record<NodeId, Array<TaggedDownloadEntry>>
- *
- * Each entry is a tagged union object like:
- *   { "DownloadCompleted": { shard_metadata: { "PipelineShardMetadata": { model_card: { model_id: "..." }, ... } }, ... } }
- */
-
-/** Unwrap one level of tagged-union envelope, returning [tag, payload]. */
-function unwrapTagged(
-  obj: Record<string, unknown>,
-): [string, Record<string, unknown>] | null {
-  const keys = Object.keys(obj);
-  if (keys.length !== 1) return null;
-  const tag = keys[0];
-  const payload = obj[tag];
-  if (!payload || typeof payload !== "object") return null;
-  return [tag, payload as Record<string, unknown>];
-}
-
-/** Extract the model ID string from a download entry's nested shard_metadata. */
-export function extractModelIdFromDownload(
-  downloadPayload: Record<string, unknown>,
-): string | null {
-  const shardMetadata =
-    downloadPayload.shard_metadata ?? downloadPayload.shardMetadata;
-  if (!shardMetadata || typeof shardMetadata !== "object") return null;
-
-  const unwrapped = unwrapTagged(shardMetadata as Record<string, unknown>);
-  if (!unwrapped) return null;
-  const [, shardData] = unwrapped;
-
-  const modelMeta = shardData.model_card ?? shardData.modelCard;
-  if (!modelMeta || typeof modelMeta !== "object") return null;
-
-  const meta = modelMeta as Record<string, unknown>;
-  return (meta.model_id as string) ?? (meta.modelId as string) ?? null;
-}
-
-/** Extract the shard_metadata object from a download entry payload. */
-export function extractShardMetadata(
-  downloadPayload: Record<string, unknown>,
-): Record<string, unknown> | null {
-  const shardMetadata =
-    downloadPayload.shard_metadata ?? downloadPayload.shardMetadata;
-  if (!shardMetadata || typeof shardMetadata !== "object") return null;
-  return shardMetadata as Record<string, unknown>;
-}
-
-/** Get the download tag (DownloadCompleted, DownloadOngoing, etc.) from a wrapped entry. */
-export function getDownloadTag(
-  entry: unknown,
-): [string, Record<string, unknown>] | null {
-  if (!entry || typeof entry !== "object") return null;
-  return unwrapTagged(entry as Record<string, unknown>);
-}
-
-/**
- * Iterate over all download entries for a given node, yielding [tag, payload, modelId].
- */
-function* iterNodeDownloads(
-  nodeDownloads: unknown[],
-): Generator<[string, Record<string, unknown>, string]> {
-  for (const entry of nodeDownloads) {
-    const tagged = getDownloadTag(entry);
-    if (!tagged) continue;
-    const [tag, payload] = tagged;
-    const modelId = extractModelIdFromDownload(payload);
-    if (!modelId) continue;
-    yield [tag, payload, modelId];
-  }
-}
-
-/** Check if a specific model is fully downloaded (DownloadCompleted) on a specific node. */
-export function isModelDownloadedOnNode(
-  downloadsData: Record<string, unknown[]>,
-  nodeId: string,
-  modelId: string,
-): boolean {
-  const nodeDownloads = downloadsData[nodeId];
-  if (!Array.isArray(nodeDownloads)) return false;
-
-  for (const [tag, , entryModelId] of iterNodeDownloads(nodeDownloads)) {
-    if (tag === "DownloadCompleted" && entryModelId === modelId) return true;
-  }
-  return false;
-}
-
-/** Get all node IDs where a model is fully downloaded (DownloadCompleted). */
-export function getNodesWithModelDownloaded(
-  downloadsData: Record<string, unknown[]>,
-  modelId: string,
-): string[] {
-  const result: string[] = [];
-  for (const nodeId of Object.keys(downloadsData)) {
-    if (isModelDownloadedOnNode(downloadsData, nodeId, modelId)) {
-      result.push(nodeId);
-    }
-  }
-  return result;
-}
-
-/**
- * Find shard metadata for a model from any download entry across all nodes.
- * Returns the first match found (completed entries are preferred).
- */
-export function getShardMetadataForModel(
-  downloadsData: Record<string, unknown[]>,
-  modelId: string,
-): Record<string, unknown> | null {
-  let fallback: Record<string, unknown> | null = null;
-
-  for (const nodeDownloads of Object.values(downloadsData)) {
-    if (!Array.isArray(nodeDownloads)) continue;
-
-    for (const [tag, payload, entryModelId] of iterNodeDownloads(
-      nodeDownloads,
-    )) {
-      if (entryModelId !== modelId) continue;
-      const shard = extractShardMetadata(payload);
-      if (!shard) continue;
-
-      if (tag === "DownloadCompleted") return shard;
-      if (!fallback) fallback = shard;
-    }
-  }
-  return fallback;
-}
-
-/**
- * Get the download status tag for a specific model on a specific node.
- * Returns the "best" status: DownloadCompleted > DownloadOngoing > others.
- */
-export function getModelDownloadStatus(
-  downloadsData: Record<string, unknown[]>,
-  nodeId: string,
-  modelId: string,
-): string | null {
-  const nodeDownloads = downloadsData[nodeId];
-  if (!Array.isArray(nodeDownloads)) return null;
-
-  let best: string | null = null;
-  for (const [tag, , entryModelId] of iterNodeDownloads(nodeDownloads)) {
-    if (entryModelId !== modelId) continue;
-    if (tag === "DownloadCompleted") return tag;
-    if (tag === "DownloadOngoing") best = tag;
-    else if (!best) best = tag;
-  }
-  return best;
-}
--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -5,13 +5,7 @@
    ChatMessages,
    ChatSidebar,
    ModelCard,
-    ModelPickerModal,
  } from "$lib/components";
-  import {
-    favorites,
-    toggleFavorite,
-    getFavoritesSet,
-  } from "$lib/stores/favorites.svelte";
  import {
    hasStartedChat,
    isTopologyMinimized,
@@ -106,17 +100,8 @@
      storage_size_megabytes?: number;
      tasks?: string[];
      hugging_face_id?: string;
-      is_custom?: boolean;
-      family?: string;
-      quantization?: string;
-      base_model?: string;
-      capabilities?: string[];
    }>
  >([]);
-  type ModelMemoryFitStatus =
-    | "fits_now"
-    | "fits_cluster_capacity"
-    | "too_large";

  // Model tasks lookup for ChatForm - maps both short IDs and full HuggingFace IDs
  const modelTasks = $derived(() => {
@@ -226,11 +211,9 @@
  let launchingModelId = $state<string | null>(null);
  let instanceDownloadExpandedNodes = $state<Set<string>>(new Set());

-  // Model picker modal state
-  let isModelPickerOpen = $state(false);
-
-  // Favorites state (reactive)
-  const favoritesSet = $derived(getFavoritesSet());
+  // Custom dropdown state
+  let isModelDropdownOpen = $state(false);
+  let modelDropdownSearch = $state("");

  // Slider dragging state
  let isDraggingSlider = $state(false);
@@ -454,41 +437,14 @@
    );
  });

-  // Calculate total memory in the cluster (in GB)
-  const clusterTotalMemoryGB = $derived(() => {
-    if (!data) return 0;
-    return (
-      Object.values(data.nodes).reduce((acc, n) => {
-        const total =
-          n.macmon_info?.memory?.ram_total ?? n.system_info?.memory ?? 0;
-        return acc + total;
-      }, 0) /
-      (1024 * 1024 * 1024)
-    );
-  });
-
-  function getModelMemoryFitStatus(model: {
-    id: string;
-    name?: string;
-    storage_size_megabytes?: number;
-  }): ModelMemoryFitStatus {
-    const modelSizeGB = getModelSizeGB(model);
-    if (modelSizeGB <= availableMemoryGB()) {
-      return "fits_now";
-    }
-    if (modelSizeGB <= clusterTotalMemoryGB()) {
-      return "fits_cluster_capacity";
-    }
-    return "too_large";
-  }
-
  // Check if a model has enough memory to run
  function hasEnoughMemory(model: {
    id: string;
    name?: string;
    storage_size_megabytes?: number;
  }): boolean {
-    return getModelMemoryFitStatus(model) === "fits_now";
+    const modelSizeGB = getModelSizeGB(model);
+    return modelSizeGB <= availableMemoryGB();
  }

  // Sorted models for dropdown - biggest first, unrunnable at the end
@@ -574,47 +530,6 @@
    }
  }

-  async function addModelFromPicker(modelId: string) {
-    const response = await fetch("/models/add", {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({ model_id: modelId }),
-    });
-
-    if (!response.ok) {
-      let message = `Failed to add model (${response.status}: ${response.statusText})`;
-      try {
-        const err = await response.json();
-        if (err.detail) message = err.detail;
-      } catch {
-        // use default message
-      }
-      throw new Error(message);
-    }
-
-    await fetchModels();
-  }
-
-  async function deleteCustomModel(modelId: string) {
-    try {
-      const response = await fetch(
-        `/models/custom/${encodeURIComponent(modelId)}`,
-        { method: "DELETE" },
-      );
-      if (response.ok) {
-        await fetchModels();
-      }
-    } catch {
-      console.error("Failed to delete custom model");
-    }
-  }
-
-  function handleModelPickerSelect(modelId: string) {
-    selectPreviewModel(modelId);
-    saveLaunchDefaults();
-    isModelPickerOpen = false;
-  }
-
  async function launchInstance(
    modelId: string,
    specificPreview?: PlacementPreview | null,
@@ -2445,12 +2360,14 @@
              >
            </div>

-            <!-- Model Picker Button -->
-            <div class="flex-shrink-0 mb-3">
+            <!-- Model dropdown: "relative" anchors the absolutely positioned chevron and options panel -->
+            <div class="flex-shrink-0 mb-3 relative">
              <button
                type="button"
-                onclick={() => (isModelPickerOpen = true)}
-                class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-2.5 text-sm font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 relative"
+                onclick={() => (isModelDropdownOpen = !isModelDropdownOpen)}
+                class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-2.5 text-sm font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 {isModelDropdownOpen
+                  ? 'border-exo-yellow/70'
+                  : ''}"
              >
                {#if selectedModelId}
                  {@const foundModel = models.find(
@@ -2458,12 +2375,54 @@
                  )}
                  {#if foundModel}
                    {@const sizeGB = getModelSizeGB(foundModel)}
+                    {@const isImageModel = modelSupportsImageGeneration(
+                      foundModel.id,
+                    )}
+                    {@const isImageEditModel = modelSupportsImageEditing(
+                      foundModel.id,
+                    )}
                    <span
                      class="flex items-center justify-between gap-2 w-full pr-4"
                    >
                      <span
                        class="flex items-center gap-2 text-exo-light-gray truncate"
                      >
+                        {#if isImageModel}
+                          <svg
+                            class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                            fill="none"
+                            viewBox="0 0 24 24"
+                            stroke="currentColor"
+                            stroke-width="2"
+                          >
+                            <rect
+                              x="3"
+                              y="3"
+                              width="18"
+                              height="18"
+                              rx="2"
+                              ry="2"
+                            />
+                            <circle cx="8.5" cy="8.5" r="1.5" />
+                            <polyline points="21 15 16 10 5 21" />
+                          </svg>
+                        {/if}
+                        {#if isImageEditModel}
+                          <svg
+                            class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                            fill="none"
+                            viewBox="0 0 24 24"
+                            stroke="currentColor"
+                            stroke-width="2"
+                          >
+                            <path
+                              d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
+                            />
+                            <path
+                              d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
+                            />
+                          </svg>
+                        {/if}
                        <span class="truncate"
                          >{foundModel.name || foundModel.id}</span
                        >
@@ -2480,24 +2439,142 @@
                {:else}
                  <span class="text-white/50">— SELECT MODEL —</span>
                {/if}
-                <div
-                  class="absolute right-3 top-1/2 -translate-y-1/2 pointer-events-none"
-                >
-                  <svg
-                    class="w-4 h-4 text-exo-yellow/60"
-                    fill="none"
-                    viewBox="0 0 24 24"
-                    stroke="currentColor"
-                  >
-                    <path
-                      stroke-linecap="round"
-                      stroke-linejoin="round"
-                      stroke-width="2"
-                      d="M19 9l-7 7-7-7"
-                    />
-                  </svg>
-                </div>
              </button>
+              <div
+                class="absolute right-3 top-1/2 -translate-y-1/2 pointer-events-none transition-transform duration-200 {isModelDropdownOpen
+                  ? 'rotate-180'
+                  : ''}"
+              >
+                <svg
+                  class="w-4 h-4 text-exo-yellow/60"
+                  fill="none"
+                  viewBox="0 0 24 24"
+                  stroke="currentColor"
+                >
+                  <path
+                    stroke-linecap="round"
+                    stroke-linejoin="round"
+                    stroke-width="2"
+                    d="M19 9l-7 7-7-7"
+                  />
+                </svg>
+              </div>
+
+              {#if isModelDropdownOpen}
+                <!-- Invisible full-viewport button under the panel (z-40 < z-50): outside clicks close the dropdown -->
+                <button
+                  type="button"
+                  class="fixed inset-0 z-40 cursor-default"
+                  onclick={() => (isModelDropdownOpen = false)}
+                  aria-label="Close dropdown"
+                ></button>
+
+                <!-- Dropdown panel: positioned at top-full of the container, scrolls once taller than max-h-64 -->
+                <div
+                  class="absolute top-full left-0 right-0 mt-1 bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-50 max-h-64 overflow-y-auto"
+                >
+                  <!-- Search box: sticky so it stays visible while the options scroll -->
+                  <div
+                    class="sticky top-0 bg-exo-dark-gray border-b border-exo-medium-gray/30 p-2"
+                  >
+                    <input
+                      type="text"
+                      placeholder="Search models..."
+                      bind:value={modelDropdownSearch}
+                      class="w-full bg-exo-dark-gray/60 border border-exo-medium-gray/30 rounded px-2 py-1.5 text-xs font-mono text-white/80 placeholder:text-white/40 focus:outline-none focus:border-exo-yellow/50"
+                    />
+                  </div>
+
+                  <!-- Options: search matches display name and id (case-insensitive); models that don't fit are disabled -->
+                  <div class="py-1">
+                    {#each sortedModels().filter((m) => !modelDropdownSearch || `${m.name ?? ""} ${m.id}`
+                          .toLowerCase()
+                          .includes(modelDropdownSearch.toLowerCase())) as model}
+                      {@const sizeGB = getModelSizeGB(model)}
+                      {@const modelCanFit = hasEnoughMemory(model)}
+                      {@const isImageModel = modelSupportsImageGeneration(
+                        model.id,
+                      )}
+                      {@const isImageEditModel = modelSupportsImageEditing(
+                        model.id,
+                      )}
+                      <button
+                        type="button"
+                        onclick={() => {
+                          if (modelCanFit) {
+                            selectPreviewModel(model.id);
+                            saveLaunchDefaults();
+                            isModelDropdownOpen = false;
+                            modelDropdownSearch = "";
+                          }
+                        }}
+                        disabled={!modelCanFit}
+                        class="w-full px-3 py-2 text-left text-sm font-mono tracking-wide transition-colors duration-100 flex items-center justify-between gap-2 {selectedModelId ===
+                        model.id
+                          ? 'bg-transparent text-exo-yellow cursor-pointer'
+                          : modelCanFit
+                            ? 'text-white/80 hover:text-exo-yellow cursor-pointer'
+                            : 'text-white/30 cursor-default'}"
+                      >
+                        <span class="flex items-center gap-2 truncate flex-1">
+                          {#if isImageModel}
+                            <svg
+                              class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                              fill="none"
+                              viewBox="0 0 24 24"
+                              stroke="currentColor"
+                              stroke-width="2"
+                              aria-label="Image generation model"
+                            >
+                              <rect
+                                x="3"
+                                y="3"
+                                width="18"
+                                height="18"
+                                rx="2"
+                                ry="2"
+                              />
+                              <circle cx="8.5" cy="8.5" r="1.5" />
+                              <polyline points="21 15 16 10 5 21" />
+                            </svg>
+                          {/if}
+                          {#if isImageEditModel}
+                            <svg
+                              class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                              fill="none"
+                              viewBox="0 0 24 24"
+                              stroke="currentColor"
+                              stroke-width="2"
+                              aria-label="Image editing model"
+                            >
+                              <path
+                                d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
+                              />
+                              <path
+                                d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
+                              />
+                            </svg>
+                          {/if}
+                          <span class="truncate">{model.name || model.id}</span>
+                        </span>
+                        <span
+                          class="flex-shrink-0 text-xs {modelCanFit
+                            ? 'text-white/50'
+                            : 'text-red-400/60'}"
+                        >
+                          {sizeGB >= 1
+                            ? sizeGB.toFixed(0)
+                            : sizeGB.toFixed(1)}GB
+                        </span>
+                      </button>
+                    {:else}
+                      <div class="px-3 py-2 text-xs text-white/50 font-mono">
+                        No models found
+                      </div>
+                    {/each}
+                  </div>
+                </div>
+              {/if}
            </div>

            <!-- Configuration Options -->
@@ -3277,28 +3354,3 @@
    {/if}
  </main>
 </div>
-
-<ModelPickerModal
-  isOpen={isModelPickerOpen}
-  {models}
-  {selectedModelId}
-  favorites={favoritesSet}
-  existingModelIds={new Set(models.map((m) => m.id))}
-  canModelFit={(modelId) => {
-    const model = models.find((m) => m.id === modelId);
-    return model ? hasEnoughMemory(model) : false;
-  }}
-  getModelFitStatus={(modelId): ModelMemoryFitStatus => {
-    const model = models.find((m) => m.id === modelId);
-    return model ? getModelMemoryFitStatus(model) : "too_large";
-  }}
-  onSelect={handleModelPickerSelect}
-  onClose={() => (isModelPickerOpen = false)}
-  onToggleFavorite={toggleFavorite}
-  onAddModel={addModelFromPicker}
-  onDeleteModel={deleteCustomModel}
-  totalMemoryGB={clusterMemory().total / (1024 * 1024 * 1024)}
-  usedMemoryGB={clusterMemory().used / (1024 * 1024 * 1024)}
-  {downloadsData}
-  topologyNodes={data?.nodes}
-/>
--- a/dashboard/src/routes/traces/+page.svelte
+++ b/dashboard/src/routes/traces/+page.svelte
@@ -1,190 +0,0 @@
-<script lang="ts">
-  import { onMount } from "svelte";
-  import {
-    listTraces,
-    getTraceRawUrl,
-    type TraceListItem,
-  } from "$lib/stores/app.svelte";
-  import HeaderNav from "$lib/components/HeaderNav.svelte";
-
-  let traces = $state<TraceListItem[]>([]);
-  let loading = $state(true);
-  let error = $state<string | null>(null);
-
-  function formatBytes(bytes: number): string {
-    if (!bytes || bytes <= 0) return "0B";
-    const units = ["B", "KB", "MB", "GB"];
-    const i = Math.min(
-      Math.floor(Math.log(bytes) / Math.log(1024)),
-      units.length - 1,
-    );
-    const val = bytes / Math.pow(1024, i);
-    return `${val.toFixed(val >= 10 ? 0 : 1)}${units[i]}`;
-  }
-
-  function formatDate(isoString: string): string {
-    const date = new Date(isoString);
-    return date.toLocaleString();
-  }
-
-  async function downloadTrace(taskId: string) {
-    const response = await fetch(getTraceRawUrl(taskId));
-    const blob = await response.blob();
-    const url = URL.createObjectURL(blob);
-    const a = document.createElement("a");
-    a.href = url;
-    a.download = `trace_${taskId}.json`;
-    a.click();
-    URL.revokeObjectURL(url);
-  }
-
-  async function openInPerfetto(taskId: string) {
-    // Fetch trace data from our local API
-    const response = await fetch(getTraceRawUrl(taskId));
-    const traceData = await response.arrayBuffer();
-
-    // Open Perfetto UI
-    const perfettoWindow = window.open("https://ui.perfetto.dev");
-    if (!perfettoWindow) {
-      alert("Failed to open Perfetto. Please allow popups.");
-      return;
-    }
-
-    // Wait for Perfetto to be ready, then send trace via postMessage
-    const onMessage = (e: MessageEvent) => {
-      if (e.data === "PONG") {
-        window.removeEventListener("message", onMessage);
-        perfettoWindow.postMessage(
-          {
-            perfetto: {
-              buffer: traceData,
-              title: `Trace ${taskId}`,
-            },
-          },
-          "https://ui.perfetto.dev",
-        );
-      }
-    };
-    window.addEventListener("message", onMessage);
-
-    // Ping Perfetto until it responds
-    const pingInterval = setInterval(() => {
-      perfettoWindow.postMessage("PING", "https://ui.perfetto.dev");
-    }, 50);
-
-    // Clean up after 10 seconds
-    setTimeout(() => {
-      clearInterval(pingInterval);
-      window.removeEventListener("message", onMessage);
-    }, 10000);
-  }
-
-  async function refresh() {
-    loading = true;
-    error = null;
-    try {
-      const response = await listTraces();
-      traces = response.traces;
-    } catch (e) {
-      error = e instanceof Error ? e.message : "Failed to load traces";
-    } finally {
-      loading = false;
-    }
-  }
-
-  onMount(() => {
-    refresh();
-  });
-</script>
-
-<div class="min-h-screen bg-exo-dark-gray text-white">
-  <HeaderNav showHome={true} />
-  <div class="max-w-7xl mx-auto px-4 lg:px-8 py-6 space-y-6">
-    <div class="flex items-center justify-between gap-4 flex-wrap">
-      <div>
-        <h1
-          class="text-2xl font-mono tracking-[0.2em] uppercase text-exo-yellow"
-        >
-          Traces
-        </h1>
-      </div>
-      <div class="flex items-center gap-3">
-        <button
-          type="button"
-          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
-          onclick={refresh}
-          disabled={loading}
-        >
-          Refresh
-        </button>
-      </div>
-    </div>
-
-    {#if loading}
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray"
-      >
-        <div class="text-sm">Loading traces...</div>
-      </div>
-    {:else if error}
-      <div
-        class="rounded border border-red-500/30 bg-red-500/10 p-6 text-center text-red-400"
-      >
-        <div class="text-sm">{error}</div>
-      </div>
-    {:else if traces.length === 0}
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray space-y-2"
-      >
-        <div class="text-sm">No traces found.</div>
-        <div class="text-xs text-exo-light-gray/70">
-          Run exo with EXO_TRACING_ENABLED=1 to collect traces.
-        </div>
-      </div>
-    {:else}
-      <div class="space-y-3">
-        {#each traces as trace}
-          <div
-            class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 flex items-center justify-between gap-4"
-          >
-            <div class="min-w-0 flex-1">
-              <a
-                href="#/traces/{trace.taskId}"
-                class="text-sm font-mono text-white hover:text-exo-yellow transition-colors truncate block"
-              >
-                {trace.taskId}
-              </a>
-              <div class="text-xs text-exo-light-gray font-mono mt-1">
-                {formatDate(trace.createdAt)} &bull; {formatBytes(
-                  trace.fileSize,
-                )}
-              </div>
-            </div>
-            <div class="flex items-center gap-2 shrink-0">
-              <a
-                href="#/traces/{trace.taskId}"
-                class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
-              >
-                View Stats
-              </a>
-              <button
-                type="button"
-                class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
-                onclick={() => downloadTrace(trace.taskId)}
-              >
-                Download
-              </button>
-              <button
-                type="button"
-                class="text-xs font-mono text-exo-dark-gray bg-exo-yellow hover:bg-exo-yellow/90 transition-colors uppercase px-2 py-1 rounded font-semibold"
-                onclick={() => openInPerfetto(trace.taskId)}
-              >
-                View Trace
-              </button>
-            </div>
-          </div>
-        {/each}
-      </div>
-    {/if}
-  </div>
-</div>
--- a/dashboard/src/routes/traces/[taskId]/+page.svelte
+++ b/dashboard/src/routes/traces/[taskId]/+page.svelte
@@ -1,367 +0,0 @@
-<script lang="ts">
-  import { page } from "$app/stores";
-  import { onMount } from "svelte";
-  import {
-    fetchTraceStats,
-    getTraceRawUrl,
-    type TraceStatsResponse,
-    type TraceCategoryStats,
-  } from "$lib/stores/app.svelte";
-  import HeaderNav from "$lib/components/HeaderNav.svelte";
-
-  const taskId = $derived($page.params.taskId);
-
-  let stats = $state<TraceStatsResponse | null>(null);
-  let loading = $state(true);
-  let error = $state<string | null>(null);
-
-  function formatDuration(us: number): string {
-    if (us < 1000) return `${us.toFixed(0)}us`;
-    if (us < 1_000_000) return `${(us / 1000).toFixed(2)}ms`;
-    return `${(us / 1_000_000).toFixed(2)}s`;
-  }
-
-  function formatPercentage(part: number, total: number): string {
-    if (total === 0) return "0.0%";
-    return `${((part / total) * 100).toFixed(1)}%`;
-  }
-
-  // Parse hierarchical categories like "sync/compute" into phases
-  type PhaseData = {
-    name: string;
-    subcategories: { name: string; stats: TraceCategoryStats }[];
-    totalUs: number; // From outer span (e.g., "sync" category)
-    stepCount: number; // Count of outer span events
-  };
-
-  function parsePhases(
-    byCategory: Record<string, TraceCategoryStats>,
-  ): PhaseData[] {
-    const phases = new Map<
-      string,
-      {
-        subcats: Map<string, TraceCategoryStats>;
-        outerStats: TraceCategoryStats | null;
-      }
-    >();
-
-    for (const [category, catStats] of Object.entries(byCategory)) {
-      if (category.includes("/")) {
-        const [phase, subcat] = category.split("/", 2);
-        if (!phases.has(phase)) {
-          phases.set(phase, { subcats: new Map(), outerStats: null });
-        }
-        phases.get(phase)!.subcats.set(subcat, catStats);
-      } else {
-        // Outer span - this IS the phase total
-        if (!phases.has(category)) {
-          phases.set(category, { subcats: new Map(), outerStats: null });
-        }
-        phases.get(category)!.outerStats = catStats;
-      }
-    }
-
-    return Array.from(phases.entries())
-      .filter(([_, data]) => data.outerStats !== null) // Only phases with outer spans
-      .map(([name, data]) => ({
-        name,
-        subcategories: Array.from(data.subcats.entries())
-          .map(([subName, subStats]) => ({ name: subName, stats: subStats }))
-          .sort((a, b) => b.stats.totalUs - a.stats.totalUs),
-        totalUs: data.outerStats!.totalUs, // Outer span total
-        stepCount: data.outerStats!.count, // Number of steps
-      }))
-      .sort((a, b) => b.totalUs - a.totalUs);
-  }
-
-  async function downloadTrace() {
-    if (!taskId) return;
-    const response = await fetch(getTraceRawUrl(taskId));
-    const blob = await response.blob();
-    const url = URL.createObjectURL(blob);
-    const a = document.createElement("a");
-    a.href = url;
-    a.download = `trace_${taskId}.json`;
-    a.click();
-    URL.revokeObjectURL(url);
-  }
-
-  async function openInPerfetto() {
-    if (!taskId) return;
-
-    // Fetch trace data from our local API
-    const response = await fetch(getTraceRawUrl(taskId));
-    const traceData = await response.arrayBuffer();
-
-    // Open Perfetto UI
-    const perfettoWindow = window.open("https://ui.perfetto.dev");
-    if (!perfettoWindow) {
-      alert("Failed to open Perfetto. Please allow popups.");
-      return;
-    }
-
-    // Wait for Perfetto to be ready, then send trace via postMessage
-    const onMessage = (e: MessageEvent) => {
-      if (e.data === "PONG") {
-        window.removeEventListener("message", onMessage);
-        perfettoWindow.postMessage(
-          {
-            perfetto: {
-              buffer: traceData,
-              title: `Trace ${taskId}`,
-            },
-          },
-          "https://ui.perfetto.dev",
-        );
-      }
-    };
-    window.addEventListener("message", onMessage);
-
-    // Ping Perfetto until it responds
-    const pingInterval = setInterval(() => {
-      perfettoWindow.postMessage("PING", "https://ui.perfetto.dev");
-    }, 50);
-
-    // Clean up after 10 seconds
-    setTimeout(() => {
-      clearInterval(pingInterval);
-      window.removeEventListener("message", onMessage);
-    }, 10000);
-  }
-
-  onMount(async () => {
-    if (!taskId) {
-      error = "No task ID provided";
-      loading = false;
-      return;
-    }
-
-    try {
-      stats = await fetchTraceStats(taskId);
-    } catch (e) {
-      error = e instanceof Error ? e.message : "Failed to load trace";
-    } finally {
-      loading = false;
-    }
-  });
-
-  const phases = $derived(stats ? parsePhases(stats.byCategory) : []);
-  const sortedRanks = $derived(
-    stats
-      ? Object.keys(stats.byRank)
-          .map(Number)
-          .sort((a, b) => a - b)
-      : [],
-  );
-  const nodeCount = $derived(sortedRanks.length || 1);
-</script>
-
-<div class="min-h-screen bg-exo-dark-gray text-white">
-  <HeaderNav showHome={true} />
-  <div class="max-w-7xl mx-auto px-4 lg:px-8 py-6 space-y-6">
-    <div class="flex items-center justify-between gap-4 flex-wrap">
-      <div>
-        <h1
-          class="text-2xl font-mono tracking-[0.2em] uppercase text-exo-yellow"
-        >
-          Trace
-        </h1>
-        <p class="text-sm text-exo-light-gray font-mono truncate max-w-lg">
-          {taskId}
-        </p>
-      </div>
-      <div class="flex items-center gap-3">
-        <a
-          href="#/traces"
-          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-3 py-1.5 rounded"
-        >
-          All Traces
-        </a>
-        <button
-          type="button"
-          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-3 py-1.5 rounded"
-          onclick={downloadTrace}
-          disabled={loading || !!error}
-        >
-          Download
-        </button>
-        <button
-          type="button"
-          class="text-xs font-mono text-exo-dark-gray bg-exo-yellow hover:bg-exo-yellow/90 transition-colors uppercase px-3 py-1.5 rounded font-semibold"
-          onclick={openInPerfetto}
-          disabled={loading || !!error}
-        >
-          View Trace
-        </button>
-      </div>
-    </div>
-
-    {#if loading}
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray"
-      >
-        <div class="text-sm">Loading trace data...</div>
-      </div>
-    {:else if error}
-      <div
-        class="rounded border border-red-500/30 bg-red-500/10 p-6 text-center text-red-400"
-      >
-        <div class="text-sm">{error}</div>
-      </div>
-    {:else if stats}
-      <!-- Wall Time Summary -->
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-2"
-      >
-        <h2
-          class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
-        >
-          Summary
-        </h2>
-        <div class="text-3xl font-mono text-exo-yellow">
-          {formatDuration(stats.totalWallTimeUs)}
-        </div>
-        <div class="text-xs text-exo-light-gray">Total wall time</div>
-      </div>
-
-      <!-- By Phase -->
-      {#if phases.length > 0}
-        <div
-          class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-4"
-        >
-          <h2
-            class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
-          >
-            By Phase <span class="text-exo-light-gray/50">(avg per node)</span>
-          </h2>
-          <div class="space-y-4">
-            {#each phases as phase}
-              {@const normalizedTotal = phase.totalUs / nodeCount}
-              {@const normalizedStepCount = phase.stepCount / nodeCount}
-              <div class="space-y-2">
-                <div class="flex items-center justify-between">
-                  <span class="text-sm font-mono text-white">{phase.name}</span>
-                  <span class="text-sm font-mono">
-                    <span class="text-exo-yellow"
-                      >{formatDuration(normalizedTotal)}</span
-                    >
-                    <span class="text-exo-light-gray ml-2">
-                      ({normalizedStepCount} steps, {formatDuration(
-                        normalizedTotal / normalizedStepCount,
-                      )}/step)
-                    </span>
-                  </span>
-                </div>
-                {#if phase.subcategories.length > 0}
-                  <div class="pl-4 space-y-1.5">
-                    {#each phase.subcategories as subcat}
-                      {@const normalizedSubcat =
-                        subcat.stats.totalUs / nodeCount}
-                      {@const pct = formatPercentage(
-                        normalizedSubcat,
-                        normalizedTotal,
-                      )}
-                      {@const perStep = normalizedSubcat / normalizedStepCount}
-                      <div
-                        class="flex items-center justify-between text-xs font-mono"
-                      >
-                        <span class="text-exo-light-gray">{subcat.name}</span>
-                        <span class="text-white">
-                          {formatDuration(normalizedSubcat)}
-                          <span class="text-exo-light-gray ml-2">({pct})</span>
-                          <span class="text-exo-light-gray/60 ml-2"
-                            >{formatDuration(perStep)}/step</span
-                          >
-                        </span>
-                      </div>
-                      <!-- Progress bar -->
-                      <div
-                        class="relative h-1.5 bg-exo-black/60 rounded-sm overflow-hidden"
-                      >
-                        <div
-                          class="absolute inset-y-0 left-0 bg-gradient-to-r from-exo-yellow to-exo-yellow/70 transition-all duration-300"
-                          style="width: {pct}"
-                        ></div>
-                      </div>
-                    {/each}
-                  </div>
-                {/if}
-              </div>
-            {/each}
-          </div>
-        </div>
-      {/if}
-
-      <!-- By Rank -->
-      {#if sortedRanks.length > 0}
-        <div
-          class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-4"
-        >
-          <h2
-            class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
-          >
-            By Rank
-          </h2>
-          <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
-            {#each sortedRanks as rank}
-              {@const rankStats = stats.byRank[rank]}
-              {@const rankPhases = parsePhases(rankStats.byCategory)}
-              <div
-                class="rounded border border-exo-medium-gray/20 bg-exo-dark-gray/60 p-3 space-y-3"
-              >
-                <div class="text-sm font-mono text-exo-yellow">
-                  Rank {rank}
-                </div>
-                <div class="space-y-2">
-                  {#each rankPhases as phase}
-                    <div class="space-y-1">
-                      <div class="flex items-center justify-between text-xs">
-                        <span class="font-mono text-exo-light-gray"
-                          >{phase.name}</span
-                        >
-                        <span class="font-mono text-white">
-                          {formatDuration(phase.totalUs)}
-                          <span class="text-exo-light-gray/50 ml-1">
-                            ({phase.stepCount}x)
-                          </span>
-                        </span>
-                      </div>
-                      {#if phase.subcategories.length > 0}
-                        <div class="pl-2 space-y-0.5">
-                          {#each phase.subcategories as subcat}
-                            {@const pct = formatPercentage(
-                              subcat.stats.totalUs,
-                              phase.totalUs,
-                            )}
-                            {@const perStep =
-                              subcat.stats.totalUs / phase.stepCount}
-                            <div
-                              class="flex items-center justify-between text-[10px] font-mono"
-                            >
-                              <span class="text-exo-light-gray/70"
-                                >{subcat.name}</span
-                              >
-                              <span class="text-exo-light-gray">
-                                {formatDuration(subcat.stats.totalUs)}
-                                <span class="text-exo-light-gray/50"
-                                  >({pct})</span
-                                >
-                                <span class="text-exo-light-gray/30 ml-1"
-                                  >{formatDuration(perStep)}/step</span
-                                >
-                              </span>
-                            </div>
-                          {/each}
-                        </div>
-                      {/if}
-                    </div>
-                  {/each}
-                </div>
-              </div>
-            {/each}
-          </div>
-        </div>
-      {/if}
-    {/if}
-  </div>
-</div>
--- a/flake.lock
+++ b/flake.lock
@@ -21,9 +21,7 @@
          "nixpkgs"
        ],
        "purescript-overlay": "purescript-overlay",
-        "pyproject-nix": [
-          "pyproject-nix"
-        ]
+        "pyproject-nix": "pyproject-nix"
      },
      "locked": {
        "lastModified": 1765953015,
@@ -151,44 +149,19 @@
        "type": "github"
      }
    },
-    "pyproject-build-systems": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "pyproject-nix": [
-          "pyproject-nix"
-        ],
-        "uv2nix": [
-          "uv2nix"
-        ]
-      },
-      "locked": {
-        "lastModified": 1763662255,
-        "narHash": "sha256-4bocaOyLa3AfiS8KrWjZQYu+IAta05u3gYZzZ6zXbT0=",
-        "owner": "pyproject-nix",
-        "repo": "build-system-pkgs",
-        "rev": "042904167604c681a090c07eb6967b4dd4dae88c",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "build-system-pkgs",
-        "type": "github"
-      }
-    },
    "pyproject-nix": {
      "inputs": {
        "nixpkgs": [
+          "dream2nix",
          "nixpkgs"
        ]
      },
      "locked": {
-        "lastModified": 1764134915,
-        "narHash": "sha256-xaKvtPx6YAnA3HQVp5LwyYG1MaN4LLehpQI8xEdBvBY=",
+        "lastModified": 1763017646,
+        "narHash": "sha256-Z+R2lveIp6Skn1VPH3taQIuMhABg1IizJd8oVdmdHsQ=",
        "owner": "pyproject-nix",
        "repo": "pyproject.nix",
-        "rev": "2c8df1383b32e5443c921f61224b198a2282a657",
+        "rev": "47bd6f296502842643078d66128f7b5e5370790c",
        "type": "github"
      },
      "original": {
@@ -205,10 +178,7 @@
        "flake-parts": "flake-parts",
        "nixpkgs": "nixpkgs",
        "nixpkgs-swift": "nixpkgs-swift",
-        "pyproject-build-systems": "pyproject-build-systems",
-        "pyproject-nix": "pyproject-nix",
-        "treefmt-nix": "treefmt-nix",
-        "uv2nix": "uv2nix"
+        "treefmt-nix": "treefmt-nix"
      }
    },
    "rust-analyzer-src": {
@@ -269,29 +239,6 @@
        "repo": "treefmt-nix",
        "type": "github"
      }
-    },
-    "uv2nix": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "pyproject-nix": [
-          "pyproject-nix"
-        ]
-      },
-      "locked": {
-        "lastModified": 1767701098,
-        "narHash": "sha256-CJhKZnWb3gumR9oTRjFvCg/6lYTGbZRU7xtvcyWIRwU=",
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "rev": "9d357f0d2ce6f5f35ec7959d7e704452352eb4da",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "type": "github"
-      }
    }
  },
  "root": "root",
--- a/flake.nix
+++ b/flake.nix
@@ -24,26 +24,6 @@
    dream2nix = {
      url = "github:nix-community/dream2nix";
      inputs.nixpkgs.follows = "nixpkgs";
-      inputs.pyproject-nix.follows = "pyproject-nix";
-    };
-
-    # Python packaging with uv2nix
-    pyproject-nix = {
-      url = "github:pyproject-nix/pyproject.nix";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-
-    uv2nix = {
-      url = "github:pyproject-nix/uv2nix";
-      inputs.pyproject-nix.follows = "pyproject-nix";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-
-    pyproject-build-systems = {
-      url = "github:pyproject-nix/build-system-pkgs";
-      inputs.pyproject-nix.follows = "pyproject-nix";
-      inputs.uv2nix.follows = "uv2nix";
-      inputs.nixpkgs.follows = "nixpkgs";
    };

    # Pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
@@ -68,7 +48,6 @@
        inputs.treefmt-nix.flakeModule
        ./dashboard/parts.nix
        ./rust/parts.nix
-        ./python/parts.nix
      ];

      perSystem =
@@ -79,14 +58,6 @@
          pkgsSwift = import inputs.nixpkgs-swift { inherit system; };
        in
        {
-          # Allow unfree for metal-toolchain (needed for Darwin Metal packages)
-          _module.args.pkgs = import inputs.nixpkgs {
-            inherit system;
-            config.allowUnfreePredicate = pkg: (pkg.pname or "") == "metal-toolchain";
-            overlays = [
-              (import ./nix/apple-sdk-overlay.nix)
-            ];
-          };
          treefmt = {
            projectRootFile = "flake.nix";
            programs = {
@@ -108,25 +79,14 @@
                enable = true;
                package = pkgsSwift.swiftPackages.swift-format;
              };
-              shfmt.enable = true;
            };
          };

-          packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin (
-            let
-              uvLock = builtins.fromTOML (builtins.readFile ./uv.lock);
-              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx") uvLock.package);
-              uvLockMlxVersion = mlxPackage.version;
-            in
-            {
-              metal-toolchain = pkgs.callPackage ./nix/metal-toolchain.nix { };
-              mlx = pkgs.callPackage ./nix/mlx.nix {
-                inherit (self'.packages) metal-toolchain;
-                inherit uvLockMlxVersion;
-              };
-              default = self'.packages.exo;
-            }
-          );
+          checks.lint = pkgs.runCommand "lint-check" { } ''
+            export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
+            ${pkgs.ruff}/bin/ruff check ${inputs.self}/
+            touch $out
+          '';

          devShells.default = with pkgs; pkgs.mkShell {
            inputsFrom = [ self'.checks.cargo-build ];
--- a/4
+++ b/4
@@ -1,7 +1,7 @@
 export NIX_CONFIG := "extra-experimental-features = nix-command flakes"

 fmt:
-    treefmt || nix fmt
+    nix fmt

 lint:
    uv run ruff check --fix
@@ -20,7 +20,7 @@ sync-clean:

 rust-rebuild:
    cargo run --bin stub_gen
-    uv sync --reinstall-package exo_pyo3_bindings
+    just sync-clean

 build-dashboard:
    #!/usr/bin/env bash
--- a/nix/apple-sdk-overlay.nix
+++ b/nix/apple-sdk-overlay.nix
@@ -1,18 +0,0 @@
-# Overlay that builds apple-sdk with a custom versions.json (for SDK 26.2).
-# The upstream nixpkgs package reads versions.json at eval time via a relative
-# path, so we can't override it through callPackage args. Instead, we copy
-# the upstream source and patch the one file.
-final: _prev:
-let
-  upstreamSrc = final.path + "/pkgs/by-name/ap/apple-sdk";
-  patchedSrc = final.runCommandLocal "apple-sdk-src-patched" { } ''
-    cp -r ${upstreamSrc} $out
-    chmod -R u+w $out
-    cp ${./apple-sdk/metadata/versions.json} $out/metadata/versions.json
-  '';
-in
-{
-  apple-sdk_26 = final.callPackage (patchedSrc + "/package.nix") {
-    darwinSdkMajorVersion = "26";
-  };
-}
--- a/nix/apple-sdk/metadata/versions.json
+++ b/nix/apple-sdk/metadata/versions.json
@@ -1,26 +0,0 @@
-{
-  "14": {
-    "urls": [
-      "https://swcdn.apple.com/content/downloads/14/48/052-59890-A_I0F5YGAY0Y/p9n40hio7892gou31o1v031ng6fnm9sb3c/CLTools_macOSNMOS_SDK.pkg",
-      "https://web.archive.org/web/20250211001355/https://swcdn.apple.com/content/downloads/14/48/052-59890-A_I0F5YGAY0Y/p9n40hio7892gou31o1v031ng6fnm9sb3c/CLTools_macOSNMOS_SDK.pkg"
-    ],
-    "version": "14.4",
-    "hash": "sha256-QozDiwY0Czc0g45vPD7G4v4Ra+3DujCJbSads3fJjjM="
-  },
-  "15": {
-    "urls": [
-      "https://swcdn.apple.com/content/downloads/52/01/082-41241-A_0747ZN8FHV/dectd075r63pppkkzsb75qk61s0lfee22j/CLTools_macOSNMOS_SDK.pkg",
-      "https://web.archive.org/web/20250530132510/https://swcdn.apple.com/content/downloads/52/01/082-41241-A_0747ZN8FHV/dectd075r63pppkkzsb75qk61s0lfee22j/CLTools_macOSNMOS_SDK.pkg"
-    ],
-    "version": "15.5",
-    "hash": "sha256-HBiSJuw1XBUK5R/8Sj65c3rftSEvQl/O9ZZVp/g1Amo="
-  },
-  "26": {
-    "urls": [
-      "https://swcdn.apple.com/content/downloads/60/22/089-71960-A_W8BL1RUJJ6/5zkyplomhk1cm7z6xja2ktgapnhhti6wwd/CLTools_macOSNMOS_SDK.pkg",
-      "https://web.archive.org/web/20250915230423/https://swcdn.apple.com/content/downloads/60/22/089-71960-A_W8BL1RUJJ6/5zkyplomhk1cm7z6xja2ktgapnhhti6wwd/CLTools_macOSNMOS_SDK.pkg"
-    ],
-    "version": "26.2",
-    "hash": "sha256-hXRlMieVv0smna5uiWRwq87IWOaPWtAjAldbi+wQXcw="
-  }
-}
--- a/nix/darwin-build-fixes.patch
+++ b/nix/darwin-build-fixes.patch
@@ -1,79 +0,0 @@
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 0ed30932..d8528132 100644
--- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -177,11 +177,7 @@ if(MLX_BUILD_METAL)
-     add_compile_definitions(MLX_METAL_DEBUG)
-   endif()
-
-  # Throw an error if xcrun not found
-  execute_process(
-    COMMAND zsh "-c" "/usr/bin/xcrun -sdk macosx --show-sdk-version"
-    OUTPUT_VARIABLE MACOS_SDK_VERSION
-    OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ERROR_IS_FATAL ANY)
-+  set(MACOS_SDK_VERSION @sdkVersion@)
-
-   if(${MACOS_SDK_VERSION} LESS 14.0)
-     message(
-@@ -199,11 +195,8 @@ if(MLX_BUILD_METAL)
-     endif()
-     set(XCRUN_FLAGS "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
-   endif()
-  execute_process(
-    COMMAND
-      zsh "-c"
-      "echo \"__METAL_VERSION__\" | xcrun -sdk macosx metal ${XCRUN_FLAGS} -E -x metal -P - | tail -1 | tr -d '\n'"
-    OUTPUT_VARIABLE MLX_METAL_VERSION COMMAND_ERROR_IS_FATAL ANY)
-+  set(
-+    MLX_METAL_VERSION @metalVersion@)
-   FetchContent_Declare(metal_cpp URL ${METAL_CPP_URL})
-   FetchContent_MakeAvailable(metal_cpp)
-   target_include_directories(
-diff --git a/cmake/extension.cmake b/cmake/extension.cmake
-index 13db804a..5b385132 100644
--- a/cmake/extension.cmake
-+++ b/cmake/extension.cmake
-@@ -36,7 +36,7 @@ macro(mlx_build_metallib)
-   add_custom_command(
-     OUTPUT ${MTLLIB_BUILD_TARGET}
-     COMMAND
-      xcrun -sdk macosx metal
-+      metal -fmodules-cache-path=${CMAKE_BINARY_DIR}/metal-cache
-       "$<LIST:TRANSFORM,${MTLLIB_INCLUDE_DIRS},PREPEND,-I>"
-       ${MTLLIB_COMPILE_OPTIONS} ${MTLLIB_SOURCES} -o ${MTLLIB_BUILD_TARGET}
-     DEPENDS ${MTLLIB_DEPS} ${MTLLIB_SOURCES}
-diff --git a/mlx/backend/metal/kernels/CMakeLists.txt b/mlx/backend/metal/kernels/CMakeLists.txt
-index 262b0495..5c7446ad 100644
--- a/mlx/backend/metal/kernels/CMakeLists.txt
-+++ b/mlx/backend/metal/kernels/CMakeLists.txt
-@@ -29,7 +29,7 @@ function(build_kernel_base TARGET SRCFILE DEPS)
-                     "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
-   endif()
-   add_custom_command(
-    COMMAND xcrun -sdk macosx metal ${METAL_FLAGS} -c ${SRCFILE}
-+    COMMAND metal -fmodules-cache-path=${CMAKE_BINARY_DIR}/metal-cache ${METAL_FLAGS} -c ${SRCFILE}
-             -I${PROJECT_SOURCE_DIR} -o ${TARGET}.air
-     DEPENDS ${SRCFILE} ${DEPS} ${BASE_HEADERS}
-     OUTPUT ${TARGET}.air
-@@ -170,7 +170,7 @@ endif()
-
- add_custom_command(
-   OUTPUT ${MLX_METAL_PATH}/mlx.metallib
-  COMMAND xcrun -sdk macosx metallib ${KERNEL_AIR} -o
-+  COMMAND metallib ${KERNEL_AIR} -o
-           ${MLX_METAL_PATH}/mlx.metallib
-   DEPENDS ${KERNEL_AIR}
-   COMMENT "Building mlx.metallib"
-diff --git a/mlx/backend/metal/make_compiled_preamble.sh b/mlx/backend/metal/make_compiled_preamble.sh
-index bb55ed3a..94ea7dd7 100644
--- a/mlx/backend/metal/make_compiled_preamble.sh
-+++ b/mlx/backend/metal/make_compiled_preamble.sh
-@@ -31,7 +31,7 @@ OUTPUT_FILE=${OUTPUT_DIR}/${SRC_NAME}.cpp
- mkdir -p "$OUTPUT_DIR"
-
- # Use the metal compiler to get a list of headers (with depth)
-CCC="xcrun -sdk macosx metal -x metal"
-+CCC="metal -x metal -fmodules-cache-path=${OUTPUT_DIR}/metal-cache"
- HDRS=$( $CCC -I"$SRC_DIR" -I"$JIT_INCLUDES" -DMLX_METAL_JIT -E -P -CC -C -H "$INPUT_FILE" $CFLAGS -w 2>&1 1>/dev/null )
-
- # Remove any included system frameworks (for MetalPerformancePrimitive headers)
--- a/nix/metal-toolchain.nix
+++ b/nix/metal-toolchain.nix
@@ -1,56 +0,0 @@
-{ lib, stdenvNoCC, requireFile, nix }:
-
-let
-  narFile = requireFile {
-    name = "metal-toolchain-17C48.nar";
-    message = ''
-      The Metal Toolchain NAR must be available.
-
-      If you have cachix configured for exo.cachix.org, this should be automatic.
-
-      Otherwise:
-        1. Install Xcode 26+ from the App Store
-        2. Run: xcodebuild -downloadComponent MetalToolchain
-        3. Export the toolchain:
-           hdiutil attach "$(find /System/Library/AssetsV2/com_apple_MobileAsset_MetalToolchain -name '*.dmg' | head -1)" -mountpoint /tmp/metal-dmg
-           cp -R /tmp/metal-dmg/Metal.xctoolchain /tmp/metal-export
-           hdiutil detach /tmp/metal-dmg
-        4. Create NAR and add to store:
-           nix nar pack /tmp/metal-export > /tmp/metal-toolchain-17C48.nar
-           nix store add --mode flat /tmp/metal-toolchain-17C48.nar
-    '';
-    hash = "sha256-ayR5mXN4sZAddwKEG2OszGRF93k9ZFc7H0yi2xbylQw=";
-  };
-in
-stdenvNoCC.mkDerivation {
-  pname = "metal-toolchain";
-  version = "17C48";
-
-  dontUnpack = true;
-  dontBuild = true;
-  dontFixup = true;
-
-  nativeBuildInputs = [ nix ];
-
-  installPhase = ''
-    runHook preInstall
-
-    nix-store --restore $out < ${narFile}
-
-    # Create bin directory with symlinks for PATH
-    mkdir -p $out/bin
-    ln -s $out/usr/bin/metal $out/bin/metal
-    ln -s $out/usr/bin/metallib $out/bin/metallib
-
-    runHook postInstall
-  '';
-
-  # Metal language version for CMake (from: echo __METAL_VERSION__ | metal -E -x metal -P -)
-  passthru.metalVersion = "400";
-
-  meta = {
-    description = "Apple Metal compiler toolchain";
-    platforms = [ "aarch64-darwin" ];
-    license = lib.licenses.unfree;
-  };
-}
--- a/nix/mlx.nix
+++ b/nix/mlx.nix
@@ -1,159 +0,0 @@
-{ stdenv
-, lib
-, fetchFromGitHub
-, replaceVars
-, fetchzip
-, cmake
-, nlohmann_json
-, apple-sdk_26
-, metal-toolchain
-, runCommand
-, fmt
-, python313Packages
-, uvLockMlxVersion
-}:
-
-assert stdenv.isDarwin;
-
-let
-  python = python313Packages.python;
-
-  # Static dependencies included directly during compilation
-  gguf-tools = fetchFromGitHub {
-    owner = "antirez";
-    repo = "gguf-tools";
-    rev = "8fa6eb65236618e28fd7710a0fba565f7faa1848";
-    hash = "sha256-15FvyPOFqTOr5vdWQoPnZz+mYH919++EtghjozDlnSA=";
-  };
-
-  metal_cpp = fetchzip {
-    url = "https://developer.apple.com/metal/cpp/files/metal-cpp_26.zip";
-    hash = "sha256-7n2eI2lw/S+Us6l7YPAATKwcIbRRpaQ8VmES7S8ZjY8=";
-  };
-
-  nanobind = fetchFromGitHub {
-    owner = "wjakob";
-    repo = "nanobind";
-    rev = "v2.10.2";
-    hash = "sha256-io44YhN+VpfHFWyvvLWSanRgbzA0whK8WlDNRi3hahU=";
-    fetchSubmodules = true;
-  };
-
-  mlx = stdenv.mkDerivation rec {
-    pname = "mlx";
-    version = let v = "0.30.6"; in
-      assert v == uvLockMlxVersion || throw "MLX version mismatch: nix/mlx.nix has ${v} but uv.lock has ${uvLockMlxVersion}. Update both the version and hash in nix/mlx.nix.";
-      v;
-    pyproject = true;
-
-    src = fetchFromGitHub {
-      owner = "ml-explore";
-      repo = "mlx";
-      tag = "v${version}";
-      hash = "sha256-avD5EGhwgmPdXLAyQSqTO6AXk/W3ziH+f6AetjK3Sdo=";
-    };
-
-    patches = [
-      (replaceVars ./darwin-build-fixes.patch {
-        sdkVersion = apple-sdk_26.version;
-        metalVersion = metal-toolchain.metalVersion;
-      })
-    ];
-
-    postPatch = ''
-      substituteInPlace mlx/backend/cpu/jit_compiler.cpp \
-        --replace-fail "g++" "$CXX"
-    '';
-
-    dontUseCmakeConfigure = true;
-
-    enableParallelBuilding = true;
-
-    # Allows multiple cores to be used in Python builds.
-    postUnpack = ''
-      export MAKEFLAGS+="''${enableParallelBuilding:+-j$NIX_BUILD_CORES}"
-    '';
-
-    # Updates the wrong fetcher rev attribute
-    passthru.skipBulkUpdate = true;
-
-    env = {
-      DEV_RELEASE = 1;
-      CMAKE_ARGS = toString [
-        (lib.cmakeBool "USE_SYSTEM_FMT" true)
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_GGUFLIB" "${gguf-tools}")
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_JSON" "${nlohmann_json.src}")
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_NANOBIND" "${nanobind}")
-        (lib.cmakeBool "FETCHCONTENT_FULLY_DISCONNECTED" true)
-        (lib.cmakeBool "MLX_BUILD_CPU" true)
-        (lib.cmakeBool "MLX_BUILD_METAL" true)
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_METAL_CPP" "${metal_cpp}")
-        (lib.cmakeOptionType "string" "CMAKE_OSX_DEPLOYMENT_TARGET" "${apple-sdk_26.version}")
-        (lib.cmakeOptionType "filepath" "CMAKE_OSX_SYSROOT" "${apple-sdk_26.passthru.sdkroot}")
-      ];
-      SDKROOT = apple-sdk_26.passthru.sdkroot;
-      MACOSX_DEPLOYMENT_TARGET = apple-sdk_26.version;
-    };
-
-    build-system = [
-      python313Packages.setuptools
-    ];
-
-    nativeBuildInputs = [
-      cmake
-      metal-toolchain
-      python313Packages.pypaBuildHook
-      python313Packages.pypaInstallHook
-      python313Packages.setuptools
-      python313Packages.typing-extensions
-      python313Packages.wheel
-      python313Packages.cmake
-      python313Packages.ninja
-    ];
-
-    buildInputs = [
-      fmt
-      gguf-tools
-      python313Packages.nanobind
-      python313Packages.pybind11
-      apple-sdk_26
-    ];
-
-    # Tests require Metal GPU access which isn't available in the Nix sandbox.
-    # To run tests, build with: nix build --option sandbox false .#mlx.passthru.tests.mlxTest
-    doCheck = false;
-
-    pythonImportsCheck = [ "mlx" ];
-
-    passthru.tests = {
-      # Runs example scripts to verify MLX works. Requires --option sandbox false
-      # since Metal GPU access is needed.
-      mlxTest =
-        runCommand "run-mlx-examples"
-          {
-            buildInputs = [ mlx ];
-            nativeBuildInputs = [ python ];
-          }
-          ''
-            cp ${src}/examples/python/logistic_regression.py .
-            ${python.interpreter} logistic_regression.py
-            rm logistic_regression.py
-
-            cp ${src}/examples/python/linear_regression.py .
-            ${python.interpreter} linear_regression.py
-            rm linear_regression.py
-
-            touch $out
-          '';
-    };
-
-    meta = {
-      homepage = "https://github.com/ml-explore/mlx";
-      description = "Array framework for Apple silicon";
-      changelog = "https://github.com/ml-explore/mlx/releases/tag/${src.tag}";
-      license = lib.licenses.mit;
-      platforms = [ "aarch64-darwin" ];
-    };
-  };
-in
-mlx
--- a/packaging/pyinstaller/exo.spec
+++ b/packaging/pyinstaller/exo.spec
@@ -10,7 +10,6 @@ PROJECT_ROOT = Path.cwd()
 SOURCE_ROOT = PROJECT_ROOT / "src"
 ENTRYPOINT = SOURCE_ROOT / "exo" / "__main__.py"
 DASHBOARD_DIR = PROJECT_ROOT / "dashboard" / "build"
-RESOURCES_DIR = PROJECT_ROOT / "resources"
 EXO_SHARED_MODELS_DIR = SOURCE_ROOT / "exo" / "shared" / "models"

 if not ENTRYPOINT.is_file():
@@ -19,9 +18,6 @@ if not ENTRYPOINT.is_file():
 if not DASHBOARD_DIR.is_dir():
    raise SystemExit(f"Dashboard assets are missing: {DASHBOARD_DIR}")

-if not RESOURCES_DIR.is_dir():
-    raise SystemExit(f"Resource assets are missing: {RESOURCES_DIR}")
-
 if not EXO_SHARED_MODELS_DIR.is_dir():
    raise SystemExit(f"Shared model assets are missing: {EXO_SHARED_MODELS_DIR}")

@@ -62,7 +58,6 @@ HIDDEN_IMPORTS = sorted(

 DATAS: list[tuple[str, str]] = [
    (str(DASHBOARD_DIR), "dashboard"),
-    (str(RESOURCES_DIR), "resources"),
    (str(MLX_LIB_DIR), "mlx/lib"),
    (str(EXO_SHARED_MODELS_DIR), "exo/shared/models"),
 ]
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,27 +13,29 @@ dependencies = [
    "filelock>=3.18.0",
    "rustworkx>=0.17.1",
    "huggingface-hub>=0.33.4",
+    "typer", # for huggingface-cli
    "psutil>=7.0.0",
    "loguru>=0.7.3",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
-    "mlx==0.30.6; sys_platform == 'darwin'",
-    "mlx[cpu]==0.30.6; sys_platform == 'linux'",
-    "mlx-lm==0.30.6",
+    "mlx==0.30.3; sys_platform == 'darwin'",
+    "mlx[cpu]==0.30.3; sys_platform == 'linux'",
+    "mlx-lm==0.30.5",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
    "hypercorn>=0.18.0",
    "openai-harmony>=0.0.8",
    "httpx>=0.28.1",
    "tomlkit>=0.14.0",
    "pillow>=11.0,<12.0", # compatibility with mflux
-    "mflux==0.15.5",
+    "mflux==0.15.4",
    "python-multipart>=0.0.21",
-    "msgspec>=0.19.0",
-    "zstandard>=0.23.0",
 ]

 [project.scripts]
+exo-master = "exo.master.main:main"
+exo-worker = "exo.worker.main:main"
 exo = "exo.main:main"
+exo-eval = "bench.exo_eval:main"

 # dependencies only required for development
 [dependency-groups]
@@ -51,6 +53,9 @@ dev = [
 # cuda = [
 #     "mlx[cuda]==0.26.3",
 # ]
+eval = [
+    "lm_eval[api]",
+]

 ###
 # workspace configuration
@@ -59,15 +64,14 @@ dev = [
 [tool.uv.workspace]
 members = [
    "rust/exo_pyo3_bindings",
-    "bench",
 ]

 [tool.uv.sources]
 exo_pyo3_bindings = { workspace = true }
-#mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm", branch = "stable" }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
 # mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }
+mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm.git", branch = "main" }

 [build-system]
 requires = ["uv_build>=0.8.9,<0.9.0"]
@@ -106,7 +110,6 @@ root = "src"

 # supported platforms for this project
 [tool.uv]
-required-version = ">=0.8.6"
 prerelease = "allow"
 environments = [
    "sys_platform == 'darwin'",
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -1,123 +0,0 @@
-{ inputs, ... }:
-{
-  perSystem =
-    { config, self', pkgs, lib, system, ... }:
-    let
-      # Load workspace from uv.lock
-      workspace = inputs.uv2nix.lib.workspace.loadWorkspace {
-        workspaceRoot = inputs.self;
-      };
-
-      # Create overlay from workspace
-      # Use wheels from PyPI for most packages; we override mlx with our pure Nix Metal build
-      overlay = workspace.mkPyprojectOverlay { sourcePreference = "wheel"; };
-
-      # Override overlay to inject Nix-built components
-      exoOverlay = final: prev: {
-        # Replace workspace exo_pyo3_bindings with Nix-built wheel
-        exo-pyo3-bindings = pkgs.stdenv.mkDerivation {
-          pname = "exo-pyo3-bindings";
-          version = "0.1.0";
-          src = self'.packages.exo_pyo3_bindings;
-          # Install from pre-built wheel
-          nativeBuildInputs = [ final.pyprojectWheelHook ];
-          dontStrip = true;
-        };
-      };
-
-      python = pkgs.python313;
-
-      # Overlay to provide build systems and custom packages
-      buildSystemsOverlay = final: prev: {
-        # Use our pure Nix-built MLX with Metal support
-        mlx = self'.packages.mlx;
-
-        # mlx-lm is a git dependency that needs setuptools
-        mlx-lm = prev.mlx-lm.overrideAttrs (old: {
-          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
-            final.setuptools
-          ];
-        });
-      };
-
-      pythonSet = (pkgs.callPackage inputs.pyproject-nix.build.packages {
-        inherit python;
-      }).overrideScope (
-        lib.composeManyExtensions [
-          inputs.pyproject-build-systems.overlays.default
-          overlay
-          exoOverlay
-          buildSystemsOverlay
-        ]
-      );
-      exoVenv = pythonSet.mkVirtualEnv "exo-env" workspace.deps.default;
-
-      # Virtual environment with dev dependencies for testing
-      testVenv = pythonSet.mkVirtualEnv "exo-test-env" (
-        workspace.deps.default // {
-          exo = [ "dev" ]; # Include pytest, pytest-asyncio, pytest-env
-        }
-      );
-
-      mkPythonScript = name: path: pkgs.writeShellApplication {
-        inherit name;
-        runtimeInputs = [ exoVenv ];
-        runtimeEnv = {
-          EXO_DASHBOARD_DIR = self'.packages.dashboard;
-          EXO_RESOURCES_DIR = inputs.self + /resources;
-        };
-        text = ''exec python ${path} "$@"'';
-      };
-
-      benchVenv = pythonSet.mkVirtualEnv "exo-bench-env" {
-        exo-bench = [ ];
-      };
-
-      mkBenchScript = name: path: pkgs.writeShellApplication {
-        inherit name;
-        runtimeInputs = [ benchVenv ];
-        text = ''exec python ${path} "$@"'';
-      };
-
-      mkSimplePythonScript = name: path: pkgs.writeShellApplication {
-        inherit name;
-        runtimeInputs = [ pkgs.python313 ];
-        text = ''exec python ${path} "$@"'';
-      };
-
-      exoPackage = pkgs.runCommand "exo"
-        {
-          nativeBuildInputs = [ pkgs.makeWrapper ];
-        }
-        ''
-          mkdir -p $out/bin
-
-          # Create wrapper script
-          makeWrapper ${exoVenv}/bin/exo $out/bin/exo \
-            --set EXO_DASHBOARD_DIR ${self'.packages.dashboard} \
-            --set EXO_RESOURCES_DIR ${inputs.self + /resources} \
-            ${lib.optionalString pkgs.stdenv.hostPlatform.isDarwin "--prefix PATH : ${pkgs.macmon}/bin"}
-        '';
-    in
-    {
-      # Python package only available on macOS (requires MLX/Metal)
-      packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin
-        {
-          exo = exoPackage;
-          # Test environment for running pytest outside of Nix sandbox (needs GPU access)
-          exo-test-env = testVenv;
-        } // {
-        exo-bench = mkBenchScript "exo-bench" (inputs.self + /bench/exo_bench.py);
-        exo-get-all-models-on-cluster = mkSimplePythonScript "exo-get-all-models-on-cluster" (inputs.self + /tests/get_all_models_on_cluster.py);
-      };
-
-      checks = {
-        # Ruff linting (works on all platforms)
-        lint = pkgs.runCommand "ruff-lint" { } ''
-          export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
-          ${pkgs.ruff}/bin/ruff check ${inputs.self}
-          touch $out
-        '';
-      };
-    };
-}
--- a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-4bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-Kontext-dev-4bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-family = "flux"
-quantization = "4bit"
-base_model = "FLUX.1 Kontext"
-capabilities = ["image_edit"]
-
-[storage_size]
-in_bytes = 15475325472
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 5950704160
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-8bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-Kontext-dev-8bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-family = "flux"
-quantization = "8bit"
-base_model = "FLUX.1 Kontext"
-capabilities = ["image_edit"]
-
-[storage_size]
-in_bytes = 21426029632
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 11901408320
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-Kontext-dev"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-family = "flux"
-quantization = ""
-base_model = "FLUX.1 Kontext"
-capabilities = ["image_edit"]
-
-[storage_size]
-in_bytes = 33327437952
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 23802816640
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-Krea-dev-4bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = "4bit"
-base_model = "FLUX.1 Krea"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 15475325472
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 5950704160
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-Krea-dev-8bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = "8bit"
-base_model = "FLUX.1 Krea"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 21426029632
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 11901408320
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-Krea-dev"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = ""
-base_model = "FLUX.1 Krea"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 33327437952
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 23802816640
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-dev-4bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = "4bit"
-base_model = "FLUX.1 Dev"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 15475325472
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 5950704160
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-dev-8bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = "8bit"
-base_model = "FLUX.1 Dev"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 21426029632
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 11901408320
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-dev"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = ""
-base_model = "FLUX.1 Dev"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 33327437952
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 23802816640
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-schnell-4bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = "4bit"
-base_model = "FLUX.1 Schnell"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 15470210592
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 5945589280
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-schnell-8bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = "8bit"
-base_model = "FLUX.1 Schnell"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 21415799872
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 11891178560
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
@@ -1,49 +0,0 @@
-model_id = "exolabs/FLUX.1-schnell"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-family = "flux"
-quantization = ""
-base_model = "FLUX.1 Schnell"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 33306978432
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 23782357120
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
@@ -1,40 +0,0 @@
-model_id = "exolabs/Qwen-Image-4bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-uses_cfg = true
-family = "qwen-image"
-quantization = "4bit"
-base_model = "Qwen Image"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 26799533856
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 10215200544
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
@@ -1,40 +0,0 @@
-model_id = "exolabs/Qwen-Image-8bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-uses_cfg = true
-family = "qwen-image"
-quantization = "8bit"
-base_model = "Qwen Image"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 37014734400
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 20430401088
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
@@ -1,40 +0,0 @@
-model_id = "exolabs/Qwen-Image-Edit-2509-4bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-uses_cfg = true
-family = "qwen-image"
-quantization = "4bit"
-base_model = "Qwen Image Edit"
-capabilities = ["image_edit"]
-
-[storage_size]
-in_bytes = 26799533856
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 10215200544
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
@@ -1,40 +0,0 @@
-model_id = "exolabs/Qwen-Image-Edit-2509-8bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-uses_cfg = true
-family = "qwen-image"
-quantization = "8bit"
-base_model = "Qwen Image Edit"
-capabilities = ["image_edit"]
-
-[storage_size]
-in_bytes = 37014734400
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 20430401088
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
@@ -1,40 +0,0 @@
-model_id = "exolabs/Qwen-Image-Edit-2509"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-uses_cfg = true
-family = "qwen-image"
-quantization = ""
-base_model = "Qwen Image Edit"
-capabilities = ["image_edit"]
-
-[storage_size]
-in_bytes = 57445135488
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 40860802176
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image.toml
@@ -1,40 +0,0 @@
-model_id = "exolabs/Qwen-Image"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-uses_cfg = true
-family = "qwen-image"
-quantization = ""
-base_model = "Qwen Image"
-capabilities = ["image_gen"]
-
-[storage_size]
-in_bytes = 57445135488
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 40860802176
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/DeepSeek-V3.1-4bit"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "deepseek"
-quantization = "4bit"
-base_model = "DeepSeek V3.1"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 405874409472
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/DeepSeek-V3.1-8bit"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "deepseek"
-quantization = "8bit"
-base_model = "DeepSeek V3.1"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 765577920512
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.5-Air-8bit"
-n_layers = 46
-hidden_size = 4096
-supports_tensor = false
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM 4.5 Air"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 122406567936
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.5-Air-bf16"
-n_layers = 46
-hidden_size = 4096
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "bf16"
-base_model = "GLM 4.5 Air"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 229780750336
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-4bit"
-n_layers = 91
-hidden_size = 5120
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "4bit"
-base_model = "GLM 4.7"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 198556925568
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-6bit"
-n_layers = 91
-hidden_size = 5120
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "6bit"
-base_model = "GLM 4.7"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 286737579648
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-8bit-gs32"
-n_layers = 91
-hidden_size = 5120
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM 4.7"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 396963397248
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-4bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "4bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 19327352832
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-5bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "5bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 22548578304
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-6bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "6bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 26843545600
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-8bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 34359738368
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Kimi-K2-Instruct-4bit"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "kimi"
-quantization = "4bit"
-base_model = "Kimi K2"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 620622774272
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Kimi-K2-Thinking"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "kimi"
-quantization = ""
-base_model = "Kimi K2"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 706522120192
--- a/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Kimi-K2.5"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "kimi"
-quantization = ""
-base_model = "Kimi K2.5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 662498705408
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
-n_layers = 16
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.2 1B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 729808896
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
-n_layers = 28
-hidden_size = 3072
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.2 3B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 1863319552
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
-n_layers = 28
-hidden_size = 3072
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "8bit"
-base_model = "Llama 3.2 3B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 3501195264
--- a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
-n_layers = 80
-hidden_size = 8192
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.3 70B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 40652242944
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
dmcc73	6018a9c97c	Point mlx-lm to davidmcc73 fork with context parallelism support	2026-02-02 19:16:43 +00:00
dmcc73	07b8405d3e	Add context parallelism support to DeepSeek sharding Store pre-shard head count and distributed group on each attention layer during sharding, enabling automatic TP→CP switching at runtime when context length exceeds a threshold. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-02-02 18:35:20 +00:00
Ryuichi Leo Takashige	5d3b407602	parallelise Qwen3Next	2026-01-28 18:01:23 +00:00
Ryuichi Leo Takashige	e7a5826aed	try reshaping	2026-01-28 15:23:28 +00:00
Ryuichi Leo Takashige	ebe279018f	import	2026-01-28 15:11:57 +00:00
Ryuichi Leo Takashige	bf67e7d334	maybe?	2026-01-28 15:09:50 +00:00
Ryuichi Leo Takashige	0cd2f6aab4	oops	2026-01-28 14:57:05 +00:00
Ryuichi Leo Takashige	ba8a44e6a2	use wrapped minimax attention	2026-01-28 14:53:23 +00:00
Ryuichi Leo Takashige	07c4be157b	Oops	2026-01-28 14:07:19 +00:00
Ryuichi Leo Takashige	1e1eb8f8a1	Format	2026-01-28 14:01:45 +00:00
Ryuichi Leo Takashige	1bc2d9728d	Use different minimax sharding	2026-01-28 14:00:56 +00:00
Ryuichi Leo Takashige	7823fd7b1a	fix exo eval	2026-01-27 22:20:04 +00:00
Ryuichi Leo Takashige	05caab0047	Extract minimax think tokens	2026-01-27 21:59:51 +00:00
Ryuichi Leo Takashige	bd8f9f2d10	Extract thinking models generally.	2026-01-27 21:30:53 +00:00
Ryuichi Leo Takashige	34fcafa68a	Ignore timeout	2026-01-27 21:10:59 +00:00
Ryuichi Leo Takashige	5152789e00	lengthen timeout	2026-01-27 18:25:54 +00:00
Ryuichi Leo Takashige	b734437b2d	lengthen timeout	2026-01-27 15:51:23 +00:00
Ryuichi Leo Takashige	553939fa31	Merge branch 'main' into leo/add-logprobs-to-chatcompletion	2026-01-27 15:38:20 +00:00
Ryuichi Leo Takashige	13ee17428e	Use 1200s timeout	2026-01-27 13:36:25 +00:00
Ryuichi Leo Takashige	1b0d39c0b3	skip end token	2026-01-27 13:16:52 +00:00
Ryuichi Leo Takashige	5e3cd73a9e	fix batch handler for tp	2026-01-27 12:53:40 +00:00
Ryuichi Leo Takashige	1d1256c769	remove completion	2026-01-27 12:39:09 +00:00
Ryuichi Leo Takashige	77baf9c58e	Fix minimax	2026-01-27 12:22:42 +00:00
Ryuichi Leo Takashige	022a09b6d9	Fix merge	2026-01-27 12:13:49 +00:00
Ryuichi Leo Takashige	0aa708fac4	Fix merge	2026-01-27 11:57:33 +00:00
Ryuichi Leo Takashige	eb89c2e4b9	Use tensor rdma minimax	2026-01-27 11:46:18 +00:00
Ryuichi Leo Takashige	72a5eec3f7	Merge branch 'main' into leo/add-logprobs-to-chatcompletion	2026-01-27 11:34:10 +00:00
Ryuichi Leo Takashige	a25892e8d5	bug	2026-01-23 15:05:42 +00:00
Ryuichi Leo Takashige	8798ab52ee	bug	2026-01-23 15:00:11 +00:00
Ryuichi Leo Takashige	457debc338	bug	2026-01-23 13:41:56 +00:00
Ryuichi Leo Takashige	0cfaea41bc	bug	2026-01-23 13:21:35 +00:00
Ryuichi Leo Takashige	18c82443ba	fixes	2026-01-23 13:17:37 +00:00
Ryuichi Leo Takashige	b9ec8b0a44	fix	2026-01-23 12:58:36 +00:00
Ryuichi Leo Takashige	00442b3cfd	Add more llm stuff	2026-01-23 12:55:13 +00:00
Ryuichi Leo Takashige	aa41da8541	Add more llm stuff	2026-01-23 12:47:04 +00:00
Ryuichi Leo Takashige	86e5d7b101	optimize further and get usage stats	2026-01-22 22:13:00 +00:00
Ryuichi Leo Takashige	d9ddf90575	add token usage stats	2026-01-22 21:04:56 +00:00
Ryuichi Leo Takashige	4591301767	Add a bunch of LLM generated slop	2026-01-22 20:44:40 +00:00
Ryuichi Leo Takashige	8b0b5e1b88	Add completions endpoint	2026-01-22 17:26:52 +00:00
Ryuichi Leo Takashige	bd6287727a	Add basic exo eval	2026-01-22 16:48:12 +00:00
Ryuichi Leo Takashige	eb53611210	Add option to use null top k	2026-01-22 16:44:53 +00:00
Ryuichi Leo Takashige	71bbe5f25b	Review and extract logprob stuff from alexcheema/uncertainty-visualization	2026-01-22 14:51:12 +00:00