Fix cancellation during async step

Skip final rank async send upon cancellation
Handle cancellation completion in dashboard
2026-02-04 19:22:39 -05:00 · 2026-01-26 17:37:24 +00:00 · 2026-01-26 15:55:47 +00:00 · 2026-01-26 10:23:54 +00:00 · 2026-01-26 10:23:54 +00:00 · 2026-01-26 10:23:54 +00:00
164 changed files with 3956 additions and 12512 deletions
--- a/.github/actions/typecheck/action.yml
+++ b/.github/actions/typecheck/action.yml
@@ -0,0 +1,12 @@
+name: Type Check
+
+description: "Run type checker"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Run type checker
+      run: |
+        nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just sync
+        nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just check
+      shell: bash
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -26,14 +26,73 @@ jobs:
          name: exo
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"

-      - name: Load nix develop environment
-        run: nix run github:nicknovitski/nix-develop/v1
+      - name: Configure git user
+        run: |
+          git config --local user.email "github-actions@users.noreply.github.com"
+          git config --local user.name  "github-actions bot"
+        shell: bash

-      - name: Sync dependencies
-        run: uv sync --all-packages
+      - name: Pull LFS files
+        run: |
+          echo "Pulling Git LFS files..."
+          git lfs pull
+        shell: bash

-      - name: Run type checker
-        run: uv run basedpyright --project pyproject.toml
+      - name: Setup Nix Environment
+        run: |
+          echo "Checking for nix installation..."
+          
+          # Check if nix binary exists directly
+          if [ -f /nix/var/nix/profiles/default/bin/nix ]; then
+            echo "Found nix binary at /nix/var/nix/profiles/default/bin/nix"
+            export PATH="/nix/var/nix/profiles/default/bin:$PATH"
+            echo "PATH=$PATH" >> $GITHUB_ENV
+            nix --version
+          elif [ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]; then
+            echo "Found nix profile script, sourcing..."
+            source /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
+            nix --version
+          elif command -v nix >/dev/null 2>&1; then
+            echo "Nix already in PATH"
+            nix --version
+          else
+            echo "Nix not found. Debugging info:"
+            echo "Contents of /nix/var/nix/profiles/default/:"
+            ls -la /nix/var/nix/profiles/default/ 2>/dev/null || echo "Directory not found"
+            echo "Contents of /nix/var/nix/profiles/default/bin/:"
+            ls -la /nix/var/nix/profiles/default/bin/ 2>/dev/null || echo "Directory not found"
+            exit 1
+          fi
+        shell: bash
+
+      - name: Configure basedpyright include for local MLX
+        run: |
+          RUNNER_LABELS='${{ toJSON(runner.labels) }}'
+          if echo "$RUNNER_LABELS" | grep -q "local_mlx"; then
+            if [ -d "/Users/Shared/mlx" ]; then
+              echo "Updating [tool.basedpyright].include to use /Users/Shared/mlx"
+              awk '
+                BEGIN { in_section=0 }  # "in" is a reserved awk keyword, so it cannot be a variable name
+                /^\[tool\.basedpyright\]/ { in_section=1; print; next }
+                in_section && /^\[/ { in_section=0 }  # next section
+                in_section && /^[ \t]*include[ \t]*=/ {  # only the line holding "include =" is replaced
+                  print "include = [\"/Users/Shared/mlx\"]"
+                  next
+                }
+                { print }
+              ' pyproject.toml > pyproject.toml.tmp && mv pyproject.toml.tmp pyproject.toml
+
+              echo "New [tool.basedpyright] section:"
+              sed -n '/^\[tool\.basedpyright\]/,/^\[/p' pyproject.toml | sed '$d' || true
+            else
+              echo "local_mlx tag present but /Users/Shared/mlx not found; leaving pyproject unchanged."
+            fi
+          else
+            echo "Runner does not have 'local_mlx' tag; leaving pyproject unchanged."
+          fi
+        shell: bash
+
+      - uses: ./.github/actions/typecheck

  nix:
    name: Build and check (${{ matrix.system }})
@@ -64,63 +123,6 @@ jobs:
          name: exo
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"

-      - name: Build Metal packages (macOS only)
-        if: runner.os == 'macOS'
-        run: |
-          # Try to build metal-toolchain first (may succeed via cachix cache hit)
-          if nix build .#metal-toolchain 2>/dev/null; then
-            echo "metal-toolchain built successfully (likely cache hit)"
-          else
-            echo "metal-toolchain build failed, extracting from Xcode..."
-
-            NAR_HASH="sha256-ayR5mXN4sZAddwKEG2OszGRF93k9ZFc7H0yi2xbylQw="
-            NAR_NAME="metal-toolchain-17C48.nar"
-
-            # Use RUNNER_TEMP to avoid /tmp symlink issues on macOS
-            WORK_DIR="${RUNNER_TEMP}/metal-work"
-            mkdir -p "$WORK_DIR"
-
-            # Download the Metal toolchain component
-            xcodebuild -downloadComponent MetalToolchain
-
-            # Find and mount the DMG
-            DMG_PATH=$(find /System/Library/AssetsV2/com_apple_MobileAsset_MetalToolchain -name '*.dmg' 2>/dev/null | head -1)
-            if [ -z "$DMG_PATH" ]; then
-              echo "Error: Could not find Metal toolchain DMG"
-              exit 1
-            fi
-
-            echo "Found DMG at: $DMG_PATH"
-            hdiutil attach "$DMG_PATH" -mountpoint "${WORK_DIR}/metal-dmg"
-
-            # Copy the toolchain
-            cp -R "${WORK_DIR}/metal-dmg/Metal.xctoolchain" "${WORK_DIR}/metal-export"
-            hdiutil detach "${WORK_DIR}/metal-dmg"
-
-            # Create NAR and add to store
-            nix nar pack "${WORK_DIR}/metal-export" > "${WORK_DIR}/${NAR_NAME}"
-            STORE_PATH=$(nix store add --mode flat "${WORK_DIR}/${NAR_NAME}")
-            echo "Added NAR to store: $STORE_PATH"
-
-            # Verify the hash matches
-            ACTUAL_HASH=$(nix hash file "${WORK_DIR}/${NAR_NAME}")
-            if [ "$ACTUAL_HASH" != "$NAR_HASH" ]; then
-              echo "Warning: NAR hash mismatch!"
-              echo "Expected: $NAR_HASH"
-              echo "Actual:   $ACTUAL_HASH"
-              echo "The metal-toolchain.nix may need updating"
-            fi
-
-            # Clean up
-            rm -rf "$WORK_DIR"
-
-            # Retry the build now that NAR is in store
-            nix build .#metal-toolchain
-          fi
-
-          # Build mlx (depends on metal-toolchain)
-          nix build .#mlx
-
      - name: Build all Nix outputs
        run: |
          nix flake show --json | jq -r '
@@ -132,16 +134,3 @@ jobs:

      - name: Run nix flake check
        run: nix flake check
-
-      - name: Run pytest (macOS only)
-        if: runner.os == 'macOS'
-        run: |
-          # Build the test environment (requires relaxed sandbox for uv2nix on macOS)
-          TEST_ENV=$(nix build '.#exo-test-env' --option sandbox relaxed --print-out-paths)
-
-          # Run pytest outside sandbox (needs GPU access for MLX)
-          export HOME="$RUNNER_TEMP"
-          export EXO_TESTS=1
-          export EXO_DASHBOARD_DIR="$PWD/dashboard/" 
-          export EXO_RESOURCES_DIR="$PWD/resources" 
-          $TEST_ENV/bin/python -m pytest src -m "not slow" --import-mode=importlib
--- a/.gitignore
+++ b/.gitignore
@@ -28,7 +28,3 @@ target/
 dashboard/build/
 dashboard/node_modules/
 dashboard/.svelte-kit/
-
-# host config snapshots
-hosts_*.json
-.swp
--- a/.mlx_typings/mlx_lm/tokenizer_utils.pyi
+++ b/.mlx_typings/mlx_lm/tokenizer_utils.pyi
@@ -108,7 +108,6 @@ class TokenizerWrapper:
    _tokenizer: PreTrainedTokenizerFast
    eos_token_id: int | None
    eos_token: str | None
-    eos_token_ids: list[int] | set[int] | None
    bos_token_id: int | None
    bos_token: str | None
    vocab_size: int
@@ -118,7 +117,7 @@ class TokenizerWrapper:
        self,
        tokenizer: Any,
        detokenizer_class: Any = ...,
-        eos_token_ids: list[int] | set[int] | None = ...,
+        eos_token_ids: list[int] | None = ...,
        chat_template: Any = ...,
        tool_parser: Any = ...,
        tool_call_start: str | None = ...,
--- a/MISSED_THINGS.md
+++ b/MISSED_THINGS.md
@@ -5,18 +5,18 @@
 [X] Fetching download status of all models on start
 [X] Deduplication of tasks in plan_step.
 [X] resolve_allow_patterns should just be wildcard now.
-[] no mx_barrier in genreate.py mlx_generate at the end.
+[X] no mx_barrier in generate.py mlx_generate at the end.
 [] cache assertion not needed in auto_parallel.py PipelineLastLayer.
-[] GPTOSS support dropped in auto_parallel.py.
-[] sharding changed "all-to-sharded" became _all_to_sharded in auto_parallel.py.
-[] same as above with "sharded-to-all" became _sharded_to_all in auto_parallel.py.
-[] Dropped support for Ministral3Model, DeepseekV32Model, Glm4MoeModel, Qwen3NextModel, GptOssMode in auto_parallel.py.
+[X] GPTOSS support dropped in auto_parallel.py.
+[X] sharding changed "all-to-sharded" became _all_to_sharded in auto_parallel.py.
+[X] same as above with "sharded-to-all" became _sharded_to_all in auto_parallel.py.
+[X] Dropped support for Ministral3Model, DeepseekV32Model, Glm4MoeModel, Qwen3NextModel, GptOssModel in auto_parallel.py.
 [] Dropped prefill/decode code in auto_parallel.py and utils_mlx.py.
 [X] KV_CACHE_BITS should be None to disable quantized KV cache.
-[] Dropped _set_nofile_limit in utils_mlx.py.
-[] We have group optional in load_mlx_items in utils_mlx.py.
+[X] Dropped _set_nofile_limit in utils_mlx.py.
+[X] We have group optional in load_mlx_items in utils_mlx.py.
 [] Dropped add_missing_chat_templates for GptOss in load_mlx_items in utils_mlx.py.
-[] Dropped model.make_cache in make_kv_cache in utils_mlx.py.
+[X] Dropped model.make_cache in make_kv_cache in utils_mlx.py.
 [X] We put cache limit back in utils_mlx.py.
 [] topology.py remove_node removes the connections after checking if node is is in self._node_id_to_rx_id_map. on beta_1 it checks after, so would remove stale connections I guess?
 [] Missing Glm 4.7 model cards (this isn't ready yet but should be picked up, probably create an issue... the blocker is transforemrs version doesn't support the tokenizer for Glm 4.7. rc-1 does but we can't upgrade as it breaks other things.)
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
  <img alt="exo logo" src="/docs/imgs/exo-logo-transparent.png" width="50%" height="50%">
 </picture>

-exo: Run frontier AI locally. Maintained by [exo labs](https://x.com/exolabs).
+exo: Run your own AI cluster at home with everyday devices. Maintained by [exo labs](https://x.com/exolabs).

 <p align="center">
  <a href="https://discord.gg/TJ4P57arEm" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/Discord-Join%20Server-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
@@ -107,10 +107,6 @@ uv run exo

 This starts the exo dashboard and API at http://localhost:52415/

-
-*Please view the section on RDMA to enable this feature on MacOS >=26.2!*
-
-
 ### Run from Source (Linux)

 **Prerequisites:**
@@ -234,7 +230,7 @@ This removes:

 RDMA is a new capability added to macOS 26.2. It works on any Mac with Thunderbolt 5 (M4 Pro Mac Mini, M4 Max Mac Studio, M4 Max MacBook Pro, M3 Ultra Mac Studio).

-Please refer to the caveats for immediate troubleshooting.
+Note that on Mac Studio, you cannot use the Thunderbolt 5 port next to the Ethernet port.

 To enable RDMA on macOS, follow these steps:

@@ -251,14 +247,6 @@ To enable RDMA on macOS, follow these steps:

 After that, RDMA will be enabled in macOS and exo will take care of the rest.

-**Important Caveats**
-
-1. Devices that wish to be part of an RDMA cluster must be connected to all other devices in the cluster.
-2. The cables must support TB5.
-3. On a Mac Studio, you cannot use the Thunderbolt 5 port next to the Ethernet port.
-4. If running from source, please use the script found at `tmp/set_rdma_network_config.sh`, which will disable Thunderbolt Bridge and set dhcp on each RDMA port.
-5. RDMA ports may be unable to discover each other on different versions of MacOS. Please ensure that OS versions match exactly (even beta version numbers) on all devices.
-
 ---

 ### Using the API
--- a/app/EXO/EXO.xcodeproj/project.pbxproj
+++ b/app/EXO/EXO.xcodeproj/project.pbxproj
@@ -342,8 +342,6 @@
 				SDKROOT = macosx;
 				SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
-				SWIFT_TREAT_WARNINGS_AS_ERRORS = YES;
-				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
 			};
 			name = Debug;
 		};
@@ -399,8 +397,6 @@
 				MTL_FAST_MATH = YES;
 				SDKROOT = macosx;
 				SWIFT_COMPILATION_MODE = wholemodule;
-				SWIFT_TREAT_WARNINGS_AS_ERRORS = YES;
-				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
 			};
 			name = Release;
 		};
--- a/app/EXO/EXO/EXOApp.swift
+++ b/app/EXO/EXO/EXOApp.swift
@@ -225,7 +225,7 @@ private final class ExoUpdaterDelegate: NSObject, SPUUpdaterDelegate {
        }
    }

-    nonisolated private func showNotification(title: String, body: String) {
+    private func showNotification(title: String, body: String) {
        let center = UNUserNotificationCenter.current()
        let content = UNMutableNotificationContent()
        content.title = title
--- a/app/EXO/EXO/Models/ClusterState.swift
+++ b/app/EXO/EXO/Models/ClusterState.swift
@@ -293,7 +293,7 @@ struct ClusterTask {
    let modelName: String?
    let promptPreview: String?
    let errorMessage: String?
-    let parameters: TextGenerationTaskParameters?
+    let parameters: ChatCompletionTaskParameters?

    var sortPriority: Int {
        switch status {
@@ -330,12 +330,12 @@ struct ClusterTaskPayload: Decodable {
    let taskStatus: TaskStatus?
    let instanceId: String?
    let commandId: String?
-    let taskParams: TextGenerationTaskParameters?
+    let taskParams: ChatCompletionTaskParameters?
    let errorType: String?
    let errorMessage: String?
 }

-struct TextGenerationTaskParameters: Decodable, Equatable {
+struct ChatCompletionTaskParameters: Decodable, Equatable {
    let model: String?
    let messages: [ChatCompletionMessage]?
    let maxTokens: Int?
@@ -374,7 +374,7 @@ extension ClusterTask {
        guard let id = payload.taskId else { return nil }
        let status = payload.taskStatus ?? .unknown
        switch kindKey {
-        case "TextGeneration":
+        case "ChatCompletion":
            self.init(
                id: id,
                status: status,
--- a/app/EXO/EXO/Services/NetworkSetupHelper.swift
+++ b/app/EXO/EXO/Services/NetworkSetupHelper.swift
@@ -18,9 +18,6 @@ enum NetworkSetupHelper {

        set -euo pipefail

-        # Wait for macOS to finish network setup after boot
-        sleep 20
-
        PREFS="/Library/Preferences/SystemConfiguration/preferences.plist"

        # Remove bridge0 interface
@@ -34,35 +31,6 @@ enum NetworkSetupHelper {
        # Remove Thunderbolt Bridge from VirtualNetworkInterfaces in preferences.plist
        /usr/libexec/PlistBuddy -c "Delete :VirtualNetworkInterfaces:Bridge:bridge0" "$PREFS" 2>/dev/null || true

-        networksetup -listlocations | grep -q exo || {
-          networksetup -createlocation exo
-        }
-
-        networksetup -switchtolocation exo
-        networksetup -listallhardwareports \\
-          | awk -F': ' '/Hardware Port: / {print $2}' \\
-          | while IFS=":" read -r name; do
-              case "$name" in
-                "Ethernet Adapter"*)
-                        ;;
-                "Thunderbolt Bridge")
-                        ;;
-                "Thunderbolt "*)
-                  networksetup -listallnetworkservices \\
-                    | grep -q "EXO $name" \\
-                      || networksetup -createnetworkservice "EXO $name" "$name" 2>/dev/null \\
-                      || continue
-                  networksetup -setdhcp "EXO $name"
-                        ;;
-                *)
-                  networksetup -listallnetworkservices \\
-                    | grep -q "$name" \\
-                      || networksetup -createnetworkservice "$name" "$name" 2>/dev/null \\
-                      || continue
-                        ;;
-              esac
-            done
-
        networksetup -listnetworkservices | grep -q "Thunderbolt Bridge" && {
          networksetup -setnetworkserviceenabled "Thunderbolt Bridge" off
        } || true
@@ -83,7 +51,7 @@ enum NetworkSetupHelper {
                let alert = NSAlert()
                alert.messageText = "EXO Network Configuration"
                alert.informativeText =
-                    "EXO needs to install a system service to configure local networking. This will disable Thunderbolt Bridge (preventing packet storms) and install a Network Location.\n\nYou will be prompted for your password."
+                    "EXO needs to install a system service to automatically disable Thunderbolt Bridge on startup. This prevents network loops when connecting multiple Macs via Thunderbolt.\n\nYou will be prompted for your administrator password."
                alert.alertStyle = .informational
                alert.addButton(withTitle: "Install")
                alert.addButton(withTitle: "Not Now")
@@ -244,11 +212,11 @@ enum NetworkSetupHelper {
        rm -f "$LOG_OUT" "$LOG_ERR"

        # Switch back to Automatic network location
-        networksetup -switchtolocation Automatic >/dev/null 2>&1 || true
+        networksetup -switchtolocation Automatic 2>/dev/null || true

        # Delete the exo network location if it exists
-        networksetup -listlocations 2>/dev/null | grep -q '^exo$' && {
-          networksetup -deletelocation exo >/dev/null 2>&1 || true
+        networksetup -listlocations | grep -q '^exo$' && {
+          networksetup -deletelocation exo 2>/dev/null || true
        } || true

        # Re-enable any Thunderbolt Bridge service if it exists
@@ -258,12 +226,12 @@ enum NetworkSetupHelper {
          tb_devices=$(networksetup -listallhardwareports 2>/dev/null | awk '
            /^Hardware Port:/ { port = tolower(substr($0, 16)) }
            /^Device:/ { if (port ~ /thunderbolt/) print substr($0, 9) }
-          ') || true
+          ')
          [ -z "$tb_devices" ] && return 0

          # For each bridge device, check if it contains Thunderbolt interfaces
          for bridge in bridge0 bridge1 bridge2; do
-            members=$(ifconfig "$bridge" 2>/dev/null | awk '/member:/ {print $2}') || true
+            members=$(ifconfig "$bridge" 2>/dev/null | awk '/member:/ {print $2}')
            [ -z "$members" ] && continue

            for tb_dev in $tb_devices; do
@@ -272,7 +240,7 @@ enum NetworkSetupHelper {
                service_name=$(networksetup -listnetworkserviceorder 2>/dev/null | awk -v dev="$bridge" '
                  /^\\([0-9*]/ { gsub(/^\\([0-9*]+\\) /, ""); svc = $0 }
                  /Device:/ && $0 ~ dev { print svc; exit }
-                ') || true
+                ')
                if [ -n "$service_name" ]; then
                  networksetup -setnetworkserviceenabled "$service_name" on 2>/dev/null || true
                  return 0
@@ -280,9 +248,8 @@ enum NetworkSetupHelper {
              fi
            done
          done
-          return 0
        }
-        find_and_enable_thunderbolt_bridge || true
+        find_and_enable_thunderbolt_bridge

        echo "EXO network components removed successfully"
        """
--- a/app/EXO/EXO/Services/ThunderboltBridgeService.swift
+++ b/app/EXO/EXO/Services/ThunderboltBridgeService.swift
@@ -127,24 +127,21 @@ final class ThunderboltBridgeService: ObservableObject {

        // 2. Request specific network configuration rights
        let rightName = "system.services.systemconfiguration.network"
-        status = rightName.withCString { nameCString in
-            var item = AuthorizationItem(
-                name: nameCString,
-                valueLength: 0,
-                value: nil,
-                flags: 0
-            )
-            return withUnsafeMutablePointer(to: &item) { itemPointer in
-                var rights = AuthorizationRights(count: 1, items: itemPointer)
-                return AuthorizationCopyRights(
-                    authRef,
-                    &rights,
-                    nil,
-                    [.extendRights, .interactionAllowed],
-                    nil
-                )
-            }
-        }
+        var item = AuthorizationItem(
+            name: rightName,
+            valueLength: 0,
+            value: nil,
+            flags: 0
+        )
+        var rights = AuthorizationRights(count: 1, items: &item)
+
+        status = AuthorizationCopyRights(
+            authRef,
+            &rights,
+            nil,
+            [.extendRights, .interactionAllowed],
+            nil
+        )
        guard status == errAuthorizationSuccess else {
            if status == errAuthorizationCanceled {
                throw ThunderboltBridgeError.authorizationCanceled
--- a/app/EXO/EXO/ViewModels/InstanceViewModel.swift
+++ b/app/EXO/EXO/ViewModels/InstanceViewModel.swift
@@ -216,7 +216,7 @@ struct InstanceTaskViewModel: Identifiable, Equatable {
    let promptPreview: String?
    let errorMessage: String?
    let subtitle: String?
-    let parameters: TextGenerationTaskParameters?
+    let parameters: ChatCompletionTaskParameters?

    var title: String {
        switch kind {
--- a/app/EXO/uninstall-exo.sh
+++ b/app/EXO/uninstall-exo.sh
@@ -29,21 +29,21 @@ YELLOW='\033[1;33m'
 NC='\033[0m' # No Color

 echo_info() {
-  echo -e "${GREEN}[INFO]${NC} $1"
+    echo -e "${GREEN}[INFO]${NC} $1"
 }

 echo_warn() {
-  echo -e "${YELLOW}[WARN]${NC} $1"
+    echo -e "${YELLOW}[WARN]${NC} $1"
 }

 echo_error() {
-  echo -e "${RED}[ERROR]${NC} $1"
+    echo -e "${RED}[ERROR]${NC} $1"
 }

 # Check if running as root
 if [[ $EUID -ne 0 ]]; then
-  echo_error "This script must be run as root (use sudo)"
-  exit 1
+    echo_error "This script must be run as root (use sudo)"
+    exit 1
 fi

 echo ""
@@ -55,64 +55,64 @@ echo ""
 # Unload the LaunchDaemon if running
 echo_info "Stopping network setup daemon..."
 if launchctl list | grep -q "$LABEL"; then
-  launchctl bootout system/"$LABEL" 2>/dev/null || true
-  echo_info "Daemon stopped"
+    launchctl bootout system/"$LABEL" 2>/dev/null || true
+    echo_info "Daemon stopped"
 else
-  echo_warn "Daemon was not running"
+    echo_warn "Daemon was not running"
 fi

 # Remove LaunchDaemon plist
-if [[ -f $PLIST_DEST ]]; then
-  rm -f "$PLIST_DEST"
-  echo_info "Removed LaunchDaemon plist"
+if [[ -f "$PLIST_DEST" ]]; then
+    rm -f "$PLIST_DEST"
+    echo_info "Removed LaunchDaemon plist"
 else
-  echo_warn "LaunchDaemon plist not found (already removed?)"
+    echo_warn "LaunchDaemon plist not found (already removed?)"
 fi

 # Remove the script and parent directory
-if [[ -f $SCRIPT_DEST ]]; then
-  rm -f "$SCRIPT_DEST"
-  echo_info "Removed network setup script"
+if [[ -f "$SCRIPT_DEST" ]]; then
+    rm -f "$SCRIPT_DEST"
+    echo_info "Removed network setup script"
 else
-  echo_warn "Network setup script not found (already removed?)"
+    echo_warn "Network setup script not found (already removed?)"
 fi

 # Remove EXO directory if empty
 if [[ -d "/Library/Application Support/EXO" ]]; then
-  rmdir "/Library/Application Support/EXO" 2>/dev/null &&
-    echo_info "Removed EXO support directory" ||
-    echo_warn "EXO support directory not empty, leaving in place"
+    rmdir "/Library/Application Support/EXO" 2>/dev/null && \
+        echo_info "Removed EXO support directory" || \
+        echo_warn "EXO support directory not empty, leaving in place"
 fi

 # Remove log files
-if [[ -f $LOG_OUT ]] || [[ -f $LOG_ERR ]]; then
-  rm -f "$LOG_OUT" "$LOG_ERR"
-  echo_info "Removed log files"
+if [[ -f "$LOG_OUT" ]] || [[ -f "$LOG_ERR" ]]; then
+    rm -f "$LOG_OUT" "$LOG_ERR"
+    echo_info "Removed log files"
 else
-  echo_warn "Log files not found (already removed?)"
+    echo_warn "Log files not found (already removed?)"
 fi

 # Switch back to Automatic network location
 echo_info "Restoring network configuration..."
 if networksetup -listlocations | grep -q "^Automatic$"; then
-  networksetup -switchtolocation Automatic 2>/dev/null || true
-  echo_info "Switched to Automatic network location"
+    networksetup -switchtolocation Automatic 2>/dev/null || true
+    echo_info "Switched to Automatic network location"
 else
-  echo_warn "Automatic network location not found"
+    echo_warn "Automatic network location not found"
 fi

 # Delete the exo network location if it exists
 if networksetup -listlocations | grep -q "^exo$"; then
-  networksetup -deletelocation exo 2>/dev/null || true
-  echo_info "Deleted 'exo' network location"
+    networksetup -deletelocation exo 2>/dev/null || true
+    echo_info "Deleted 'exo' network location"
 else
-  echo_warn "'exo' network location not found (already removed?)"
+    echo_warn "'exo' network location not found (already removed?)"
 fi

 # Re-enable Thunderbolt Bridge if it exists
 if networksetup -listnetworkservices 2>/dev/null | grep -q "Thunderbolt Bridge"; then
-  networksetup -setnetworkserviceenabled "Thunderbolt Bridge" on 2>/dev/null || true
-  echo_info "Re-enabled Thunderbolt Bridge"
+    networksetup -setnetworkserviceenabled "Thunderbolt Bridge" on 2>/dev/null || true
+    echo_info "Re-enabled Thunderbolt Bridge"
 fi

 # Note about launch at login registration
@@ -124,14 +124,14 @@ echo_warn "  System Settings → General → Login Items → Remove EXO"
 # Check if EXO.app exists in common locations
 APP_FOUND=false
 for app_path in "/Applications/EXO.app" "$HOME/Applications/EXO.app"; do
-  if [[ -d $app_path ]]; then
-    if [[ $APP_FOUND == false ]]; then
-      echo ""
-      APP_FOUND=true
+    if [[ -d "$app_path" ]]; then
+        if [[ "$APP_FOUND" == false ]]; then
+            echo ""
+            APP_FOUND=true
+        fi
+        echo_warn "EXO.app found at: $app_path"
+        echo_warn "You may want to move it to Trash manually."
    fi
-    echo_warn "EXO.app found at: $app_path"
-    echo_warn "You may want to move it to Trash manually."
-  fi
 done

 echo ""
@@ -151,3 +151,4 @@ echo ""
 echo "Manual step required:"
 echo "  Remove EXO from Login Items in System Settings → General → Login Items"
 echo ""
+
--- a/bench/exo_bench.py
+++ b/bench/exo_bench.py
@@ -5,13 +5,10 @@ from __future__ import annotations
 import argparse
 import contextlib
 import http.client
-import itertools
 import json
 import os
-import sys
 import time
 from collections.abc import Callable
-from pathlib import Path
 from statistics import mean
 from typing import Any
 from urllib.parse import urlencode
@@ -19,84 +16,6 @@ from urllib.parse import urlencode
 from loguru import logger
 from transformers import AutoTokenizer

-# Monkey-patch for transformers 5.x compatibility
-# Kimi's tokenization_kimi.py imports bytes_to_unicode from the old location
-# which was moved in transformers 5.0.0rc2
-try:
-    import transformers.models.gpt2.tokenization_gpt2 as gpt2_tokenization
-    from transformers.convert_slow_tokenizer import bytes_to_unicode
-
-    if not hasattr(gpt2_tokenization, "bytes_to_unicode"):
-        gpt2_tokenization.bytes_to_unicode = bytes_to_unicode  # type: ignore[attr-defined]
-except ImportError:
-    pass  # transformers < 5.0 or bytes_to_unicode not available
-
-
-def load_tokenizer_for_bench(model_id: str) -> Any:
-    """
-    Load tokenizer for benchmarking, with special handling for Kimi models.
-
-    Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer.
-    This function replicates the logic from utils_mlx.py for bench compatibility.
-    """
-    model_id_lower = model_id.lower()
-
-    if "kimi-k2" in model_id_lower:
-        import importlib.util
-        import types
-
-        from huggingface_hub import snapshot_download
-
-        # Download/get the model path
-        model_path = Path(
-            snapshot_download(
-                model_id,
-                allow_patterns=["*.json", "*.py", "*.tiktoken"],
-            )
-        )
-
-        sys.path.insert(0, str(model_path))
-
-        # Load tool_declaration_ts first (tokenization_kimi imports it with relative import)
-        tool_decl_path = model_path / "tool_declaration_ts.py"
-        if tool_decl_path.exists():
-            spec = importlib.util.spec_from_file_location(
-                "tool_declaration_ts", tool_decl_path
-            )
-            if spec and spec.loader:
-                tool_decl_module = importlib.util.module_from_spec(spec)
-                sys.modules["tool_declaration_ts"] = tool_decl_module
-                spec.loader.exec_module(tool_decl_module)
-
-        # Load tokenization_kimi with patched source (convert relative to absolute import)
-        tok_path = model_path / "tokenization_kimi.py"
-        source = tok_path.read_text()
-        source = source.replace("from .tool_declaration_ts", "from tool_declaration_ts")
-        spec = importlib.util.spec_from_file_location("tokenization_kimi", tok_path)
-        if spec:
-            tok_module = types.ModuleType("tokenization_kimi")
-            tok_module.__file__ = str(tok_path)
-            sys.modules["tokenization_kimi"] = tok_module
-            exec(compile(source, tok_path, "exec"), tok_module.__dict__)  # noqa: S102
-            TikTokenTokenizer = tok_module.TikTokenTokenizer  # noqa: N806
-        else:
-            from tokenization_kimi import TikTokenTokenizer  # type: ignore[import-not-found]  # noqa: I001
-
-        hf_tokenizer: Any = TikTokenTokenizer.from_pretrained(model_path)
-
-        # Patch encode to use internal tiktoken model directly
-        # transformers 5.x has a bug in the encode->pad path for slow tokenizers
-        def _patched_encode(text: str, **kwargs: object) -> list[int]:
-            # Pass allowed_special="all" to handle special tokens like <|im_user|>
-            return list(hf_tokenizer.model.encode(text, allowed_special="all"))
-
-        hf_tokenizer.encode = _patched_encode
-
-        return hf_tokenizer
-
-    # Default: use AutoTokenizer
-    return AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-

 class ExoHttpError(RuntimeError):
    def __init__(self, status: int, reason: str, body_preview: str):
@@ -105,7 +24,7 @@ class ExoHttpError(RuntimeError):


 class ExoClient:
-    def __init__(self, host: str, port: int, timeout_s: float = 7200.0):
+    def __init__(self, host: str, port: int, timeout_s: float = 600.0):
        self.host = host
        self.port = port
        self.timeout_s = timeout_s
@@ -261,7 +180,14 @@ def parse_int_list(values: list[str]) -> list[int]:
            part = part.strip()
            if part:
                items.append(int(part))
-    return items
+
+    seen: set[int] = set()
+    out: list[int] = []
+    for x in items:
+        if x not in seen:
+            out.append(x)
+            seen.add(x)
+    return out


 def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]:
@@ -314,11 +240,7 @@ def run_one_completion(

    stats = out.get("generation_stats")

-    # Extract preview, handling None content (common for thinking models)
-    choices = out.get("choices") or [{}]
-    message = choices[0].get("message", {}) if choices else {}
-    content = message.get("content") or ""
-    preview = content[:200] if content else ""
+    preview = (((out.get("choices") or [{}])[0].get("message") or {}).get("content") or "")[:200]

    return {
        "elapsed_s": elapsed,
@@ -355,29 +277,12 @@ class PromptSizer:
                f"Target ({target}) is smaller than template overhead ({self.base_tokens})."
            )

-        # Estimate tokens per atom using a sample
-        sample_count = 100
-        sample_content = self.atom * sample_count
-        sample_tokens = self.count_fn(sample_content) - self.base_tokens
-        tokens_per_atom = sample_tokens / sample_count
-
-        # Estimate starting point
-        needed_tokens = target - self.base_tokens
-        estimated_atoms = int(needed_tokens / tokens_per_atom)
-
-        # Binary search to find exact atom count
-        low, high = 0, estimated_atoms * 2 + 100
-        while low < high:
-            mid = (low + high) // 2
-            tok = self.count_fn(self.atom * mid)
-            if tok < target:
-                low = mid + 1
-            else:
-                high = mid
-
-        content = self.atom * low
+        content = ""
        tok = self.count_fn(content)
-        logger.info(f"{tok=}")
+
+        while tok < target:
+            content += self.atom
+            tok = self.count_fn(content)

        if tok != target:
            raise RuntimeError(
@@ -443,7 +348,7 @@ def main() -> int:
        help="Warmup runs per placement (uses first pp/tg).",
    )
    ap.add_argument(
-        "--timeout", type=float, default=7200.0, help="HTTP timeout (seconds)."
+        "--timeout", type=float, default=600.0, help="HTTP timeout (seconds)."
    )
    ap.add_argument(
        "--json-out",
@@ -453,11 +358,6 @@ def main() -> int:
    ap.add_argument(
        "--dry-run", action="store_true", help="List selected placements and exit."
    )
-    ap.add_argument(
-        "--all-combinations",
-        action="store_true",
-        help="Force all pp×tg combinations (cartesian product) even when lists have equal length.",
-    )
    args = ap.parse_args()

    pp_list = parse_int_list(args.pp)
@@ -469,15 +369,6 @@ def main() -> int:
        logger.error("--repeat must be >= 1")
        return 2

-    # Log pairing mode
-    use_combinations = args.all_combinations or len(pp_list) != len(tg_list)
-    if use_combinations:
-        logger.info(
-            f"pp/tg mode: combinations (product) - {len(pp_list) * len(tg_list)} pairs"
-        )
-    else:
-        logger.info(f"pp/tg mode: tandem (zip) - {len(pp_list)} pairs")
-
    client = ExoClient(args.host, args.port, timeout_s=args.timeout)
    short_id, full_model_id = resolve_model_short_id(client, args.model)

@@ -486,7 +377,10 @@ def main() -> int:
    )
    previews = previews_resp.get("previews") or []

-    tokenizer = load_tokenizer_for_bench(full_model_id)
+    tokenizer = AutoTokenizer.from_pretrained(
+        full_model_id,
+        trust_remote_code=True,
+    )
    if tokenizer is None:
        raise RuntimeError("[exo-bench] tokenizer load failed")

@@ -592,55 +486,60 @@ def main() -> int:
                )
                logger.debug(f"  warmup {i + 1}/{args.warmup} done")

-            # If pp and tg lists have same length, run in tandem (zip)
-            # Otherwise (or if --all-combinations), run all combinations (cartesian product)
-            if use_combinations:
-                pp_tg_pairs = list(itertools.product(pp_list, tg_list))
-            else:
-                pp_tg_pairs = list(zip(pp_list, tg_list, strict=True))
-
-            for pp, tg in pp_tg_pairs:
-                runs: list[dict[str, Any]] = []
-                for r in range(args.repeat):
-                    time.sleep(3)
-                    try:
-                        row, actual_pp_tokens = run_one_completion(
-                            client, full_model_id, pp, tg, prompt_sizer
+            for pp in pp_list:
+                # if (
+                #     pp * n_nodes > 2048
+                #     and "ring" in instance_meta.lower()
+                #     and "tensor" in sharding.lower()
+                # ):
+                #     model_card = MODEL_CARDS[short_id]
+                #     if model_card.metadata.storage_size > Memory.from_gb(10):
+                #         logger.info(
+                #             f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
+                #         )
+                #         continue
+                for tg in tg_list:
+                    runs: list[dict[str, Any]] = []
+                    for r in range(args.repeat):
+                        time.sleep(3)
+                        try:
+                            row, actual_pp_tokens = run_one_completion(
+                                client, full_model_id, pp, tg, prompt_sizer
+                            )
+                        except Exception as e:
+                            logger.error(e)
+                            continue
+                        row.update(
+                            {
+                                "model_short_id": short_id,
+                                "model_id": full_model_id,
+                                "placement_sharding": sharding,
+                                "placement_instance_meta": instance_meta,
+                                "placement_nodes": n_nodes,
+                                "instance_id": instance_id,
+                                "pp_tokens": actual_pp_tokens,
+                                "tg": tg,
+                                "repeat_index": r,
+                            }
                        )
-                    except Exception as e:
-                        logger.error(e)
-                        continue
-                    row.update(
-                        {
-                            "model_short_id": short_id,
-                            "model_id": full_model_id,
-                            "placement_sharding": sharding,
-                            "placement_instance_meta": instance_meta,
-                            "placement_nodes": n_nodes,
-                            "instance_id": instance_id,
-                            "pp_tokens": actual_pp_tokens,
-                            "tg": tg,
-                            "repeat_index": r,
-                        }
-                    )
-                    runs.append(row)
-                    all_rows.append(row)
+                        runs.append(row)
+                        all_rows.append(row)

-                if runs:
-                    prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
-                    gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
-                    ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
-                    gtok = mean(x["stats"]["generation_tokens"] for x in runs)
-                    peak = mean(
-                        x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
-                    )
+                    if runs:
+                        prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
+                        gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
+                        ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
+                        gtok = mean(x["stats"]["generation_tokens"] for x in runs)
+                        peak = mean(
+                            x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
+                        )

-                    logger.info(
-                        f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
-                        f"prompt_tokens={ptok} gen_tokens={gtok}    "
-                        f"peak_memory={format_peak_memory(peak)}\n"
-                    )
-                time.sleep(2)
+                        logger.info(
+                            f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
+                            f"prompt_tokens={ptok} gen_tokens={gtok}    "
+                            f"peak_memory={format_peak_memory(peak)}\n"
+                        )
+                    time.sleep(2)
        finally:
            try:
                client.request_json("DELETE", f"/instance/{instance_id}")
--- a/dashboard/package-lock.json
+++ b/dashboard/package-lock.json
@@ -865,6 +865,7 @@
 			"integrity": "sha512-oH8tXw7EZnie8FdOWYrF7Yn4IKrqTFHhXvl8YxXxbKwTMcD/5NNCryUSEXRk2ZR4ojnub0P8rNrsVGHXWqIDtA==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@standard-schema/spec": "^1.0.0",
 				"@sveltejs/acorn-typescript": "^1.0.5",
@@ -904,6 +905,7 @@
 			"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
 				"debug": "^4.4.1",
@@ -1520,6 +1522,7 @@
 			"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"undici-types": "~6.21.0"
 			}
@@ -1529,6 +1532,7 @@
 			"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
 			"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
 			"license": "MIT",
+			"peer": true,
 			"bin": {
 				"acorn": "bin/acorn"
 			},
@@ -1941,6 +1945,7 @@
 			"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
 			"dev": true,
 			"license": "ISC",
+			"peer": true,
 			"engines": {
 				"node": ">=12"
 			}
@@ -2648,6 +2653,7 @@
 			"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"engines": {
 				"node": ">=12"
 			},
@@ -2690,6 +2696,7 @@
 			"integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"bin": {
 				"prettier": "bin/prettier.cjs"
 			},
@@ -2862,6 +2869,7 @@
 			"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.45.3.tgz",
 			"integrity": "sha512-ngKXNhNvwPzF43QqEhDOue7TQTrG09em1sd4HBxVF0Wr2gopAmdEWan+rgbdgK4fhBtSOTJO8bYU4chUG7VXZQ==",
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@jridgewell/remapping": "^2.3.4",
 				"@jridgewell/sourcemap-codec": "^1.5.0",
@@ -3006,6 +3014,7 @@
 			"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
 			"dev": true,
 			"license": "Apache-2.0",
+			"peer": true,
 			"bin": {
 				"tsc": "bin/tsc",
 				"tsserver": "bin/tsserver"
@@ -3027,6 +3036,7 @@
 			"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"esbuild": "^0.25.0",
 				"fdir": "^6.4.4",
--- a/dashboard/parts.nix
+++ b/dashboard/parts.nix
@@ -3,28 +3,12 @@
  perSystem =
    { pkgs, lib, ... }:
    let
-      # Filter source to ONLY include package.json and package-lock.json
-      # This ensures prettier-svelte only rebuilds when lockfiles change
-      dashboardLockfileSrc = lib.cleanSourceWith {
-        src = inputs.self;
-        filter =
-          path: type:
-          let
-            baseName = builtins.baseNameOf path;
-            isDashboardDir = baseName == "dashboard" && type == "directory";
-            isPackageFile =
-              (lib.hasInfix "/dashboard/" path || lib.hasSuffix "/dashboard" (builtins.dirOf path))
-              && (baseName == "package.json" || baseName == "package-lock.json");
-          in
-          isDashboardDir || isPackageFile;
-      };
-
      # Stub source with lockfiles and minimal files for build to succeed
      # This allows prettier-svelte to avoid rebuilding when dashboard source changes
      dashboardStubSrc = pkgs.runCommand "dashboard-stub-src" { } ''
        mkdir -p $out
-        cp ${dashboardLockfileSrc}/dashboard/package.json $out/
-        cp ${dashboardLockfileSrc}/dashboard/package-lock.json $out/
+        cp ${inputs.self}/dashboard/package.json $out/
+        cp ${inputs.self}/dashboard/package-lock.json $out/
        # Minimal files so vite build succeeds (produces empty output)
        echo '<!DOCTYPE html><html><head></head><body></body></html>' > $out/index.html
        mkdir -p $out/src
--- a/dashboard/src/lib/components/ChatForm.svelte
+++ b/dashboard/src/lib/components/ChatForm.svelte
@@ -12,6 +12,7 @@
    ttftMs,
    tps,
    totalTokens,
+    cancelRequest,
  } from "$lib/stores/app.svelte";
  import ChatAttachments from "./ChatAttachments.svelte";
  import ImageParamsPanel from "./ImageParamsPanel.svelte";
@@ -605,37 +606,15 @@
        style="min-height: 28px; max-height: 150px;"
      ></textarea>

-      <button
-        type="submit"
-        disabled={!canSend || loading || isEditOnlyWithoutImage}
-        class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap
-					{!canSend || loading || isEditOnlyWithoutImage
-          ? 'bg-exo-medium-gray/50 text-exo-light-gray cursor-not-allowed'
-          : 'bg-exo-yellow text-exo-black hover:bg-exo-yellow-darker hover:shadow-[0_0_20px_rgba(255,215,0,0.3)]'}"
-        aria-label={shouldShowEditMode
-          ? "Edit image"
-          : isImageModel()
-            ? "Generate image"
-            : "Send message"}
-      >
-        {#if loading}
+      {#if loading}
+        <button
+          type="button"
+          onclick={() => cancelRequest()}
+          class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap bg-exo-medium-gray/50 text-exo-light-gray border border-exo-medium-gray/50 hover:border-red-500/50 hover:text-red-400 cursor-pointer"
+        >
          <span class="inline-flex items-center gap-1 sm:gap-2">
-            <span
-              class="w-2.5 h-2.5 sm:w-3 sm:h-3 border-2 border-current border-t-transparent rounded-full animate-spin"
-            ></span>
-            <span class="hidden sm:inline"
-              >{shouldShowEditMode
-                ? "EDITING"
-                : isImageModel()
-                  ? "GENERATING"
-                  : "PROCESSING"}</span
-            >
-            <span class="sm:hidden">...</span>
-          </span>
-        {:else if shouldShowEditMode}
-          <span class="inline-flex items-center gap-1.5">
            <svg
-              class="w-3.5 h-3.5"
+              class="w-3 h-3"
              fill="none"
              viewBox="0 0 24 24"
              stroke="currentColor"
@@ -644,47 +623,81 @@
              <path
                stroke-linecap="round"
                stroke-linejoin="round"
-                d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
+                d="M6 18L18 6M6 6l12 12"
              />
            </svg>
-            <span>EDIT</span>
+            <span class="hidden sm:inline">CANCEL</span>
+            <span class="sm:hidden">X</span>
          </span>
-        {:else if isEditOnlyWithoutImage}
-          <span class="inline-flex items-center gap-1.5">
-            <svg
-              class="w-3.5 h-3.5"
-              fill="none"
-              viewBox="0 0 24 24"
-              stroke="currentColor"
-              stroke-width="2"
-            >
-              <path
-                stroke-linecap="round"
-                stroke-linejoin="round"
-                d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
-              />
-            </svg>
-            <span>EDIT</span>
-          </span>
-        {:else if isImageModel()}
-          <span class="inline-flex items-center gap-1.5">
-            <svg
-              class="w-3.5 h-3.5"
-              fill="none"
-              viewBox="0 0 24 24"
-              stroke="currentColor"
-              stroke-width="2"
-            >
-              <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
-              <circle cx="8.5" cy="8.5" r="1.5" />
-              <polyline points="21 15 16 10 5 21" />
-            </svg>
-            <span>GENERATE</span>
-          </span>
-        {:else}
-          SEND
-        {/if}
-      </button>
+        </button>
+      {:else}
+        <button
+          type="submit"
+          disabled={!canSend || isEditOnlyWithoutImage}
+          class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap
+            {!canSend || isEditOnlyWithoutImage
+            ? 'bg-exo-medium-gray/50 text-exo-light-gray cursor-not-allowed'
+            : 'bg-exo-yellow text-exo-black hover:bg-exo-yellow-darker hover:shadow-[0_0_20px_rgba(255,215,0,0.3)]'}"
+          aria-label={shouldShowEditMode
+            ? "Edit image"
+            : isImageModel()
+              ? "Generate image"
+              : "Send message"}
+        >
+          {#if shouldShowEditMode}
+            <span class="inline-flex items-center gap-1.5">
+              <svg
+                class="w-3.5 h-3.5"
+                fill="none"
+                viewBox="0 0 24 24"
+                stroke="currentColor"
+                stroke-width="2"
+              >
+                <path
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                  d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
+                />
+              </svg>
+              <span>EDIT</span>
+            </span>
+          {:else if isEditOnlyWithoutImage}
+            <span class="inline-flex items-center gap-1.5">
+              <svg
+                class="w-3.5 h-3.5"
+                fill="none"
+                viewBox="0 0 24 24"
+                stroke="currentColor"
+                stroke-width="2"
+              >
+                <path
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                  d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
+                />
+              </svg>
+              <span>EDIT</span>
+            </span>
+          {:else if isImageModel()}
+            <span class="inline-flex items-center gap-1.5">
+              <svg
+                class="w-3.5 h-3.5"
+                fill="none"
+                viewBox="0 0 24 24"
+                stroke="currentColor"
+                stroke-width="2"
+              >
+                <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
+                <circle cx="8.5" cy="8.5" r="1.5" />
+                <polyline points="21 15 16 10 5 21" />
+              </svg>
+              <span>GENERATE</span>
+            </span>
+          {:else}
+            SEND
+          {/if}
+        </button>
+      {/if}
    </div>

    <!-- Bottom accent line -->
--- a/dashboard/src/lib/components/FamilyLogos.svelte
+++ b/dashboard/src/lib/components/FamilyLogos.svelte
@@ -1,73 +0,0 @@
-<script lang="ts">
-  type FamilyLogoProps = {
-    family: string;
-    class?: string;
-  };
-
-  let { family, class: className = "" }: FamilyLogoProps = $props();
-</script>
-
-{#if family === "favorites"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
-    />
-  </svg>
-{:else if family === "llama" || family === "meta"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M6.915 4.03c-1.968 0-3.683 1.28-4.871 3.113C.704 9.208 0 11.883 0 14.449c0 .706.07 1.369.21 1.973a6.624 6.624 0 0 0 .265.86 5.297 5.297 0 0 0 .371.761c.696 1.159 1.818 1.927 3.593 1.927 1.497 0 2.633-.671 3.965-2.444.76-1.012 1.144-1.626 2.663-4.32l.756-1.339.186-.325c.061.1.121.196.183.3l2.152 3.595c.724 1.21 1.665 2.556 2.47 3.314 1.046.987 1.992 1.22 3.06 1.22 1.075 0 1.876-.355 2.455-.843a3.743 3.743 0 0 0 .81-.973c.542-.939.861-2.127.861-3.745 0-2.72-.681-5.357-2.084-7.45-1.282-1.912-2.957-2.93-4.716-2.93-1.047 0-2.088.467-3.053 1.308-.652.57-1.257 1.29-1.82 2.05-.69-.875-1.335-1.547-1.958-2.056-1.182-.966-2.315-1.303-3.454-1.303zm10.16 2.053c1.147 0 2.188.758 2.992 1.999 1.132 1.748 1.647 4.195 1.647 6.4 0 1.548-.368 2.9-1.839 2.9-.58 0-1.027-.23-1.664-1.004-.496-.601-1.343-1.878-2.832-4.358l-.617-1.028a44.908 44.908 0 0 0-1.255-1.98c.07-.109.141-.224.211-.327 1.12-1.667 2.118-2.602 3.358-2.602zm-10.201.553c1.265 0 2.058.791 2.675 1.446.307.327.737.871 1.234 1.579l-1.02 1.566c-.757 1.163-1.882 3.017-2.837 4.338-1.191 1.649-1.81 1.817-2.486 1.817-.524 0-1.038-.237-1.383-.794-.263-.426-.464-1.13-.464-2.046 0-2.221.63-4.535 1.66-6.088.454-.687.964-1.226 1.533-1.533a2.264 2.264 0 0 1 1.088-.285z"
-    />
-  </svg>
-{:else if family === "qwen"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12.604 1.34c.393.69.784 1.382 1.174 2.075a.18.18 0 00.157.091h5.552c.174 0 .322.11.446.327l1.454 2.57c.19.337.24.478.024.837-.26.43-.513.864-.76 1.3l-.367.658c-.106.196-.223.28-.04.512l2.652 4.637c.172.301.111.494-.043.77-.437.785-.882 1.564-1.335 2.34-.159.272-.352.375-.68.37-.777-.016-1.552-.01-2.327.016a.099.099 0 00-.081.05 575.097 575.097 0 01-2.705 4.74c-.169.293-.38.363-.725.364-.997.003-2.002.004-3.017.002a.537.537 0 01-.465-.271l-1.335-2.323a.09.09 0 00-.083-.049H4.982c-.285.03-.553-.001-.805-.092l-1.603-2.77a.543.543 0 01-.002-.54l1.207-2.12a.198.198 0 000-.197 550.951 550.951 0 01-1.875-3.272l-.79-1.395c-.16-.31-.173-.496.095-.965.465-.813.927-1.625 1.387-2.436.132-.234.304-.334.584-.335a338.3 338.3 0 012.589-.001.124.124 0 00.107-.063l2.806-4.895a.488.488 0 01.422-.246c.524-.001 1.053 0 1.583-.006L11.704 1c.341-.003.724.032.9.34zm-3.432.403a.06.06 0 00-.052.03L6.254 6.788a.157.157 0 01-.135.078H3.253c-.056 0-.07.025-.041.074l5.81 10.156c.025.042.013.062-.034.063l-2.795.015a.218.218 0 00-.2.116l-1.32 2.31c-.044.078-.021.118.068.118l5.716.008c.046 0 .08.02.104.061l1.403 2.454c.046.081.092.082.139 0l5.006-8.76.783-1.382a.055.055 0 01.096 0l1.424 2.53a.122.122 0 00.107.062l2.763-.02a.04.04 0 00.035-.02.041.041 0 000-.04l-2.9-5.086a.108.108 0 010-.113l.293-.507 1.12-1.977c.024-.041.012-.062-.035-.062H9.2c-.059 0-.073-.026-.043-.077l1.434-2.505a.107.107 0 000-.114L9.225 1.774a.06.06 0 00-.053-.031zm6.29 8.02c.046 0 .058.02.034.06l-.832 1.465-2.613 4.585a.056.056 0 01-.05.029.058.058 0 01-.05-.029L8.498 9.841c-.02-.034-.01-.052.028-.054l.216-.012 6.722-.012z"
-    />
-  </svg>
-{:else if family === "deepseek"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M23.748 4.482c-.254-.124-.364.113-.512.234-.051.039-.094.09-.137.136-.372.397-.806.657-1.373.626-.829-.046-1.537.214-2.163.848-.133-.782-.575-1.248-1.247-1.548-.352-.156-.708-.311-.955-.65-.172-.241-.219-.51-.305-.774-.055-.16-.11-.323-.293-.35-.2-.031-.278.136-.356.276-.313.572-.434 1.202-.422 1.84.027 1.436.633 2.58 1.838 3.393.137.093.172.187.129.323-.082.28-.18.552-.266.833-.055.179-.137.217-.329.14a5.526 5.526 0 01-1.736-1.18c-.857-.828-1.631-1.742-2.597-2.458a11.365 11.365 0 00-.689-.471c-.985-.957.13-1.743.388-1.836.27-.098.093-.432-.779-.428-.872.004-1.67.295-2.687.684a3.055 3.055 0 01-.465.137 9.597 9.597 0 00-2.883-.102c-1.885.21-3.39 1.102-4.497 2.623C.082 8.606-.231 10.684.152 12.85c.403 2.284 1.569 4.175 3.36 5.653 1.858 1.533 3.997 2.284 6.438 2.14 1.482-.085 3.133-.284 4.994-1.86.47.234.962.327 1.78.397.63.059 1.236-.03 1.705-.128.735-.156.684-.837.419-.961-2.155-1.004-1.682-.595-2.113-.926 1.096-1.296 2.746-2.642 3.392-7.003.05-.347.007-.565 0-.845-.004-.17.035-.237.23-.256a4.173 4.173 0 001.545-.475c1.396-.763 1.96-2.015 2.093-3.517.02-.23-.004-.467-.247-.588zM11.581 18c-2.089-1.642-3.102-2.183-3.52-2.16-.392.024-.321.471-.235.763.09.288.207.486.371.739.114.167.192.416-.113.603-.673.416-1.842-.14-1.897-.167-1.361-.802-2.5-1.86-3.301-3.307-.774-1.393-1.224-2.887-1.298-4.482-.02-.386.093-.522.477-.592a4.696 4.696 0 011.529-.039c2.132.312 3.946 1.265 5.468 2.774.868.86 1.525 1.887 2.202 2.891.72 1.066 1.494 2.082 2.48 2.914.348.292.625.514.891.677-.802.09-2.14.11-3.054-.614zm1-6.44a.306.306 0 01.415-.287.302.302 0 01.2.288.306.306 0 01-.31.307.303.303 0 01-.304-.308zm3.11 1.596c-.2.081-.399.151-.59.16a1.245 1.245 0 01-.798-.254c-.274-.23-.47-.358-.552-.758a1.73 1.73 0 01.016-.588c.07-.327-.008-.537-.239-.727-.187-.156-.426-.199-.688-.199a.559.559 0 01-.254-.078c-.11-.054-.2-.19-.114-.358.028-.054.16-.186.192-.21.356-.202.767-.136 1.146.016.352.144.618.408 1.001.782.391.451.462.576.685.914.176.265.336.537.445.848.067.195-.019.354-.25.452z"
-    />
-  </svg>
-{:else if family === "openai" || family === "gpt-oss"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z"
-    />
-  </svg>
-{:else if family === "glm"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M11.991 23.503a.24.24 0 00-.244.248.24.24 0 00.244.249.24.24 0 00.245-.249.24.24 0 00-.22-.247l-.025-.001zM9.671 5.365a1.697 1.697 0 011.099 2.132l-.071.172-.016.04-.018.054c-.07.16-.104.32-.104.498-.035.71.47 1.279 1.186 1.314h.366c1.309.053 2.338 1.173 2.286 2.523-.052 1.332-1.152 2.38-2.478 2.327h-.174c-.715.018-1.274.64-1.239 1.368 0 .124.018.23.053.337.209.373.54.658.96.8.75.23 1.517-.125 1.9-.782l.018-.035c.402-.64 1.17-.96 1.92-.711.854.284 1.378 1.226 1.099 2.167a1.661 1.661 0 01-2.077 1.102 1.711 1.711 0 01-.907-.711l-.017-.035c-.2-.323-.463-.58-.851-.711l-.056-.018a1.646 1.646 0 00-1.954.746 1.66 1.66 0 01-1.065.764 1.677 1.677 0 01-1.989-1.279c-.209-.906.332-1.83 1.257-2.043a1.51 1.51 0 01.296-.035h.018c.68-.071 1.151-.622 1.116-1.333a1.307 1.307 0 00-.227-.693 2.515 2.515 0 01-.366-1.403 2.39 2.39 0 01.366-1.208c.14-.195.21-.444.227-.693.018-.71-.506-1.261-1.186-1.332l-.07-.018a1.43 1.43 0 01-.299-.07l-.05-.019a1.7 1.7 0 01-1.047-2.114 1.68 1.68 0 012.094-1.101zm-5.575 10.11c.26-.264.639-.367.994-.27.355.096.633.379.728.74.095.362-.007.748-.267 1.013-.402.41-1.053.41-1.455 0a1.062 1.062 0 010-1.482zm14.845-.294c.359-.09.738.024.992.297.254.274.344.665.237 1.025-.107.36-.396.634-.756.718-.551.128-1.1-.22-1.23-.781a1.05 1.05 0 01.757-1.26zm-.064-4.39c.314.32.49.753.49 1.206 0 .452-.176.886-.49 1.206-.315.32-.74.5-1.185.5-.444 0-.87-.18-1.184-.5a1.727 1.727 0 010-2.412 1.654 1.654 0 012.369 0zm-11.243.163c.364.484.447 1.128.218 1.691a1.665 1.665 0 01-2.188.923c-.855-.36-1.26-1.358-.907-2.228a1.68 1.68 0 011.33-1.038c.593-.08 1.183.169 1.547.652zm11.545-4.221c.368 0 .708.2.892.524.184.324.184.724 0 1.048a1.026 1.026 0 01-.892.524c-.568 0-1.03-.47-1.03-1.048 0-.579.462-1.048 1.03-1.048zm-14.358 0c.368 0 .707.2.891.524.184.324.184.724 0 1.048a1.026 1.026 0 01-.891.524c-.569 0-1.03-.47-1.03-1.048 0-.579.461-1.048 1.03-1.048zm10.031-1.475c.925 0 1.675.764 1.675 1.706s-.75 1.705-1.675 1.705-1.674-.763-1.674-1.705c0-.942.75-1.706 
1.674-1.706zm-2.626-.684c.362-.082.653-.356.761-.718a1.062 1.062 0 00-.238-1.028 1.017 1.017 0 00-.996-.294c-.547.14-.881.7-.752 1.257.13.558.675.907 1.225.783zm0 16.876c.359-.087.644-.36.75-.72a1.062 1.062 0 00-.237-1.019 1.018 1.018 0 00-.985-.301 1.037 1.037 0 00-.762.717c-.108.361-.017.754.239 1.028.245.263.606.377.953.305l.043-.01zM17.19 3.5a.631.631 0 00.628-.64c0-.355-.279-.64-.628-.64a.631.631 0 00-.628.64c0 .355.28.64.628.64zm-10.38 0a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64a.631.631 0 00-.628.64c0 .355.279.64.628.64zm-5.182 7.852a.631.631 0 00-.628.64c0 .354.28.639.628.639a.63.63 0 00.627-.606l.001-.034a.62.62 0 00-.628-.64zm5.182 9.13a.631.631 0 00-.628.64c0 .355.279.64.628.64a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64zm10.38.018a.631.631 0 00-.628.64c0 .355.28.64.628.64a.631.631 0 00.628-.64c0-.355-.279-.64-.628-.64zm5.182-9.148a.631.631 0 00-.628.64c0 .354.279.639.628.639a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64zm-.384-4.992a.24.24 0 00.244-.249.24.24 0 00-.244-.249.24.24 0 00-.244.249c0 .142.122.249.244.249zM11.991.497a.24.24 0 00.245-.248A.24.24 0 0011.99 0a.24.24 0 00-.244.249c0 .133.108.236.223.247l.021.001zM2.011 6.36a.24.24 0 00.245-.249.24.24 0 00-.244-.249.24.24 0 00-.244.249.24.24 0 00.244.249zm0 11.263a.24.24 0 00-.243.248.24.24 0 00.244.249.24.24 0 00.244-.249.252.252 0 00-.244-.248zm19.995-.018a.24.24 0 00-.245.248.24.24 0 00.245.25.24.24 0 00.244-.25.252.252 0 00-.244-.248z"
-    />
-  </svg>
-{:else if family === "minimax"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M16.278 2c1.156 0 2.093.927 2.093 2.07v12.501a.74.74 0 00.744.709.74.74 0 00.743-.709V9.099a2.06 2.06 0 012.071-2.049A2.06 2.06 0 0124 9.1v6.561a.649.649 0 01-.652.645.649.649 0 01-.653-.645V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v7.472a2.037 2.037 0 01-2.048 2.026 2.037 2.037 0 01-2.048-2.026v-12.5a.785.785 0 00-.788-.753.785.785 0 00-.789.752l-.001 15.904A2.037 2.037 0 0113.441 22a2.037 2.037 0 01-2.048-2.026V18.04c0-.356.292-.645.652-.645.36 0 .652.289.652.645v1.934c0 .263.142.506.372.638.23.131.514.131.744 0a.734.734 0 00.372-.638V4.07c0-1.143.937-2.07 2.093-2.07zm-5.674 0c1.156 0 2.093.927 2.093 2.07v11.523a.648.648 0 01-.652.645.648.648 0 01-.652-.645V4.07a.785.785 0 00-.789-.78.785.785 0 00-.789.78v14.013a2.06 2.06 0 01-2.07 2.048 2.06 2.06 0 01-2.071-2.048V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v3.8a2.06 2.06 0 01-2.071 2.049A2.06 2.06 0 010 12.9v-1.378c0-.357.292-.646.652-.646.36 0 .653.29.653.646V12.9c0 .418.343.757.766.757s.766-.339.766-.757V9.099a2.06 2.06 0 012.07-2.048 2.06 2.06 0 012.071 2.048v8.984c0 .419.343.758.767.758.423 0 .766-.339.766-.758V4.07c0-1.143.937-2.07 2.093-2.07z"
-    />
-  </svg>
-{:else if family === "kimi"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M19.738 5.776c.163-.209.306-.4.457-.585.07-.087.064-.153-.004-.244-.655-.861-.717-1.817-.34-2.787.283-.73.909-1.072 1.674-1.145.477-.045.945.004 1.379.236.57.305.902.77 1.01 1.412.086.512.07 1.012-.075 1.508-.257.878-.888 1.333-1.753 1.448-.718.096-1.446.108-2.17.157-.056.004-.113 0-.178 0z"
-    />
-    <path
-      d="M17.962 1.844h-4.326l-3.425 7.81H5.369V1.878H1.5V22h3.87v-8.477h6.824a3.025 3.025 0 002.743-1.75V22h3.87v-8.477a3.87 3.87 0 00-3.588-3.86v-.01h-2.125a3.94 3.94 0 002.323-2.12l2.545-5.689z"
-    />
-  </svg>
-{:else if family === "huggingface"}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12.025 1.13c-5.77 0-10.449 4.647-10.449 10.378 0 1.112.178 2.181.503 3.185.064-.222.203-.444.416-.577a.96.96 0 0 1 .524-.15c.293 0 .584.124.84.284.278.173.48.408.71.694.226.282.458.611.684.951v-.014c.017-.324.106-.622.264-.874s.403-.487.762-.543c.3-.047.596.06.787.203s.31.313.4.467c.15.257.212.468.233.542.01.026.653 1.552 1.657 2.54.616.605 1.01 1.223 1.082 1.912.055.537-.096 1.059-.38 1.572.637.121 1.294.187 1.967.187.657 0 1.298-.063 1.921-.178-.287-.517-.44-1.041-.384-1.581.07-.69.465-1.307 1.081-1.913 1.004-.987 1.647-2.513 1.657-2.539.021-.074.083-.285.233-.542.09-.154.208-.323.4-.467a1.08 1.08 0 0 1 .787-.203c.359.056.604.29.762.543s.247.55.265.874v.015c.225-.34.457-.67.683-.952.23-.286.432-.52.71-.694.257-.16.547-.284.84-.285a.97.97 0 0 1 .524.151c.228.143.373.388.43.625l.006.04a10.3 10.3 0 0 0 .534-3.273c0-5.731-4.678-10.378-10.449-10.378M8.327 6.583a1.5 1.5 0 0 1 .713.174 1.487 1.487 0 0 1 .617 2.013c-.183.343-.762-.214-1.102-.094-.38.134-.532.914-.917.71a1.487 1.487 0 0 1 .69-2.803m7.486 0a1.487 1.487 0 0 1 .689 2.803c-.385.204-.536-.576-.916-.71-.34-.12-.92.437-1.103.094a1.487 1.487 0 0 1 .617-2.013 1.5 1.5 0 0 1 .713-.174m-10.68 1.55a.96.96 0 1 1 0 1.921.96.96 0 0 1 0-1.92m13.838 0a.96.96 0 1 1 0 1.92.96.96 0 0 1 0-1.92M8.489 11.458c.588.01 1.965 1.157 3.572 1.164 1.607-.007 2.984-1.155 3.572-1.164.196-.003.305.12.305.454 0 .886-.424 2.328-1.563 3.202-.22-.756-1.396-1.366-1.63-1.32q-.011.001-.02.006l-.044.026-.01.008-.03.024q-.018.017-.035.036l-.032.04a1 1 0 0 0-.058.09l-.014.025q-.049.088-.11.19a1 1 0 0 1-.083.116 1.2 1.2 0 0 1-.173.18q-.035.029-.075.058a1.3 1.3 0 0 1-.251-.243 1 1 0 0 1-.076-.107c-.124-.193-.177-.363-.337-.444-.034-.016-.104-.008-.2.022q-.094.03-.216.087-.06.028-.125.063l-.13.074q-.067.04-.136.086a3 3 0 0 0-.135.096 3 3 0 0 0-.26.219 2 2 0 0 0-.12.121 2 2 0 0 0-.106.128l-.002.002a2 2 0 0 0-.09.132l-.001.001a1.2 1.2 0 0 0-.105.212q-.013.036-.024.073c-1.139-.875-1.563-2.317-1.563-3.203 0-.334.109-.457.305-.454m.836 
10.354c.824-1.19.766-2.082-.365-3.194-1.13-1.112-1.789-2.738-1.789-2.738s-.246-.945-.806-.858-.97 1.499.202 2.362c1.173.864-.233 1.45-.685.64-.45-.812-1.683-2.896-2.322-3.295s-1.089-.175-.938.647 2.822 2.813 2.562 3.244-1.176-.506-1.176-.506-2.866-2.567-3.49-1.898.473 1.23 2.037 2.16c1.564.932 1.686 1.178 1.464 1.53s-3.675-2.511-4-1.297c-.323 1.214 3.524 1.567 3.287 2.405-.238.839-2.71-1.587-3.216-.642-.506.946 3.49 2.056 3.522 2.064 1.29.33 4.568 1.028 5.713-.624m5.349 0c-.824-1.19-.766-2.082.365-3.194 1.13-1.112 1.789-2.738 1.789-2.738s.246-.945.806-.858.97 1.499-.202 2.362c-1.173.864.233 1.45.685.64.451-.812 1.683-2.896 2.322-3.295s1.089-.175.938.647-2.822 2.813-2.562 3.244 1.176-.506 1.176-.506 2.866-2.567 3.49-1.898-.473 1.23-2.037 2.16c-1.564.932-1.686 1.178-1.464 1.53s3.675-2.511 4-1.297c.323 1.214-3.524 1.567-3.287 2.405.238.839 2.71-1.587 3.216-.642.506.946-3.49 2.056-3.522 2.064-1.29.33-4.568 1.028-5.713-.624"
-    />
-  </svg>
-{:else}
-  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
-    <path
-      d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"
-    />
-  </svg>
-{/if}
--- a/dashboard/src/lib/components/FamilySidebar.svelte
+++ b/dashboard/src/lib/components/FamilySidebar.svelte
@@ -1,142 +0,0 @@
-<script lang="ts">
-  import FamilyLogos from "./FamilyLogos.svelte";
-
-  type FamilySidebarProps = {
-    families: string[];
-    selectedFamily: string | null;
-    hasFavorites: boolean;
-    onSelect: (family: string | null) => void;
-  };
-
-  let { families, selectedFamily, hasFavorites, onSelect }: FamilySidebarProps =
-    $props();
-
-  // Family display names
-  const familyNames: Record<string, string> = {
-    favorites: "Favorites",
-    huggingface: "Hub",
-    llama: "Meta",
-    qwen: "Qwen",
-    deepseek: "DeepSeek",
-    "gpt-oss": "OpenAI",
-    glm: "GLM",
-    minimax: "MiniMax",
-    kimi: "Kimi",
-  };
-
-  function getFamilyName(family: string): string {
-    return (
-      familyNames[family] || family.charAt(0).toUpperCase() + family.slice(1)
-    );
-  }
-</script>
-
-<div
-  class="flex flex-col gap-1 py-2 px-1 border-r border-exo-yellow/10 bg-exo-medium-gray/30 min-w-[64px]"
->
-  <!-- All models (no filter) -->
-  <button
-    type="button"
-    onclick={() => onSelect(null)}
-    class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-    null
-      ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
-      : 'hover:bg-white/5 border-l-2 border-transparent'}"
-    title="All models"
-  >
-    <svg
-      class="w-5 h-5 {selectedFamily === null
-        ? 'text-exo-yellow'
-        : 'text-white/50 group-hover:text-white/70'}"
-      viewBox="0 0 24 24"
-      fill="currentColor"
-    >
-      <path
-        d="M4 8h4V4H4v4zm6 12h4v-4h-4v4zm-6 0h4v-4H4v4zm0-6h4v-4H4v4zm6 0h4v-4h-4v4zm6-10v4h4V4h-4zm-6 4h4V4h-4v4zm6 6h4v-4h-4v4zm0 6h4v-4h-4v4z"
-      />
-    </svg>
-    <span
-      class="text-[9px] font-mono mt-0.5 {selectedFamily === null
-        ? 'text-exo-yellow'
-        : 'text-white/40 group-hover:text-white/60'}">All</span
-    >
-  </button>
-
-  <!-- Favorites (only show if has favorites) -->
-  {#if hasFavorites}
-    <button
-      type="button"
-      onclick={() => onSelect("favorites")}
-      class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-      'favorites'
-        ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
-        : 'hover:bg-white/5 border-l-2 border-transparent'}"
-      title="Show favorited models"
-    >
-      <FamilyLogos
-        family="favorites"
-        class={selectedFamily === "favorites"
-          ? "text-amber-400"
-          : "text-white/50 group-hover:text-amber-400/70"}
-      />
-      <span
-        class="text-[9px] font-mono mt-0.5 {selectedFamily === 'favorites'
-          ? 'text-amber-400'
-          : 'text-white/40 group-hover:text-white/60'}">Faves</span
-      >
-    </button>
-  {/if}
-
-  <!-- HuggingFace Hub -->
-  <button
-    type="button"
-    onclick={() => onSelect("huggingface")}
-    class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-    'huggingface'
-      ? 'bg-orange-500/20 border-l-2 border-orange-400'
-      : 'hover:bg-white/5 border-l-2 border-transparent'}"
-    title="Browse and add models from Hugging Face"
-  >
-    <FamilyLogos
-      family="huggingface"
-      class={selectedFamily === "huggingface"
-        ? "text-orange-400"
-        : "text-white/50 group-hover:text-orange-400/70"}
-    />
-    <span
-      class="text-[9px] font-mono mt-0.5 {selectedFamily === 'huggingface'
-        ? 'text-orange-400'
-        : 'text-white/40 group-hover:text-white/60'}">Hub</span
-    >
-  </button>
-
-  <div class="h-px bg-exo-yellow/10 my-1"></div>
-
-  <!-- Model families -->
-  {#each families as family}
-    <button
-      type="button"
-      onclick={() => onSelect(family)}
-      class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
-      family
-        ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
-        : 'hover:bg-white/5 border-l-2 border-transparent'}"
-      title={getFamilyName(family)}
-    >
-      <FamilyLogos
-        {family}
-        class={selectedFamily === family
-          ? "text-exo-yellow"
-          : "text-white/50 group-hover:text-white/70"}
-      />
-      <span
-        class="text-[9px] font-mono mt-0.5 truncate max-w-full {selectedFamily ===
-        family
-          ? 'text-exo-yellow'
-          : 'text-white/40 group-hover:text-white/60'}"
-      >
-        {getFamilyName(family)}
-      </span>
-    </button>
-  {/each}
-</div>
--- a/dashboard/src/lib/components/HuggingFaceResultItem.svelte
+++ b/dashboard/src/lib/components/HuggingFaceResultItem.svelte
@@ -1,127 +0,0 @@
-<script lang="ts">
-  interface HuggingFaceModel {
-    id: string;
-    author: string;
-    downloads: number;
-    likes: number;
-    last_modified: string;
-    tags: string[];
-  }
-
-  type HuggingFaceResultItemProps = {
-    model: HuggingFaceModel;
-    isAdded: boolean;
-    isAdding: boolean;
-    onAdd: () => void;
-    onSelect: () => void;
-  };
-
-  let {
-    model,
-    isAdded,
-    isAdding,
-    onAdd,
-    onSelect,
-  }: HuggingFaceResultItemProps = $props();
-
-  function formatNumber(num: number): string {
-    if (num >= 1000000) {
-      return `${(num / 1000000).toFixed(1)}M`;
-    } else if (num >= 1000) {
-      return `${(num / 1000).toFixed(1)}k`;
-    }
-    return num.toString();
-  }
-
-  // Extract model name from full ID (e.g., "mlx-community/Llama-3.2-1B" -> "Llama-3.2-1B")
-  const modelName = $derived(model.id.split("/").pop() || model.id);
-</script>
-
-<div
-  class="flex items-center justify-between gap-3 px-3 py-2.5 hover:bg-white/5 transition-colors border-b border-white/5 last:border-b-0"
->
-  <div class="flex-1 min-w-0">
-    <div class="flex items-center gap-2">
-      <span class="text-sm font-mono text-white truncate" title={model.id}
-        >{modelName}</span
-      >
-      {#if isAdded}
-        <span
-          class="px-1.5 py-0.5 text-[10px] font-mono bg-green-500/20 text-green-400 rounded"
-          >Added</span
-        >
-      {/if}
-    </div>
-    <div class="flex items-center gap-3 mt-0.5 text-xs text-white/40">
-      <span class="truncate">{model.author}</span>
-      <span
-        class="flex items-center gap-1 shrink-0"
-        title="Downloads in the last 30 days"
-      >
-        <svg
-          class="w-3 h-3"
-          fill="none"
-          stroke="currentColor"
-          viewBox="0 0 24 24"
-        >
-          <path
-            stroke-linecap="round"
-            stroke-linejoin="round"
-            stroke-width="2"
-            d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"
-          />
-        </svg>
-        {formatNumber(model.downloads)}
-      </span>
-      <span
-        class="flex items-center gap-1 shrink-0"
-        title="Community likes on Hugging Face"
-      >
-        <svg
-          class="w-3 h-3"
-          fill="none"
-          stroke="currentColor"
-          viewBox="0 0 24 24"
-        >
-          <path
-            stroke-linecap="round"
-            stroke-linejoin="round"
-            stroke-width="2"
-            d="M4.318 6.318a4.5 4.5 0 000 6.364L12 20.364l7.682-7.682a4.5 4.5 0 00-6.364-6.364L12 7.636l-1.318-1.318a4.5 4.5 0 00-6.364 0z"
-          />
-        </svg>
-        {formatNumber(model.likes)}
-      </span>
-    </div>
-  </div>
-
-  <div class="flex items-center gap-2 shrink-0">
-    {#if isAdded}
-      <button
-        type="button"
-        onclick={onSelect}
-        class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-yellow/10 text-exo-yellow border border-exo-yellow/30 hover:bg-exo-yellow/20 transition-colors rounded cursor-pointer"
-      >
-        Select
-      </button>
-    {:else}
-      <button
-        type="button"
-        onclick={onAdd}
-        disabled={isAdding}
-        class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-orange-500/10 text-orange-400 border border-orange-400/30 hover:bg-orange-500/20 transition-colors rounded cursor-pointer disabled:opacity-50 disabled:cursor-not-allowed"
-      >
-        {#if isAdding}
-          <span class="flex items-center gap-1.5">
-            <span
-              class="w-3 h-3 border-2 border-orange-400 border-t-transparent rounded-full animate-spin"
-            ></span>
-            Adding...
-          </span>
-        {:else}
-          + Add
-        {/if}
-      </button>
-    {/if}
-  </div>
-</div>
--- a/dashboard/src/lib/components/ModelFilterPopover.svelte
+++ b/dashboard/src/lib/components/ModelFilterPopover.svelte
@@ -1,182 +0,0 @@
-<script lang="ts">
-  import { fly } from "svelte/transition";
-  import { cubicOut } from "svelte/easing";
-
-  interface FilterState {
-    capabilities: string[];
-    sizeRange: { min: number; max: number } | null;
-  }
-
-  type ModelFilterPopoverProps = {
-    filters: FilterState;
-    onChange: (filters: FilterState) => void;
-    onClear: () => void;
-    onClose: () => void;
-  };
-
-  let { filters, onChange, onClear, onClose }: ModelFilterPopoverProps =
-    $props();
-
-  // Available capabilities
-  const availableCapabilities = [
-    { id: "text", label: "Text" },
-    { id: "thinking", label: "Thinking" },
-    { id: "code", label: "Code" },
-    { id: "vision", label: "Vision" },
-  ];
-
-  // Size ranges
-  const sizeRanges = [
-    { label: "< 10GB", min: 0, max: 10 },
-    { label: "10-50GB", min: 10, max: 50 },
-    { label: "50-200GB", min: 50, max: 200 },
-    { label: "> 200GB", min: 200, max: 10000 },
-  ];
-
-  function toggleCapability(cap: string) {
-    const next = filters.capabilities.includes(cap)
-      ? filters.capabilities.filter((c) => c !== cap)
-      : [...filters.capabilities, cap];
-    onChange({ ...filters, capabilities: next });
-  }
-
-  function selectSizeRange(range: { min: number; max: number } | null) {
-    // Toggle off if same range is clicked
-    if (
-      filters.sizeRange &&
-      range &&
-      filters.sizeRange.min === range.min &&
-      filters.sizeRange.max === range.max
-    ) {
-      onChange({ ...filters, sizeRange: null });
-    } else {
-      onChange({ ...filters, sizeRange: range });
-    }
-  }
-
-  function handleClickOutside(e: MouseEvent) {
-    const target = e.target as HTMLElement;
-    if (
-      !target.closest(".filter-popover") &&
-      !target.closest(".filter-toggle")
-    ) {
-      onClose();
-    }
-  }
-</script>
-
-<svelte:window onclick={handleClickOutside} />
-
-<!-- svelte-ignore a11y_no_static_element_interactions -->
-<div
-  class="filter-popover absolute right-0 top-full mt-2 w-64 bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-xl z-10"
-  transition:fly={{ y: -10, duration: 200, easing: cubicOut }}
-  onclick={(e) => e.stopPropagation()}
-  role="dialog"
-  aria-label="Filter options"
->
-  <div class="p-3 space-y-4">
-    <!-- Capabilities -->
-    <div>
-      <h4 class="text-xs font-mono text-white/50 mb-2">Capabilities</h4>
-      <div class="flex flex-wrap gap-1.5">
-        {#each availableCapabilities as cap}
-          {@const isSelected = filters.capabilities.includes(cap.id)}
-          <button
-            type="button"
-            class="px-2 py-1 text-xs font-mono rounded transition-colors {isSelected
-              ? 'bg-exo-yellow/20 text-exo-yellow border border-exo-yellow/30'
-              : 'bg-white/5 text-white/60 hover:bg-white/10 border border-transparent'}"
-            onclick={() => toggleCapability(cap.id)}
-          >
-            {#if cap.id === "text"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {:else if cap.id === "thinking"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {:else if cap.id === "code"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M16 18l6-6-6-6M8 6l-6 6 6 6"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /></svg
-              >
-            {:else if cap.id === "vision"}
-              <svg
-                class="w-3.5 h-3.5 inline-block"
-                viewBox="0 0 24 24"
-                fill="none"
-                stroke="currentColor"
-                stroke-width="1.5"
-                ><path
-                  d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"
-                  stroke-linecap="round"
-                  stroke-linejoin="round"
-                /><circle cx="12" cy="12" r="3" /></svg
-              >
-            {/if}
-            <span class="ml-1">{cap.label}</span>
-          </button>
-        {/each}
-      </div>
-    </div>
-
-    <!-- Size range -->
-    <div>
-      <h4 class="text-xs font-mono text-white/50 mb-2">Model Size</h4>
-      <div class="flex flex-wrap gap-1.5">
-        {#each sizeRanges as range}
-          {@const isSelected =
-            filters.sizeRange &&
-            filters.sizeRange.min === range.min &&
-            filters.sizeRange.max === range.max}
-          <button
-            type="button"
-            class="px-2 py-1 text-xs font-mono rounded transition-colors {isSelected
-              ? 'bg-exo-yellow/20 text-exo-yellow border border-exo-yellow/30'
-              : 'bg-white/5 text-white/60 hover:bg-white/10 border border-transparent'}"
-            onclick={() => selectSizeRange(range)}
-          >
-            {range.label}
-          </button>
-        {/each}
-      </div>
-    </div>
-
-    <!-- Clear button -->
-    <button
-      type="button"
-      class="w-full py-1.5 text-xs font-mono text-white/50 hover:text-white/70 hover:bg-white/5 rounded transition-colors"
-      onclick={onClear}
-    >
-      Clear all filters
-    </button>
-  </div>
-</div>
--- a/dashboard/src/lib/components/ModelPickerGroup.svelte
+++ b/dashboard/src/lib/components/ModelPickerGroup.svelte
@@ -1,324 +0,0 @@
-<script lang="ts">
-  interface ModelInfo {
-    id: string;
-    name?: string;
-    storage_size_megabytes?: number;
-    base_model?: string;
-    quantization?: string;
-    supports_tensor?: boolean;
-    capabilities?: string[];
-    family?: string;
-    is_custom?: boolean;
-  }
-
-  interface ModelGroup {
-    id: string;
-    name: string;
-    capabilities: string[];
-    family: string;
-    variants: ModelInfo[];
-    smallestVariant: ModelInfo;
-    hasMultipleVariants: boolean;
-  }
-
-  type ModelPickerGroupProps = {
-    group: ModelGroup;
-    isExpanded: boolean;
-    isFavorite: boolean;
-    selectedModelId: string | null;
-    canModelFit: (id: string) => boolean;
-    onToggleExpand: () => void;
-    onSelectModel: (modelId: string) => void;
-    onToggleFavorite: (baseModelId: string) => void;
-    onShowInfo: (group: ModelGroup) => void;
-  };
-
-  let {
-    group,
-    isExpanded,
-    isFavorite,
-    selectedModelId,
-    canModelFit,
-    onToggleExpand,
-    onSelectModel,
-    onToggleFavorite,
-    onShowInfo,
-  }: ModelPickerGroupProps = $props();
-
-  // Format storage size
-  function formatSize(mb: number | undefined): string {
-    if (!mb) return "";
-    if (mb >= 1024) {
-      return `${(mb / 1024).toFixed(0)}GB`;
-    }
-    return `${mb}MB`;
-  }
-
-  // Check if any variant can fit
-  const anyVariantFits = $derived(
-    group.variants.some((v) => canModelFit(v.id)),
-  );
-
-  // Check if this group's model is currently selected (for single-variant groups)
-  const isMainSelected = $derived(
-    !group.hasMultipleVariants &&
-      group.variants.some((v) => v.id === selectedModelId),
-  );
-</script>
-
-<div
-  class="border-b border-white/5 last:border-b-0 {!anyVariantFits
-    ? 'opacity-50'
-    : ''}"
->
-  <!-- Main row -->
-  <div
-    class="flex items-center gap-2 px-3 py-2.5 transition-colors {anyVariantFits
-      ? 'hover:bg-white/5 cursor-pointer'
-      : 'cursor-not-allowed'} {isMainSelected
-      ? 'bg-exo-yellow/10 border-l-2 border-exo-yellow'
-      : 'border-l-2 border-transparent'}"
-    onclick={() => {
-      if (group.hasMultipleVariants) {
-        onToggleExpand();
-      } else {
-        const modelId = group.variants[0]?.id;
-        if (modelId && canModelFit(modelId)) {
-          onSelectModel(modelId);
-        }
-      }
-    }}
-    role="button"
-    tabindex="0"
-    onkeydown={(e) => {
-      if (e.key === "Enter" || e.key === " ") {
-        e.preventDefault();
-        if (group.hasMultipleVariants) {
-          onToggleExpand();
-        } else {
-          const modelId = group.variants[0]?.id;
-          if (modelId && canModelFit(modelId)) {
-            onSelectModel(modelId);
-          }
-        }
-      }
-    }}
-  >
-    <!-- Expand/collapse chevron (for groups with variants) -->
-    {#if group.hasMultipleVariants}
-      <svg
-        class="w-4 h-4 text-white/40 transition-transform duration-200 flex-shrink-0 {isExpanded
-          ? 'rotate-90'
-          : ''}"
-        viewBox="0 0 24 24"
-        fill="currentColor"
-      >
-        <path d="M8.59 16.59L13.17 12 8.59 7.41 10 6l6 6-6 6-1.41-1.41z" />
-      </svg>
-    {:else}
-      <div class="w-4 flex-shrink-0"></div>
-    {/if}
-
-    <!-- Model name -->
-    <div class="flex-1 min-w-0">
-      <div class="flex items-center gap-2">
-        <span class="font-mono text-sm text-white truncate">
-          {group.name}
-        </span>
-        <!-- Capability icons -->
-        {#each group.capabilities.filter((c) => c !== "text") as cap}
-          {#if cap === "thinking"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports Thinking"
-            >
-              <path
-                d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-            </svg>
-          {:else if cap === "code"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports code generation"
-            >
-              <path
-                d="M16 18l6-6-6-6M8 6l-6 6 6 6"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-            </svg>
-          {:else if cap === "vision"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports image input"
-            >
-              <path
-                d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"
-                stroke-linecap="round"
-                stroke-linejoin="round"
-              />
-              <circle cx="12" cy="12" r="3" />
-            </svg>
-          {:else if cap === "image_gen"}
-            <svg
-              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
-              viewBox="0 0 24 24"
-              fill="none"
-              stroke="currentColor"
-              stroke-width="1.5"
-              title="Supports image generation"
-            >
-              <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
-              <circle cx="8.5" cy="8.5" r="1.5" />
-              <path d="M21 15l-5-5L5 21" />
-            </svg>
-          {/if}
-        {/each}
-      </div>
-    </div>
-
-    <!-- Size indicator (smallest variant) -->
-    {#if !group.hasMultipleVariants && group.smallestVariant?.storage_size_megabytes}
-      <span class="text-xs font-mono text-white/30 flex-shrink-0">
-        {formatSize(group.smallestVariant.storage_size_megabytes)}
-      </span>
-    {/if}
-
-    <!-- Variant count -->
-    {#if group.hasMultipleVariants}
-      <span class="text-xs font-mono text-white/30 flex-shrink-0">
-        {group.variants.length} variants
-      </span>
-    {/if}
-
-    <!-- Check mark if selected (single-variant) -->
-    {#if isMainSelected}
-      <svg
-        class="w-4 h-4 text-exo-yellow flex-shrink-0"
-        viewBox="0 0 24 24"
-        fill="currentColor"
-      >
-        <path d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41L9 16.17z" />
-      </svg>
-    {/if}
-
-    <!-- Favorite star -->
-    <button
-      type="button"
-      class="p-1 rounded hover:bg-white/10 transition-colors flex-shrink-0"
-      onclick={(e) => {
-        e.stopPropagation();
-        onToggleFavorite(group.id);
-      }}
-      title={isFavorite ? "Remove from favorites" : "Add to favorites"}
-    >
-      {#if isFavorite}
-        <svg
-          class="w-4 h-4 text-amber-400"
-          viewBox="0 0 24 24"
-          fill="currentColor"
-        >
-          <path
-            d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
-          />
-        </svg>
-      {:else}
-        <svg
-          class="w-4 h-4 text-white/30 hover:text-white/50"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-        >
-          <path
-            d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
-          />
-        </svg>
-      {/if}
-    </button>
-
-    <!-- Info button -->
-    <button
-      type="button"
-      class="p-1 rounded hover:bg-white/10 transition-colors flex-shrink-0"
-      onclick={(e) => {
-        e.stopPropagation();
-        onShowInfo(group);
-      }}
-      title="View model details"
-    >
-      <svg
-        class="w-4 h-4 text-white/30 hover:text-white/50"
-        viewBox="0 0 24 24"
-        fill="currentColor"
-      >
-        <path
-          d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 15h-2v-6h2v6zm0-8h-2V7h2v2z"
-        />
-      </svg>
-    </button>
-  </div>
-
-  <!-- Expanded variants -->
-  {#if isExpanded && group.hasMultipleVariants}
-    <div class="bg-black/20 border-t border-white/5">
-      {#each group.variants as variant}
-        {@const modelCanFit = canModelFit(variant.id)}
-        {@const isSelected = selectedModelId === variant.id}
-        <button
-          type="button"
-          class="w-full flex items-center gap-3 px-3 py-2 pl-10 hover:bg-white/5 transition-colors text-left {!modelCanFit
-            ? 'opacity-50 cursor-not-allowed'
-            : 'cursor-pointer'} {isSelected
-            ? 'bg-exo-yellow/10 border-l-2 border-exo-yellow'
-            : 'border-l-2 border-transparent'}"
-          disabled={!modelCanFit}
-          onclick={() => {
-            if (modelCanFit) {
-              onSelectModel(variant.id);
-            }
-          }}
-        >
-          <!-- Quantization badge -->
-          <span
-            class="text-xs font-mono px-1.5 py-0.5 rounded bg-white/10 text-white/70 flex-shrink-0"
-          >
-            {variant.quantization || "default"}
-          </span>
-
-          <!-- Size -->
-          <span class="text-xs font-mono text-white/40 flex-1">
-            {formatSize(variant.storage_size_megabytes)}
-          </span>
-
-          <!-- Check mark if selected -->
-          {#if isSelected}
-            <svg
-              class="w-4 h-4 text-exo-yellow"
-              viewBox="0 0 24 24"
-              fill="currentColor"
-            >
-              <path
-                d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41L9 16.17z"
-              />
-            </svg>
-          {/if}
-        </button>
-      {/each}
-    </div>
-  {/if}
-</div>
--- a/dashboard/src/lib/components/ModelPickerModal.svelte
+++ b/dashboard/src/lib/components/ModelPickerModal.svelte
@@ -1,748 +0,0 @@
-<script lang="ts">
-  import { fade, fly } from "svelte/transition";
-  import { cubicOut } from "svelte/easing";
-  import FamilySidebar from "./FamilySidebar.svelte";
-  import ModelPickerGroup from "./ModelPickerGroup.svelte";
-  import ModelFilterPopover from "./ModelFilterPopover.svelte";
-  import HuggingFaceResultItem from "./HuggingFaceResultItem.svelte";
-
-  interface ModelInfo {
-    id: string;
-    name?: string;
-    storage_size_megabytes?: number;
-    base_model?: string;
-    quantization?: string;
-    supports_tensor?: boolean;
-    capabilities?: string[];
-    family?: string;
-    is_custom?: boolean;
-    tasks?: string[];
-    hugging_face_id?: string;
-  }
-
-  interface ModelGroup {
-    id: string;
-    name: string;
-    capabilities: string[];
-    family: string;
-    variants: ModelInfo[];
-    smallestVariant: ModelInfo;
-    hasMultipleVariants: boolean;
-  }
-
-  interface FilterState {
-    capabilities: string[];
-    sizeRange: { min: number; max: number } | null;
-  }
-
-  interface HuggingFaceModel {
-    id: string;
-    author: string;
-    downloads: number;
-    likes: number;
-    last_modified: string;
-    tags: string[];
-  }
-
-  type ModelPickerModalProps = {
-    isOpen: boolean;
-    models: ModelInfo[];
-    selectedModelId: string | null;
-    favorites: Set<string>;
-    existingModelIds: Set<string>;
-    canModelFit: (modelId: string) => boolean;
-    onSelect: (modelId: string) => void;
-    onClose: () => void;
-    onToggleFavorite: (baseModelId: string) => void;
-    onAddModel: (modelId: string) => Promise<void>;
-    onDeleteModel: (modelId: string) => Promise<void>;
-    totalMemoryGB: number;
-    usedMemoryGB: number;
-  };
-
-  let {
-    isOpen,
-    models,
-    selectedModelId,
-    favorites,
-    existingModelIds,
-    canModelFit,
-    onSelect,
-    onClose,
-    onToggleFavorite,
-    onAddModel,
-    onDeleteModel,
-    totalMemoryGB,
-    usedMemoryGB,
-  }: ModelPickerModalProps = $props();
-
-  // Local state
-  let searchQuery = $state("");
-  let selectedFamily = $state<string | null>(null);
-  let expandedGroups = $state<Set<string>>(new Set());
-  let showFilters = $state(false);
-  let filters = $state<FilterState>({ capabilities: [], sizeRange: null });
-  let infoGroup = $state<ModelGroup | null>(null);
-
-  // HuggingFace Hub state
-  let hfSearchQuery = $state("");
-  let hfSearchResults = $state<HuggingFaceModel[]>([]);
-  let hfTrendingModels = $state<HuggingFaceModel[]>([]);
-  let hfIsSearching = $state(false);
-  let hfIsLoadingTrending = $state(false);
-  let addingModelId = $state<string | null>(null);
-  let hfSearchDebounceTimer: ReturnType<typeof setTimeout> | null = null;
-  let manualModelId = $state("");
-  let addModelError = $state<string | null>(null);
-
-  // Reset state when modal opens
-  $effect(() => {
-    if (isOpen) {
-      searchQuery = "";
-      selectedFamily = null;
-      expandedGroups = new Set();
-      showFilters = false;
-      hfSearchQuery = "";
-      hfSearchResults = [];
-      manualModelId = "";
-      addModelError = null;
-    }
-  });
-
-  // Fetch trending models when HuggingFace is selected
-  $effect(() => {
-    if (
-      selectedFamily === "huggingface" &&
-      hfTrendingModels.length === 0 &&
-      !hfIsLoadingTrending
-    ) {
-      fetchTrendingModels();
-    }
-  });
-
-  async function fetchTrendingModels() {
-    hfIsLoadingTrending = true;
-    try {
-      const response = await fetch("/models/search?query=&limit=20");
-      if (response.ok) {
-        hfTrendingModels = await response.json();
-      }
-    } catch (error) {
-      console.error("Failed to fetch trending models:", error);
-    } finally {
-      hfIsLoadingTrending = false;
-    }
-  }
-
-  async function searchHuggingFace(query: string) {
-    if (query.length < 2) {
-      hfSearchResults = [];
-      return;
-    }
-
-    hfIsSearching = true;
-    try {
-      const response = await fetch(
-        `/models/search?query=${encodeURIComponent(query)}&limit=20`,
-      );
-      if (response.ok) {
-        hfSearchResults = await response.json();
-      } else {
-        hfSearchResults = [];
-      }
-    } catch (error) {
-      console.error("Failed to search models:", error);
-      hfSearchResults = [];
-    } finally {
-      hfIsSearching = false;
-    }
-  }
-
-  function handleHfSearchInput(query: string) {
-    hfSearchQuery = query;
-    addModelError = null;
-
-    if (hfSearchDebounceTimer) {
-      clearTimeout(hfSearchDebounceTimer);
-    }
-
-    if (query.length >= 2) {
-      hfSearchDebounceTimer = setTimeout(() => {
-        searchHuggingFace(query);
-      }, 300);
-    } else {
-      hfSearchResults = [];
-    }
-  }
-
-  async function handleAddModel(modelId: string) {
-    addingModelId = modelId;
-    addModelError = null;
-    try {
-      await onAddModel(modelId);
-    } catch (error) {
-      addModelError =
-        error instanceof Error ? error.message : "Failed to add model";
-    } finally {
-      addingModelId = null;
-    }
-  }
-
-  async function handleAddManualModel() {
-    if (!manualModelId.trim()) return;
-    await handleAddModel(manualModelId.trim());
-    if (!addModelError) {
-      manualModelId = "";
-    }
-  }
-
-  function handleSelectHfModel(modelId: string) {
-    onSelect(modelId);
-    onClose();
-  }
-
-  // Models to display in HuggingFace view
-  const hfDisplayModels = $derived.by((): HuggingFaceModel[] => {
-    if (hfSearchQuery.length >= 2) {
-      return hfSearchResults;
-    }
-    return hfTrendingModels;
-  });
-
-  // Group models by base_model
-  const groupedModels = $derived.by((): ModelGroup[] => {
-    const groups = new Map<string, ModelGroup>();
-
-    for (const model of models) {
-      const groupId = model.base_model || model.id;
-      const groupName = model.base_model || model.name || model.id;
-
-      if (!groups.has(groupId)) {
-        groups.set(groupId, {
-          id: groupId,
-          name: groupName,
-          capabilities: model.capabilities || ["text"],
-          family: model.family || "",
-          variants: [],
-          smallestVariant: model,
-          hasMultipleVariants: false,
-        });
-      }
-
-      const group = groups.get(groupId)!;
-      group.variants.push(model);
-
-      // Track smallest variant
-      if (
-        (model.storage_size_megabytes || 0) <
-        (group.smallestVariant.storage_size_megabytes || Infinity)
-      ) {
-        group.smallestVariant = model;
-      }
-
-      // Update capabilities if not set
-      if (
-        group.capabilities.length <= 1 &&
-        model.capabilities &&
-        model.capabilities.length > 1
-      ) {
-        group.capabilities = model.capabilities;
-      }
-      if (!group.family && model.family) {
-        group.family = model.family;
-      }
-    }
-
-    // Sort variants within each group by size
-    for (const group of groups.values()) {
-      group.variants.sort(
-        (a, b) =>
-          (a.storage_size_megabytes || 0) - (b.storage_size_megabytes || 0),
-      );
-      group.hasMultipleVariants = group.variants.length > 1;
-    }
-
-    // Convert to array and sort by smallest variant size (biggest first)
-    return Array.from(groups.values()).sort((a, b) => {
-      return (
-        (b.smallestVariant.storage_size_megabytes || 0) -
-        (a.smallestVariant.storage_size_megabytes || 0)
-      );
-    });
-  });
-
-  // Get unique families
-  const uniqueFamilies = $derived.by((): string[] => {
-    const families = new Set<string>();
-    for (const group of groupedModels) {
-      if (group.family) {
-        families.add(group.family);
-      }
-    }
-    const familyOrder = [
-      "kimi",
-      "qwen",
-      "glm",
-      "minimax",
-      "deepseek",
-      "gpt-oss",
-      "llama",
-    ];
-    return Array.from(families).sort((a, b) => {
-      const aIdx = familyOrder.indexOf(a);
-      const bIdx = familyOrder.indexOf(b);
-      if (aIdx === -1 && bIdx === -1) return a.localeCompare(b);
-      if (aIdx === -1) return 1;
-      if (bIdx === -1) return -1;
-      return aIdx - bIdx;
-    });
-  });
-
-  // Filter models based on search, family, and filters
-  const filteredGroups = $derived.by((): ModelGroup[] => {
-    let result: ModelGroup[] = [...groupedModels];
-
-    // Filter by family
-    if (selectedFamily === "favorites") {
-      result = result.filter((g) => favorites.has(g.id));
-    } else if (selectedFamily && selectedFamily !== "huggingface") {
-      result = result.filter((g) => g.family === selectedFamily);
-    }
-
-    // Filter by search query
-    if (searchQuery.trim()) {
-      const query = searchQuery.toLowerCase().trim();
-      result = result.filter(
-        (g) =>
-          g.name.toLowerCase().includes(query) ||
-          g.variants.some(
-            (v) =>
-              v.id.toLowerCase().includes(query) ||
-              (v.name || "").toLowerCase().includes(query),
-          ),
-      );
-    }
-
-    // Filter by capabilities
-    if (filters.capabilities.length > 0) {
-      result = result.filter((g) =>
-        filters.capabilities.every((cap) => g.capabilities.includes(cap)),
-      );
-    }
-
-    // Filter by size range
-    if (filters.sizeRange) {
-      const { min, max } = filters.sizeRange;
-      result = result.filter((g) => {
-        const sizeGB = (g.smallestVariant.storage_size_megabytes || 0) / 1024;
-        return sizeGB >= min && sizeGB <= max;
-      });
-    }
-
-    // Sort: models that fit first, then by size (largest first)
-    result.sort((a, b) => {
-      const aFits = a.variants.some((v) => canModelFit(v.id));
-      const bFits = b.variants.some((v) => canModelFit(v.id));
-
-      if (aFits && !bFits) return -1;
-      if (!aFits && bFits) return 1;
-
-      return (
-        (b.smallestVariant.storage_size_megabytes || 0) -
-        (a.smallestVariant.storage_size_megabytes || 0)
-      );
-    });
-
-    return result;
-  });
-
-  // Check if any favorites exist
-  const hasFavorites = $derived(favorites.size > 0);
-
-  function toggleGroupExpanded(groupId: string) {
-    const next = new Set(expandedGroups);
-    if (next.has(groupId)) {
-      next.delete(groupId);
-    } else {
-      next.add(groupId);
-    }
-    expandedGroups = next;
-  }
-
-  function handleSelect(modelId: string) {
-    onSelect(modelId);
-    onClose();
-  }
-
-  function handleKeydown(e: KeyboardEvent) {
-    if (e.key === "Escape") {
-      onClose();
-    }
-  }
-
-  function handleFiltersChange(newFilters: FilterState) {
-    filters = newFilters;
-  }
-
-  function clearFilters() {
-    filters = { capabilities: [], sizeRange: null };
-  }
-
-  const hasActiveFilters = $derived(
-    filters.capabilities.length > 0 || filters.sizeRange !== null,
-  );
-</script>
-
-<svelte:window onkeydown={handleKeydown} />
-
-{#if isOpen}
-  <!-- Backdrop -->
-  <div
-    class="fixed inset-0 z-50 bg-black/80 backdrop-blur-sm"
-    transition:fade={{ duration: 200 }}
-    onclick={onClose}
-    role="presentation"
-  ></div>
-
-  <!-- Modal -->
-  <div
-    class="fixed z-50 top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[min(90vw,600px)] h-[min(80vh,700px)] bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-2xl overflow-hidden flex flex-col"
-    transition:fly={{ y: 20, duration: 300, easing: cubicOut }}
-    role="dialog"
-    aria-modal="true"
-    aria-label="Select a model"
-  >
-    <!-- Header with search -->
-    <div
-      class="flex items-center gap-2 p-3 border-b border-exo-yellow/10 bg-exo-medium-gray/30"
-    >
-      {#if selectedFamily === "huggingface"}
-        <!-- HuggingFace search -->
-        <svg
-          class="w-5 h-5 text-orange-400/60 flex-shrink-0"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-        >
-          <circle cx="11" cy="11" r="8" />
-          <path d="M21 21l-4.35-4.35" />
-        </svg>
-        <input
-          type="search"
-          class="flex-1 bg-transparent border-none outline-none text-sm font-mono text-white placeholder-white/40"
-          placeholder="Search mlx-community models..."
-          value={hfSearchQuery}
-          oninput={(e) => handleHfSearchInput(e.currentTarget.value)}
-        />
-        {#if hfIsSearching}
-          <div class="flex-shrink-0">
-            <span
-              class="w-4 h-4 border-2 border-orange-400 border-t-transparent rounded-full animate-spin block"
-            ></span>
-          </div>
-        {/if}
-      {:else}
-        <!-- Normal model search -->
-        <svg
-          class="w-5 h-5 text-white/40 flex-shrink-0"
-          viewBox="0 0 24 24"
-          fill="none"
-          stroke="currentColor"
-          stroke-width="2"
-        >
-          <circle cx="11" cy="11" r="8" />
-          <path d="M21 21l-4.35-4.35" />
-        </svg>
-        <input
-          type="search"
-          class="flex-1 bg-transparent border-none outline-none text-sm font-mono text-white placeholder-white/40"
-          placeholder="Search models..."
-          bind:value={searchQuery}
-        />
-        <!-- Cluster memory -->
-        <span
-          class="text-xs font-mono flex-shrink-0"
-          title="Cluster memory usage"
-          ><span class="text-exo-yellow">{Math.round(usedMemoryGB)}GB</span
-          ><span class="text-white/40">/{Math.round(totalMemoryGB)}GB</span
-          ></span
-        >
-        <!-- Filter button -->
-        <div class="relative filter-toggle">
-          <button
-            type="button"
-            class="p-1.5 rounded hover:bg-white/10 transition-colors {hasActiveFilters
-              ? 'text-exo-yellow'
-              : 'text-white/50'}"
-            onclick={() => (showFilters = !showFilters)}
-            title="Filter by capability or size"
-          >
-            <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
-              <path d="M10 18h4v-2h-4v2zM3 6v2h18V6H3zm3 7h12v-2H6v2z" />
-            </svg>
-          </button>
-          {#if showFilters}
-            <ModelFilterPopover
-              {filters}
-              onChange={handleFiltersChange}
-              onClear={clearFilters}
-              onClose={() => (showFilters = false)}
-            />
-          {/if}
-        </div>
-      {/if}
-      <!-- Close button -->
-      <button
-        type="button"
-        class="p-1.5 rounded hover:bg-white/10 transition-colors text-white/50 hover:text-white/70"
-        onclick={onClose}
-        title="Close model picker"
-      >
-        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
-          <path
-            d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
-          />
-        </svg>
-      </button>
-    </div>
-
-    <!-- Body -->
-    <div class="flex flex-1 overflow-hidden">
-      <!-- Family sidebar -->
-      <FamilySidebar
-        families={uniqueFamilies}
-        {selectedFamily}
-        {hasFavorites}
-        onSelect={(family) => (selectedFamily = family)}
-      />
-
-      <!-- Model list -->
-      <div class="flex-1 overflow-y-auto flex flex-col">
-        {#if selectedFamily === "huggingface"}
-          <!-- HuggingFace Hub view -->
-          <div class="flex-1 flex flex-col min-h-0">
-            <!-- Section header -->
-            <div
-              class="sticky top-0 z-10 px-3 py-2 bg-exo-dark-gray/95 border-b border-exo-yellow/10"
-            >
-              <span class="text-xs font-mono text-white/40">
-                {#if hfSearchQuery.length >= 2}
-                  Search results for "{hfSearchQuery}"
-                {:else}
-                  Trending on mlx-community
-                {/if}
-              </span>
-            </div>
-
-            <!-- Results list -->
-            <div class="flex-1 overflow-y-auto">
-              {#if hfIsLoadingTrending && hfTrendingModels.length === 0}
-                <div
-                  class="flex items-center justify-center py-12 text-white/40"
-                >
-                  <span
-                    class="w-5 h-5 border-2 border-orange-400 border-t-transparent rounded-full animate-spin mr-2"
-                  ></span>
-                  <span class="font-mono text-sm"
-                    >Loading trending models...</span
-                  >
-                </div>
-              {:else if hfDisplayModels.length === 0}
-                <div
-                  class="flex flex-col items-center justify-center py-12 text-white/40"
-                >
-                  <svg
-                    class="w-10 h-10 mb-2"
-                    viewBox="0 0 24 24"
-                    fill="currentColor"
-                  >
-                    <path
-                      d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 13.5c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5zm4 0c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5zm2-4.5H8c0-2.21 1.79-4 4-4s4 1.79 4 4z"
-                    />
-                  </svg>
-                  <p class="font-mono text-sm">No models found</p>
-                  {#if hfSearchQuery}
-                    <p class="font-mono text-xs mt-1">
-                      Try a different search term
-                    </p>
-                  {/if}
-                </div>
-              {:else}
-                {#each hfDisplayModels as model}
-                  <HuggingFaceResultItem
-                    {model}
-                    isAdded={existingModelIds.has(model.id)}
-                    isAdding={addingModelId === model.id}
-                    onAdd={() => handleAddModel(model.id)}
-                    onSelect={() => handleSelectHfModel(model.id)}
-                  />
-                {/each}
-              {/if}
-            </div>
-
-            <!-- Manual input footer -->
-            <div
-              class="sticky bottom-0 border-t border-exo-yellow/10 bg-exo-dark-gray p-3"
-            >
-              {#if addModelError}
-                <div
-                  class="bg-red-500/10 border border-red-500/30 rounded px-3 py-2 mb-2"
-                >
-                  <p class="text-red-400 text-xs font-mono break-words">
-                    {addModelError}
-                  </p>
-                </div>
-              {/if}
-              <div class="flex gap-2">
-                <input
-                  type="text"
-                  class="flex-1 bg-exo-black/60 border border-exo-yellow/30 rounded px-3 py-1.5 text-xs font-mono text-white placeholder-white/30 focus:outline-none focus:border-exo-yellow/50"
-                  placeholder="Or paste model ID directly..."
-                  bind:value={manualModelId}
-                  onkeydown={(e) => {
-                    if (e.key === "Enter") handleAddManualModel();
-                  }}
-                />
-                <button
-                  type="button"
-                  onclick={handleAddManualModel}
-                  disabled={!manualModelId.trim() || addingModelId !== null}
-                  class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-orange-500/10 text-orange-400 border border-orange-400/30 hover:bg-orange-500/20 transition-colors rounded disabled:opacity-50 disabled:cursor-not-allowed"
-                >
-                  Add
-                </button>
-              </div>
-            </div>
-          </div>
-        {:else if filteredGroups.length === 0}
-          <div
-            class="flex flex-col items-center justify-center h-full text-white/40 p-8"
-          >
-            <svg class="w-12 h-12 mb-3" viewBox="0 0 24 24" fill="currentColor">
-              <path
-                d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"
-              />
-            </svg>
-            <p class="font-mono text-sm">No models found</p>
-            {#if hasActiveFilters || searchQuery}
-              <button
-                type="button"
-                class="mt-2 text-xs text-exo-yellow hover:underline"
-                onclick={() => {
-                  searchQuery = "";
-                  clearFilters();
-                }}
-              >
-                Clear filters
-              </button>
-            {/if}
-          </div>
-        {:else}
-          {#each filteredGroups as group}
-            <ModelPickerGroup
-              {group}
-              isExpanded={expandedGroups.has(group.id)}
-              isFavorite={favorites.has(group.id)}
-              {selectedModelId}
-              {canModelFit}
-              onToggleExpand={() => toggleGroupExpanded(group.id)}
-              onSelectModel={handleSelect}
-              {onToggleFavorite}
-              onShowInfo={(g) => (infoGroup = g)}
-            />
-          {/each}
-        {/if}
-      </div>
-    </div>
-
-    <!-- Footer with active filters indicator -->
-    {#if hasActiveFilters}
-      <div
-        class="flex items-center gap-2 px-3 py-2 border-t border-exo-yellow/10 bg-exo-medium-gray/20 text-xs font-mono text-white/50"
-      >
-        <span>Filters:</span>
-        {#each filters.capabilities as cap}
-          <span class="px-1.5 py-0.5 bg-exo-yellow/20 text-exo-yellow rounded"
-            >{cap}</span
-          >
-        {/each}
-        {#if filters.sizeRange}
-          <span class="px-1.5 py-0.5 bg-exo-yellow/20 text-exo-yellow rounded">
-            {filters.sizeRange.min}GB - {filters.sizeRange.max}GB
-          </span>
-        {/if}
-        <button
-          type="button"
-          class="ml-auto text-white/40 hover:text-white/60"
-          onclick={clearFilters}
-        >
-          Clear all
-        </button>
-      </div>
-    {/if}
-  </div>
-
-  <!-- Info modal -->
-  {#if infoGroup}
-    <div
-      class="fixed inset-0 z-[60] bg-black/60"
-      transition:fade={{ duration: 150 }}
-      onclick={() => (infoGroup = null)}
-      role="presentation"
-    ></div>
-    <div
-      class="fixed z-[60] top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[min(80vw,400px)] bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-2xl p-4"
-      transition:fly={{ y: 10, duration: 200, easing: cubicOut }}
-      role="dialog"
-      aria-modal="true"
-    >
-      <div class="flex items-start justify-between mb-3">
-        <h3 class="font-mono text-lg text-white">{infoGroup.name}</h3>
-        <button
-          type="button"
-          class="p-1 rounded hover:bg-white/10 transition-colors text-white/50"
-          onclick={() => (infoGroup = null)}
-          title="Close model details"
-          aria-label="Close info dialog"
-        >
-          <svg class="w-4 h-4" viewBox="0 0 24 24" fill="currentColor">
-            <path
-              d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
-            />
-          </svg>
-        </button>
-      </div>
-      <div class="space-y-2 text-xs font-mono">
-        <div class="flex items-center gap-2">
-          <span class="text-white/40">Family:</span>
-          <span class="text-white/70">{infoGroup.family || "Unknown"}</span>
-        </div>
-        <div class="flex items-center gap-2">
-          <span class="text-white/40">Capabilities:</span>
-          <span class="text-white/70">{infoGroup.capabilities.join(", ")}</span>
-        </div>
-        <div class="flex items-center gap-2">
-          <span class="text-white/40">Variants:</span>
-          <span class="text-white/70">{infoGroup.variants.length}</span>
-        </div>
-        {#if infoGroup.variants.length > 0}
-          <div class="mt-3 pt-3 border-t border-exo-yellow/10">
-            <span class="text-white/40">Available quantizations:</span>
-            <div class="flex flex-wrap gap-1 mt-1">
-              {#each infoGroup.variants as variant}
-                <span
-                  class="px-1.5 py-0.5 bg-white/10 text-white/60 rounded text-[10px]"
-                >
-                  {variant.quantization || "default"} ({Math.round(
-                    (variant.storage_size_megabytes || 0) / 1024,
-                  )}GB)
-                </span>
-              {/each}
-            </div>
-          </div>
-        {/if}
-      </div>
-    </div>
-  {/if}
-{/if}
--- a/dashboard/src/lib/components/index.ts
+++ b/dashboard/src/lib/components/index.ts
@@ -6,9 +6,3 @@ export { default as ChatSidebar } from "./ChatSidebar.svelte";
 export { default as ModelCard } from "./ModelCard.svelte";
 export { default as MarkdownContent } from "./MarkdownContent.svelte";
 export { default as ImageParamsPanel } from "./ImageParamsPanel.svelte";
-export { default as FamilyLogos } from "./FamilyLogos.svelte";
-export { default as FamilySidebar } from "./FamilySidebar.svelte";
-export { default as HuggingFaceResultItem } from "./HuggingFaceResultItem.svelte";
-export { default as ModelFilterPopover } from "./ModelFilterPopover.svelte";
-export { default as ModelPickerGroup } from "./ModelPickerGroup.svelte";
-export { default as ModelPickerModal } from "./ModelPickerModal.svelte";
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
@@ -173,41 +173,6 @@ export interface PlacementPreviewResponse {
  previews: PlacementPreview[];
 }

-interface ImageApiResponse {
-  created: number;
-  data: Array<{ b64_json?: string; url?: string }>;
-}
-
-// Trace API response types
-export interface TraceCategoryStats {
-  totalUs: number;
-  count: number;
-  minUs: number;
-  maxUs: number;
-  avgUs: number;
-}
-
-export interface TraceRankStats {
-  byCategory: Record<string, TraceCategoryStats>;
-}
-
-export interface TraceStatsResponse {
-  taskId: string;
-  totalWallTimeUs: number;
-  byCategory: Record<string, TraceCategoryStats>;
-  byRank: Record<number, TraceRankStats>;
-}
-
-export interface TraceListItem {
-  taskId: string;
-  createdAt: string;
-  fileSize: number;
-}
-
-export interface TraceListResponse {
-  traces: TraceListItem[];
-}
-
 interface RawStateResponse {
  topology?: RawTopology;
  instances?: Record<
@@ -499,6 +464,7 @@ class AppStore {
  private previewsInterval: ReturnType<typeof setInterval> | null = null;
  private lastConversationPersistTs = 0;
  private previousNodeIds: Set<string> = new Set();
+  private activeAbortController: AbortController | null = null;

  constructor() {
    if (browser) {
@@ -1781,6 +1747,9 @@ class AppStore {
    const targetConversationId = this.activeConversationId;
    if (!targetConversationId) return;

+    this.activeAbortController = new AbortController();
+    const signal = this.activeAbortController.signal;
+
    this.isLoading = true;
    this.currentResponse = "";
    this.ttftMs = null;
@@ -1915,6 +1884,7 @@ class AppStore {
          temperature: 0.7,
          stream: true,
        }),
+        signal,
      });

      if (!response.ok) {
@@ -2010,6 +1980,9 @@ class AppStore {
        this.persistConversation(targetConversationId);
      }
    } catch (error) {
+      if (signal.aborted) {
+        return;
+      }
      console.error("Error sending message:", error);
      this.handleStreamingError(
        error,
@@ -2018,6 +1991,7 @@ class AppStore {
        "Failed to get response",
      );
    } finally {
+      this.activeAbortController = null;
      this.isLoading = false;
      this.currentResponse = "";
      this.saveConversationsToStorage();
@@ -2038,6 +2012,9 @@ class AppStore {
    const targetConversationId = this.activeConversationId;
    if (!targetConversationId) return;

+    this.activeAbortController = new AbortController();
+    const signal = this.activeAbortController.signal;
+
    this.isLoading = true;
    this.currentResponse = "";

@@ -2123,6 +2100,7 @@ class AppStore {
          "Content-Type": "application/json",
        },
        body: JSON.stringify(requestBody),
+        signal,
      });

      if (!response.ok) {
@@ -2130,138 +2108,121 @@ class AppStore {
        throw new Error(`API error: ${response.status} - ${errorText}`);
      }

-      // Streaming requires both stream=true AND partialImages > 0
-      const isStreaming = params.stream && params.partialImages > 0;
+      const reader = response.body?.getReader();
+      if (!reader) {
+        throw new Error("No response body");
+      }

-      if (!isStreaming) {
-        // Non-streaming: parse JSON response directly
-        const jsonResponse = (await response.json()) as ImageApiResponse;
-        const format = params.outputFormat || "png";
-        const mimeType = `image/${format}`;
+      interface ImageGenerationChunk {
+        data?: { b64_json?: string };
+        format?: string;
+        type?: "partial" | "final";
+        image_index?: number;
+        partial_index?: number;
+        total_partials?: number;
+      }

-        const attachments: MessageAttachment[] = jsonResponse.data
-          .filter((img) => img.b64_json)
-          .map((img, index) => ({
-            type: "generated-image" as const,
-            name: `generated-image-${index + 1}.${format}`,
-            preview: `data:${mimeType};base64,${img.b64_json}`,
-            mimeType,
-          }));
+      const numImages = params.numImages;

+      await this.parseSSEStream<ImageGenerationChunk>(
+        reader,
+        targetConversationId,
+        (parsed) => {
+          const imageData = parsed.data?.b64_json;
+
+          if (imageData) {
+            const format = parsed.format || "png";
+            const mimeType = `image/${format}`;
+            const imageIndex = parsed.image_index ?? 0;
+
+            if (parsed.type === "partial") {
+              // Update with partial image and progress
+              const partialNum = (parsed.partial_index ?? 0) + 1;
+              const totalPartials = parsed.total_partials ?? 3;
+              const progressText =
+                numImages > 1
+                  ? `Generating image ${imageIndex + 1}/${numImages}... ${partialNum}/${totalPartials}`
+                  : `Generating... ${partialNum}/${totalPartials}`;
+
+              const partialAttachment: MessageAttachment = {
+                type: "generated-image",
+                name: `generated-image.${format}`,
+                preview: `data:${mimeType};base64,${imageData}`,
+                mimeType,
+              };
+
+              this.updateConversationMessage(
+                targetConversationId,
+                assistantMessage.id,
+                (msg) => {
+                  msg.content = progressText;
+                  if (imageIndex === 0) {
+                    // First image - safe to replace attachments with partial preview
+                    msg.attachments = [partialAttachment];
+                  } else {
+                    // Subsequent images - keep existing finals, show partial at current position
+                    const existingAttachments = msg.attachments || [];
+                    // Keep only the completed final images (up to current imageIndex)
+                    const finals = existingAttachments.slice(0, imageIndex);
+                    msg.attachments = [...finals, partialAttachment];
+                  }
+                },
+              );
+            } else if (parsed.type === "final") {
+              // Final image - replace partial at this position
+              const newAttachment: MessageAttachment = {
+                type: "generated-image",
+                name: `generated-image-${imageIndex + 1}.${format}`,
+                preview: `data:${mimeType};base64,${imageData}`,
+                mimeType,
+              };
+
+              this.updateConversationMessage(
+                targetConversationId,
+                assistantMessage.id,
+                (msg) => {
+                  if (imageIndex === 0) {
+                    // First final image - replace any partial preview
+                    msg.attachments = [newAttachment];
+                  } else {
+                    // Subsequent images - keep previous finals, replace partial at current position
+                    const existingAttachments = msg.attachments || [];
+                    // Slice keeps indices 0 to imageIndex-1 (the previous final images)
+                    const previousFinals = existingAttachments.slice(
+                      0,
+                      imageIndex,
+                    );
+                    msg.attachments = [...previousFinals, newAttachment];
+                  }
+
+                  // Update progress message for multiple images
+                  if (numImages > 1 && imageIndex < numImages - 1) {
+                    msg.content = `Generating image ${imageIndex + 2}/${numImages}...`;
+                  } else {
+                    msg.content = "";
+                  }
+                },
+              );
+            }
+
+            this.syncActiveMessagesIfNeeded(targetConversationId);
+          }
+        },
+      );
+    } catch (error) {
+      if (signal.aborted) {
+        // Clean up the "Generating image..." message on cancellation
        this.updateConversationMessage(
          targetConversationId,
          assistantMessage.id,
          (msg) => {
-            msg.content = "";
-            msg.attachments = attachments;
+            msg.content = "Cancelled";
+            msg.attachments = [];
          },
        );
        this.syncActiveMessagesIfNeeded(targetConversationId);
-      } else {
-        // Streaming mode: use SSE parser
-        const reader = response.body?.getReader();
-        if (!reader) {
-          throw new Error("No response body");
-        }
-
-        interface ImageGenerationChunk {
-          data?: { b64_json?: string };
-          format?: string;
-          type?: "partial" | "final";
-          image_index?: number;
-          partial_index?: number;
-          total_partials?: number;
-        }
-
-        const numImages = params.numImages;
-
-        await this.parseSSEStream<ImageGenerationChunk>(
-          reader,
-          targetConversationId,
-          (parsed) => {
-            const imageData = parsed.data?.b64_json;
-
-            if (imageData) {
-              const format = parsed.format || "png";
-              const mimeType = `image/${format}`;
-              const imageIndex = parsed.image_index ?? 0;
-
-              if (parsed.type === "partial") {
-                // Update with partial image and progress
-                const partialNum = (parsed.partial_index ?? 0) + 1;
-                const totalPartials = parsed.total_partials ?? 3;
-                const progressText =
-                  numImages > 1
-                    ? `Generating image ${imageIndex + 1}/${numImages}... ${partialNum}/${totalPartials}`
-                    : `Generating... ${partialNum}/${totalPartials}`;
-
-                const partialAttachment: MessageAttachment = {
-                  type: "generated-image",
-                  name: `generated-image.${format}`,
-                  preview: `data:${mimeType};base64,${imageData}`,
-                  mimeType,
-                };
-
-                this.updateConversationMessage(
-                  targetConversationId,
-                  assistantMessage.id,
-                  (msg) => {
-                    msg.content = progressText;
-                    if (imageIndex === 0) {
-                      // First image - safe to replace attachments with partial preview
-                      msg.attachments = [partialAttachment];
-                    } else {
-                      // Subsequent images - keep existing finals, show partial at current position
-                      const existingAttachments = msg.attachments || [];
-                      // Keep only the completed final images (up to current imageIndex)
-                      const finals = existingAttachments.slice(0, imageIndex);
-                      msg.attachments = [...finals, partialAttachment];
-                    }
-                  },
-                );
-              } else if (parsed.type === "final") {
-                // Final image - replace partial at this position
-                const newAttachment: MessageAttachment = {
-                  type: "generated-image",
-                  name: `generated-image-${imageIndex + 1}.${format}`,
-                  preview: `data:${mimeType};base64,${imageData}`,
-                  mimeType,
-                };
-
-                this.updateConversationMessage(
-                  targetConversationId,
-                  assistantMessage.id,
-                  (msg) => {
-                    if (imageIndex === 0) {
-                      // First final image - replace any partial preview
-                      msg.attachments = [newAttachment];
-                    } else {
-                      // Subsequent images - keep previous finals, replace partial at current position
-                      const existingAttachments = msg.attachments || [];
-                      // Slice keeps indices 0 to imageIndex-1 (the previous final images)
-                      const previousFinals = existingAttachments.slice(
-                        0,
-                        imageIndex,
-                      );
-                      msg.attachments = [...previousFinals, newAttachment];
-                    }
-
-                    // Update progress message for multiple images
-                    if (numImages > 1 && imageIndex < numImages - 1) {
-                      msg.content = `Generating image ${imageIndex + 2}/${numImages}...`;
-                    } else {
-                      msg.content = "";
-                    }
-                  },
-                );
-              }
-
-              this.syncActiveMessagesIfNeeded(targetConversationId);
-            }
-          },
-        );
+        return;
      }
-    } catch (error) {
      console.error("Error generating image:", error);
      this.handleStreamingError(
        error,
@@ -2270,6 +2231,7 @@ class AppStore {
        "Failed to generate image",
      );
    } finally {
+      this.activeAbortController = null;
      this.isLoading = false;
      this.saveConversationsToStorage();
    }
@@ -2293,6 +2255,9 @@ class AppStore {
    const targetConversationId = this.activeConversationId;
    if (!targetConversationId) return;

+    this.activeAbortController = new AbortController();
+    const signal = this.activeAbortController.signal;
+
    this.isLoading = true;
    this.currentResponse = "";

@@ -2401,6 +2366,7 @@ class AppStore {
      const apiResponse = await fetch("/v1/images/edits", {
        method: "POST",
        body: formData,
+        signal,
      });

      if (!apiResponse.ok) {
@@ -2408,99 +2374,83 @@ class AppStore {
        throw new Error(`API error: ${apiResponse.status} - ${errorText}`);
      }

-      // Streaming requires both stream=true AND partialImages > 0
-      const isStreaming = params.stream && params.partialImages > 0;
+      const reader = apiResponse.body?.getReader();
+      if (!reader) {
+        throw new Error("No response body");
+      }

-      if (!isStreaming) {
-        // Non-streaming: parse JSON response directly
-        const jsonResponse = (await apiResponse.json()) as ImageApiResponse;
-        const format = params.outputFormat || "png";
-        const mimeType = `image/${format}`;
-        const attachments: MessageAttachment[] = jsonResponse.data
-          .filter((img) => img.b64_json)
-          .map((img) => ({
-            type: "generated-image" as const,
-            name: `edited-image.${format}`,
-            preview: `data:${mimeType};base64,${img.b64_json}`,
-            mimeType,
-          }));
+      interface ImageEditChunk {
+        data?: { b64_json?: string };
+        format?: string;
+        type?: "partial" | "final";
+        partial_index?: number;
+        total_partials?: number;
+      }

+      await this.parseSSEStream<ImageEditChunk>(
+        reader,
+        targetConversationId,
+        (parsed) => {
+          const imageData = parsed.data?.b64_json;
+
+          if (imageData) {
+            const format = parsed.format || "png";
+            const mimeType = `image/${format}`;
+            if (parsed.type === "partial") {
+              // Update with partial image and progress
+              const partialNum = (parsed.partial_index ?? 0) + 1;
+              const totalPartials = parsed.total_partials ?? 3;
+              this.updateConversationMessage(
+                targetConversationId,
+                assistantMessage.id,
+                (msg) => {
+                  msg.content = `Editing... ${partialNum}/${totalPartials}`;
+                  msg.attachments = [
+                    {
+                      type: "generated-image",
+                      name: `edited-image.${format}`,
+                      preview: `data:${mimeType};base64,${imageData}`,
+                      mimeType,
+                    },
+                  ];
+                },
+              );
+            } else if (parsed.type === "final") {
+              // Final image
+              this.updateConversationMessage(
+                targetConversationId,
+                assistantMessage.id,
+                (msg) => {
+                  msg.content = "";
+                  msg.attachments = [
+                    {
+                      type: "generated-image",
+                      name: `edited-image.${format}`,
+                      preview: `data:${mimeType};base64,${imageData}`,
+                      mimeType,
+                    },
+                  ];
+                },
+              );
+            }
+            this.syncActiveMessagesIfNeeded(targetConversationId);
+          }
+        },
+      );
+    } catch (error) {
+      if (signal.aborted) {
+        // Clean up the "Editing image..." message on cancellation
        this.updateConversationMessage(
          targetConversationId,
          assistantMessage.id,
          (msg) => {
-            msg.content = "";
-            msg.attachments = attachments;
+            msg.content = "cancelled";
+            msg.attachments = [];
          },
        );
        this.syncActiveMessagesIfNeeded(targetConversationId);
-      } else {
-        // Streaming mode: use SSE parser
-        const reader = apiResponse.body?.getReader();
-        if (!reader) {
-          throw new Error("No response body");
-        }
-
-        interface ImageEditChunk {
-          data?: { b64_json?: string };
-          format?: string;
-          type?: "partial" | "final";
-          partial_index?: number;
-          total_partials?: number;
-        }
-
-        await this.parseSSEStream<ImageEditChunk>(
-          reader,
-          targetConversationId,
-          (parsed) => {
-            const imageData = parsed.data?.b64_json;
-
-            if (imageData) {
-              const format = parsed.format || "png";
-              const mimeType = `image/${format}`;
-              if (parsed.type === "partial") {
-                // Update with partial image and progress
-                const partialNum = (parsed.partial_index ?? 0) + 1;
-                const totalPartials = parsed.total_partials ?? 3;
-                this.updateConversationMessage(
-                  targetConversationId,
-                  assistantMessage.id,
-                  (msg) => {
-                    msg.content = `Editing... ${partialNum}/${totalPartials}`;
-                    msg.attachments = [
-                      {
-                        type: "generated-image",
-                        name: `edited-image.${format}`,
-                        preview: `data:${mimeType};base64,${imageData}`,
-                        mimeType,
-                      },
-                    ];
-                  },
-                );
-              } else if (parsed.type === "final") {
-                // Final image
-                this.updateConversationMessage(
-                  targetConversationId,
-                  assistantMessage.id,
-                  (msg) => {
-                    msg.content = "";
-                    msg.attachments = [
-                      {
-                        type: "generated-image",
-                        name: `edited-image.${format}`,
-                        preview: `data:${mimeType};base64,${imageData}`,
-                        mimeType,
-                      },
-                    ];
-                  },
-                );
-              }
-              this.syncActiveMessagesIfNeeded(targetConversationId);
-            }
-          },
-        );
+        return;
      }
-    } catch (error) {
      console.error("Error editing image:", error);
      this.handleStreamingError(
        error,
@@ -2509,11 +2459,24 @@ class AppStore {
        "Failed to edit image",
      );
    } finally {
+      this.activeAbortController = null;
      this.isLoading = false;
      this.saveConversationsToStorage();
    }
  }

+  /**
+   * Cancel an in-flight request by aborting the active fetch
+   */
+  cancelRequest(): void {
+    if (this.activeAbortController) {
+      this.activeAbortController.abort();
+      this.activeAbortController = null;
+    }
+    this.isLoading = false;
+    this.currentResponse = "";
+  }
+
  /**
   * Clear current chat and go back to welcome state
   */
@@ -2585,49 +2548,6 @@ class AppStore {
      throw error;
    }
  }
-
-  /**
-   * List all available traces
-   */
-  async listTraces(): Promise<TraceListResponse> {
-    const response = await fetch("/v1/traces");
-    if (!response.ok) {
-      throw new Error(`Failed to list traces: ${response.status}`);
-    }
-    return (await response.json()) as TraceListResponse;
-  }
-
-  /**
-   * Check if a trace exists for a given task ID
-   */
-  async checkTraceExists(taskId: string): Promise<boolean> {
-    try {
-      const response = await fetch(`/v1/traces/${encodeURIComponent(taskId)}`);
-      return response.ok;
-    } catch {
-      return false;
-    }
-  }
-
-  /**
-   * Get computed statistics for a task's trace
-   */
-  async fetchTraceStats(taskId: string): Promise<TraceStatsResponse> {
-    const response = await fetch(
-      `/v1/traces/${encodeURIComponent(taskId)}/stats`,
-    );
-    if (!response.ok) {
-      throw new Error(`Failed to fetch trace stats: ${response.status}`);
-    }
-    return (await response.json()) as TraceStatsResponse;
-  }
-
-  /**
-   * Get the URL for the raw trace file (for Perfetto)
-   */
-  getTraceRawUrl(taskId: string): string {
-    return `/v1/traces/${encodeURIComponent(taskId)}/raw`;
-  }
 }

 export const appStore = new AppStore();
@@ -2693,6 +2613,7 @@ export const editMessage = (messageId: string, newContent: string) =>
 export const editAndRegenerate = (messageId: string, newContent: string) =>
  appStore.editAndRegenerate(messageId, newContent);
 export const regenerateLastResponse = () => appStore.regenerateLastResponse();
+export const cancelRequest = () => appStore.cancelRequest();

 // Conversation actions
 export const conversations = () => appStore.conversations;
@@ -2739,12 +2660,3 @@ export const startDownload = (nodeId: string, shardMetadata: object) =>
  appStore.startDownload(nodeId, shardMetadata);
 export const deleteDownload = (nodeId: string, modelId: string) =>
  appStore.deleteDownload(nodeId, modelId);
-
-// Trace actions
-export const listTraces = () => appStore.listTraces();
-export const checkTraceExists = (taskId: string) =>
-  appStore.checkTraceExists(taskId);
-export const fetchTraceStats = (taskId: string) =>
-  appStore.fetchTraceStats(taskId);
-export const getTraceRawUrl = (taskId: string) =>
-  appStore.getTraceRawUrl(taskId);
--- a/dashboard/src/lib/stores/favorites.svelte.ts
+++ b/dashboard/src/lib/stores/favorites.svelte.ts
@@ -1,97 +0,0 @@
-/**
- * FavoritesStore - Manages favorite models with localStorage persistence
- */
-
-import { browser } from "$app/environment";
-
-const FAVORITES_KEY = "exo-favorite-models";
-
-class FavoritesStore {
-  favorites = $state<Set<string>>(new Set());
-
-  constructor() {
-    if (browser) {
-      this.loadFromStorage();
-    }
-  }
-
-  private loadFromStorage() {
-    try {
-      const stored = localStorage.getItem(FAVORITES_KEY);
-      if (stored) {
-        const parsed = JSON.parse(stored) as string[];
-        this.favorites = new Set(parsed);
-      }
-    } catch (error) {
-      console.error("Failed to load favorites:", error);
-    }
-  }
-
-  private saveToStorage() {
-    try {
-      const array = Array.from(this.favorites);
-      localStorage.setItem(FAVORITES_KEY, JSON.stringify(array));
-    } catch (error) {
-      console.error("Failed to save favorites:", error);
-    }
-  }
-
-  add(baseModelId: string) {
-    const next = new Set(this.favorites);
-    next.add(baseModelId);
-    this.favorites = next;
-    this.saveToStorage();
-  }
-
-  remove(baseModelId: string) {
-    const next = new Set(this.favorites);
-    next.delete(baseModelId);
-    this.favorites = next;
-    this.saveToStorage();
-  }
-
-  toggle(baseModelId: string) {
-    if (this.favorites.has(baseModelId)) {
-      this.remove(baseModelId);
-    } else {
-      this.add(baseModelId);
-    }
-  }
-
-  isFavorite(baseModelId: string): boolean {
-    return this.favorites.has(baseModelId);
-  }
-
-  getAll(): string[] {
-    return Array.from(this.favorites);
-  }
-
-  getSet(): Set<string> {
-    return new Set(this.favorites);
-  }
-
-  hasAny(): boolean {
-    return this.favorites.size > 0;
-  }
-
-  clearAll() {
-    this.favorites = new Set();
-    this.saveToStorage();
-  }
-}
-
-export const favoritesStore = new FavoritesStore();
-
-export const favorites = () => favoritesStore.favorites;
-export const hasFavorites = () => favoritesStore.hasAny();
-export const isFavorite = (baseModelId: string) =>
-  favoritesStore.isFavorite(baseModelId);
-export const toggleFavorite = (baseModelId: string) =>
-  favoritesStore.toggle(baseModelId);
-export const addFavorite = (baseModelId: string) =>
-  favoritesStore.add(baseModelId);
-export const removeFavorite = (baseModelId: string) =>
-  favoritesStore.remove(baseModelId);
-export const getFavorites = () => favoritesStore.getAll();
-export const getFavoritesSet = () => favoritesStore.getSet();
-export const clearFavorites = () => favoritesStore.clearAll();
--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -5,13 +5,7 @@
    ChatMessages,
    ChatSidebar,
    ModelCard,
-    ModelPickerModal,
  } from "$lib/components";
-  import {
-    favorites,
-    toggleFavorite,
-    getFavoritesSet,
-  } from "$lib/stores/favorites.svelte";
  import {
    hasStartedChat,
    isTopologyMinimized,
@@ -106,11 +100,6 @@
      storage_size_megabytes?: number;
      tasks?: string[];
      hugging_face_id?: string;
-      is_custom?: boolean;
-      family?: string;
-      quantization?: string;
-      base_model?: string;
-      capabilities?: string[];
    }>
  >([]);

@@ -222,11 +211,9 @@
  let launchingModelId = $state<string | null>(null);
  let instanceDownloadExpandedNodes = $state<Set<string>>(new Set());

-  // Model picker modal state
-  let isModelPickerOpen = $state(false);
-
-  // Favorites state (reactive)
-  const favoritesSet = $derived(getFavoritesSet());
+  // Custom dropdown state
+  let isModelDropdownOpen = $state(false);
+  let modelDropdownSearch = $state("");

  // Slider dragging state
  let isDraggingSlider = $state(false);
@@ -543,47 +530,6 @@
    }
  }

-  async function addModelFromPicker(modelId: string) {
-    const response = await fetch("/models/add", {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({ model_id: modelId }),
-    });
-
-    if (!response.ok) {
-      let message = `Failed to add model (${response.status}: ${response.statusText})`;
-      try {
-        const err = await response.json();
-        if (err.detail) message = err.detail;
-      } catch {
-        // use default message
-      }
-      throw new Error(message);
-    }
-
-    await fetchModels();
-  }
-
-  async function deleteCustomModel(modelId: string) {
-    try {
-      const response = await fetch(
-        `/models/custom/${encodeURIComponent(modelId)}`,
-        { method: "DELETE" },
-      );
-      if (response.ok) {
-        await fetchModels();
-      }
-    } catch {
-      console.error("Failed to delete custom model");
-    }
-  }
-
-  function handleModelPickerSelect(modelId: string) {
-    selectPreviewModel(modelId);
-    saveLaunchDefaults();
-    isModelPickerOpen = false;
-  }
-
  async function launchInstance(
    modelId: string,
    specificPreview?: PlacementPreview | null,
@@ -2414,12 +2360,14 @@
              >
            </div>

-            <!-- Model Picker Button -->
-            <div class="flex-shrink-0 mb-3">
+            <!-- Model Dropdown (Custom) -->
+            <div class="flex-shrink-0 mb-3 relative">
              <button
                type="button"
-                onclick={() => (isModelPickerOpen = true)}
-                class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-2.5 text-sm font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 relative"
+                onclick={() => (isModelDropdownOpen = !isModelDropdownOpen)}
+                class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-2.5 text-sm font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 {isModelDropdownOpen
+                  ? 'border-exo-yellow/70'
+                  : ''}"
              >
                {#if selectedModelId}
                  {@const foundModel = models.find(
@@ -2427,12 +2375,54 @@
                  )}
                  {#if foundModel}
                    {@const sizeGB = getModelSizeGB(foundModel)}
+                    {@const isImageModel = modelSupportsImageGeneration(
+                      foundModel.id,
+                    )}
+                    {@const isImageEditModel = modelSupportsImageEditing(
+                      foundModel.id,
+                    )}
                    <span
                      class="flex items-center justify-between gap-2 w-full pr-4"
                    >
                      <span
                        class="flex items-center gap-2 text-exo-light-gray truncate"
                      >
+                        {#if isImageModel}
+                          <svg
+                            class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                            fill="none"
+                            viewBox="0 0 24 24"
+                            stroke="currentColor"
+                            stroke-width="2"
+                          >
+                            <rect
+                              x="3"
+                              y="3"
+                              width="18"
+                              height="18"
+                              rx="2"
+                              ry="2"
+                            />
+                            <circle cx="8.5" cy="8.5" r="1.5" />
+                            <polyline points="21 15 16 10 5 21" />
+                          </svg>
+                        {/if}
+                        {#if isImageEditModel}
+                          <svg
+                            class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                            fill="none"
+                            viewBox="0 0 24 24"
+                            stroke="currentColor"
+                            stroke-width="2"
+                          >
+                            <path
+                              d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
+                            />
+                            <path
+                              d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
+                            />
+                          </svg>
+                        {/if}
                        <span class="truncate"
                          >{foundModel.name || foundModel.id}</span
                        >
@@ -2449,24 +2439,142 @@
                {:else}
                  <span class="text-white/50">— SELECT MODEL —</span>
                {/if}
-                <div
-                  class="absolute right-3 top-1/2 -translate-y-1/2 pointer-events-none"
-                >
-                  <svg
-                    class="w-4 h-4 text-exo-yellow/60"
-                    fill="none"
-                    viewBox="0 0 24 24"
-                    stroke="currentColor"
-                  >
-                    <path
-                      stroke-linecap="round"
-                      stroke-linejoin="round"
-                      stroke-width="2"
-                      d="M19 9l-7 7-7-7"
-                    />
-                  </svg>
-                </div>
              </button>
+              <div
+                class="absolute right-3 top-1/2 -translate-y-1/2 pointer-events-none transition-transform duration-200 {isModelDropdownOpen
+                  ? 'rotate-180'
+                  : ''}"
+              >
+                <svg
+                  class="w-4 h-4 text-exo-yellow/60"
+                  fill="none"
+                  viewBox="0 0 24 24"
+                  stroke="currentColor"
+                >
+                  <path
+                    stroke-linecap="round"
+                    stroke-linejoin="round"
+                    stroke-width="2"
+                    d="M19 9l-7 7-7-7"
+                  />
+                </svg>
+              </div>
+
+              {#if isModelDropdownOpen}
+                <!-- Backdrop to close dropdown -->
+                <button
+                  type="button"
+                  class="fixed inset-0 z-40 cursor-default"
+                  onclick={() => (isModelDropdownOpen = false)}
+                  aria-label="Close dropdown"
+                ></button>
+
+                <!-- Dropdown Panel -->
+                <div
+                  class="absolute top-full left-0 right-0 mt-1 bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-50 max-h-64 overflow-y-auto"
+                >
+                  <!-- Search within dropdown -->
+                  <div
+                    class="sticky top-0 bg-exo-dark-gray border-b border-exo-medium-gray/30 p-2"
+                  >
+                    <input
+                      type="text"
+                      placeholder="Search models..."
+                      bind:value={modelDropdownSearch}
+                      class="w-full bg-exo-dark-gray/60 border border-exo-medium-gray/30 rounded px-2 py-1.5 text-xs font-mono text-white/80 placeholder:text-white/40 focus:outline-none focus:border-exo-yellow/50"
+                    />
+                  </div>
+
+                  <!-- Options -->
+                  <div class="py-1">
+                    {#each sortedModels().filter((m) => !modelDropdownSearch || (m.name || m.id)
+                          .toLowerCase()
+                          .includes(modelDropdownSearch.toLowerCase())) as model}
+                      {@const sizeGB = getModelSizeGB(model)}
+                      {@const modelCanFit = hasEnoughMemory(model)}
+                      {@const isImageModel = modelSupportsImageGeneration(
+                        model.id,
+                      )}
+                      {@const isImageEditModel = modelSupportsImageEditing(
+                        model.id,
+                      )}
+                      <button
+                        type="button"
+                        onclick={() => {
+                          if (modelCanFit) {
+                            selectPreviewModel(model.id);
+                            saveLaunchDefaults();
+                            isModelDropdownOpen = false;
+                            modelDropdownSearch = "";
+                          }
+                        }}
+                        disabled={!modelCanFit}
+                        class="w-full px-3 py-2 text-left text-sm font-mono tracking-wide transition-colors duration-100 flex items-center justify-between gap-2 {selectedModelId ===
+                        model.id
+                          ? 'bg-transparent text-exo-yellow cursor-pointer'
+                          : modelCanFit
+                            ? 'text-white/80 hover:text-exo-yellow cursor-pointer'
+                            : 'text-white/30 cursor-default'}"
+                      >
+                        <span class="flex items-center gap-2 truncate flex-1">
+                          {#if isImageModel}
+                            <svg
+                              class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                              fill="none"
+                              viewBox="0 0 24 24"
+                              stroke="currentColor"
+                              stroke-width="2"
+                              aria-label="Image generation model"
+                            >
+                              <rect
+                                x="3"
+                                y="3"
+                                width="18"
+                                height="18"
+                                rx="2"
+                                ry="2"
+                              />
+                              <circle cx="8.5" cy="8.5" r="1.5" />
+                              <polyline points="21 15 16 10 5 21" />
+                            </svg>
+                          {/if}
+                          {#if isImageEditModel}
+                            <svg
+                              class="w-4 h-4 flex-shrink-0 text-exo-yellow"
+                              fill="none"
+                              viewBox="0 0 24 24"
+                              stroke="currentColor"
+                              stroke-width="2"
+                              aria-label="Image editing model"
+                            >
+                              <path
+                                d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
+                              />
+                              <path
+                                d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
+                              />
+                            </svg>
+                          {/if}
+                          <span class="truncate">{model.name || model.id}</span>
+                        </span>
+                        <span
+                          class="flex-shrink-0 text-xs {modelCanFit
+                            ? 'text-white/50'
+                            : 'text-red-400/60'}"
+                        >
+                          {sizeGB >= 1
+                            ? sizeGB.toFixed(0)
+                            : sizeGB.toFixed(1)}GB
+                        </span>
+                      </button>
+                    {:else}
+                      <div class="px-3 py-2 text-xs text-white/50 font-mono">
+                        No models found
+                      </div>
+                    {/each}
+                  </div>
+                </div>
+              {/if}
            </div>

            <!-- Configuration Options -->
@@ -3246,22 +3354,3 @@
    {/if}
  </main>
 </div>
-
-<ModelPickerModal
-  isOpen={isModelPickerOpen}
-  {models}
-  {selectedModelId}
-  favorites={favoritesSet}
-  existingModelIds={new Set(models.map((m) => m.id))}
-  canModelFit={(modelId) => {
-    const model = models.find((m) => m.id === modelId);
-    return model ? hasEnoughMemory(model) : false;
-  }}
-  onSelect={handleModelPickerSelect}
-  onClose={() => (isModelPickerOpen = false)}
-  onToggleFavorite={toggleFavorite}
-  onAddModel={addModelFromPicker}
-  onDeleteModel={deleteCustomModel}
-  totalMemoryGB={clusterMemory().total / (1024 * 1024 * 1024)}
-  usedMemoryGB={clusterMemory().used / (1024 * 1024 * 1024)}
-/>
--- a/dashboard/src/routes/traces/+page.svelte
+++ b/dashboard/src/routes/traces/+page.svelte
@@ -1,190 +0,0 @@
-<script lang="ts">
-  import { onMount } from "svelte";
-  import {
-    listTraces,
-    getTraceRawUrl,
-    type TraceListItem,
-  } from "$lib/stores/app.svelte";
-  import HeaderNav from "$lib/components/HeaderNav.svelte";
-
-  let traces = $state<TraceListItem[]>([]);
-  let loading = $state(true);
-  let error = $state<string | null>(null);
-
-  function formatBytes(bytes: number): string {
-    if (!bytes || bytes <= 0) return "0B";
-    const units = ["B", "KB", "MB", "GB"];
-    const i = Math.min(
-      Math.floor(Math.log(bytes) / Math.log(1024)),
-      units.length - 1,
-    );
-    const val = bytes / Math.pow(1024, i);
-    return `${val.toFixed(val >= 10 ? 0 : 1)}${units[i]}`;
-  }
-
-  function formatDate(isoString: string): string {
-    const date = new Date(isoString);
-    return date.toLocaleString();
-  }
-
-  async function downloadTrace(taskId: string) {
-    const response = await fetch(getTraceRawUrl(taskId));
-    const blob = await response.blob();
-    const url = URL.createObjectURL(blob);
-    const a = document.createElement("a");
-    a.href = url;
-    a.download = `trace_${taskId}.json`;
-    a.click();
-    URL.revokeObjectURL(url);
-  }
-
-  async function openInPerfetto(taskId: string) {
-    // Fetch trace data from our local API
-    const response = await fetch(getTraceRawUrl(taskId));
-    const traceData = await response.arrayBuffer();
-
-    // Open Perfetto UI
-    const perfettoWindow = window.open("https://ui.perfetto.dev");
-    if (!perfettoWindow) {
-      alert("Failed to open Perfetto. Please allow popups.");
-      return;
-    }
-
-    // Wait for Perfetto to be ready, then send trace via postMessage
-    const onMessage = (e: MessageEvent) => {
-      if (e.data === "PONG") {
-        window.removeEventListener("message", onMessage);
-        perfettoWindow.postMessage(
-          {
-            perfetto: {
-              buffer: traceData,
-              title: `Trace ${taskId}`,
-            },
-          },
-          "https://ui.perfetto.dev",
-        );
-      }
-    };
-    window.addEventListener("message", onMessage);
-
-    // Ping Perfetto until it responds
-    const pingInterval = setInterval(() => {
-      perfettoWindow.postMessage("PING", "https://ui.perfetto.dev");
-    }, 50);
-
-    // Clean up after 10 seconds
-    setTimeout(() => {
-      clearInterval(pingInterval);
-      window.removeEventListener("message", onMessage);
-    }, 10000);
-  }
-
-  async function refresh() {
-    loading = true;
-    error = null;
-    try {
-      const response = await listTraces();
-      traces = response.traces;
-    } catch (e) {
-      error = e instanceof Error ? e.message : "Failed to load traces";
-    } finally {
-      loading = false;
-    }
-  }
-
-  onMount(() => {
-    refresh();
-  });
-</script>
-
-<div class="min-h-screen bg-exo-dark-gray text-white">
-  <HeaderNav showHome={true} />
-  <div class="max-w-7xl mx-auto px-4 lg:px-8 py-6 space-y-6">
-    <div class="flex items-center justify-between gap-4 flex-wrap">
-      <div>
-        <h1
-          class="text-2xl font-mono tracking-[0.2em] uppercase text-exo-yellow"
-        >
-          Traces
-        </h1>
-      </div>
-      <div class="flex items-center gap-3">
-        <button
-          type="button"
-          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
-          onclick={refresh}
-          disabled={loading}
-        >
-          Refresh
-        </button>
-      </div>
-    </div>
-
-    {#if loading}
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray"
-      >
-        <div class="text-sm">Loading traces...</div>
-      </div>
-    {:else if error}
-      <div
-        class="rounded border border-red-500/30 bg-red-500/10 p-6 text-center text-red-400"
-      >
-        <div class="text-sm">{error}</div>
-      </div>
-    {:else if traces.length === 0}
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray space-y-2"
-      >
-        <div class="text-sm">No traces found.</div>
-        <div class="text-xs text-exo-light-gray/70">
-          Run exo with EXO_TRACING_ENABLED=1 to collect traces.
-        </div>
-      </div>
-    {:else}
-      <div class="space-y-3">
-        {#each traces as trace}
-          <div
-            class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 flex items-center justify-between gap-4"
-          >
-            <div class="min-w-0 flex-1">
-              <a
-                href="#/traces/{trace.taskId}"
-                class="text-sm font-mono text-white hover:text-exo-yellow transition-colors truncate block"
-              >
-                {trace.taskId}
-              </a>
-              <div class="text-xs text-exo-light-gray font-mono mt-1">
-                {formatDate(trace.createdAt)} &bull; {formatBytes(
-                  trace.fileSize,
-                )}
-              </div>
-            </div>
-            <div class="flex items-center gap-2 shrink-0">
-              <a
-                href="#/traces/{trace.taskId}"
-                class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
-              >
-                View Stats
-              </a>
-              <button
-                type="button"
-                class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
-                onclick={() => downloadTrace(trace.taskId)}
-              >
-                Download
-              </button>
-              <button
-                type="button"
-                class="text-xs font-mono text-exo-dark-gray bg-exo-yellow hover:bg-exo-yellow/90 transition-colors uppercase px-2 py-1 rounded font-semibold"
-                onclick={() => openInPerfetto(trace.taskId)}
-              >
-                View Trace
-              </button>
-            </div>
-          </div>
-        {/each}
-      </div>
-    {/if}
-  </div>
-</div>
--- a/dashboard/src/routes/traces/[taskId]/+page.svelte
+++ b/dashboard/src/routes/traces/[taskId]/+page.svelte
@@ -1,367 +0,0 @@
-<script lang="ts">
-  import { page } from "$app/stores";
-  import { onMount } from "svelte";
-  import {
-    fetchTraceStats,
-    getTraceRawUrl,
-    type TraceStatsResponse,
-    type TraceCategoryStats,
-  } from "$lib/stores/app.svelte";
-  import HeaderNav from "$lib/components/HeaderNav.svelte";
-
-  const taskId = $derived($page.params.taskId);
-
-  let stats = $state<TraceStatsResponse | null>(null);
-  let loading = $state(true);
-  let error = $state<string | null>(null);
-
-  function formatDuration(us: number): string {
-    if (us < 1000) return `${us.toFixed(0)}us`;
-    if (us < 1_000_000) return `${(us / 1000).toFixed(2)}ms`;
-    return `${(us / 1_000_000).toFixed(2)}s`;
-  }
-
-  function formatPercentage(part: number, total: number): string {
-    if (total === 0) return "0.0%";
-    return `${((part / total) * 100).toFixed(1)}%`;
-  }
-
-  // Parse hierarchical categories like "sync/compute" into phases
-  type PhaseData = {
-    name: string;
-    subcategories: { name: string; stats: TraceCategoryStats }[];
-    totalUs: number; // From outer span (e.g., "sync" category)
-    stepCount: number; // Count of outer span events
-  };
-
-  function parsePhases(
-    byCategory: Record<string, TraceCategoryStats>,
-  ): PhaseData[] {
-    const phases = new Map<
-      string,
-      {
-        subcats: Map<string, TraceCategoryStats>;
-        outerStats: TraceCategoryStats | null;
-      }
-    >();
-
-    for (const [category, catStats] of Object.entries(byCategory)) {
-      if (category.includes("/")) {
-        const [phase, subcat] = category.split("/", 2);
-        if (!phases.has(phase)) {
-          phases.set(phase, { subcats: new Map(), outerStats: null });
-        }
-        phases.get(phase)!.subcats.set(subcat, catStats);
-      } else {
-        // Outer span - this IS the phase total
-        if (!phases.has(category)) {
-          phases.set(category, { subcats: new Map(), outerStats: null });
-        }
-        phases.get(category)!.outerStats = catStats;
-      }
-    }
-
-    return Array.from(phases.entries())
-      .filter(([_, data]) => data.outerStats !== null) // Only phases with outer spans
-      .map(([name, data]) => ({
-        name,
-        subcategories: Array.from(data.subcats.entries())
-          .map(([subName, subStats]) => ({ name: subName, stats: subStats }))
-          .sort((a, b) => b.stats.totalUs - a.stats.totalUs),
-        totalUs: data.outerStats!.totalUs, // Outer span total
-        stepCount: data.outerStats!.count, // Number of steps
-      }))
-      .sort((a, b) => b.totalUs - a.totalUs);
-  }
-
-  async function downloadTrace() {
-    if (!taskId) return;
-    const response = await fetch(getTraceRawUrl(taskId));
-    const blob = await response.blob();
-    const url = URL.createObjectURL(blob);
-    const a = document.createElement("a");
-    a.href = url;
-    a.download = `trace_${taskId}.json`;
-    a.click();
-    URL.revokeObjectURL(url);
-  }
-
-  async function openInPerfetto() {
-    if (!taskId) return;
-
-    // Fetch trace data from our local API
-    const response = await fetch(getTraceRawUrl(taskId));
-    const traceData = await response.arrayBuffer();
-
-    // Open Perfetto UI
-    const perfettoWindow = window.open("https://ui.perfetto.dev");
-    if (!perfettoWindow) {
-      alert("Failed to open Perfetto. Please allow popups.");
-      return;
-    }
-
-    // Wait for Perfetto to be ready, then send trace via postMessage
-    const onMessage = (e: MessageEvent) => {
-      if (e.data === "PONG") {
-        window.removeEventListener("message", onMessage);
-        perfettoWindow.postMessage(
-          {
-            perfetto: {
-              buffer: traceData,
-              title: `Trace ${taskId}`,
-            },
-          },
-          "https://ui.perfetto.dev",
-        );
-      }
-    };
-    window.addEventListener("message", onMessage);
-
-    // Ping Perfetto until it responds
-    const pingInterval = setInterval(() => {
-      perfettoWindow.postMessage("PING", "https://ui.perfetto.dev");
-    }, 50);
-
-    // Clean up after 10 seconds
-    setTimeout(() => {
-      clearInterval(pingInterval);
-      window.removeEventListener("message", onMessage);
-    }, 10000);
-  }
-
-  onMount(async () => {
-    if (!taskId) {
-      error = "No task ID provided";
-      loading = false;
-      return;
-    }
-
-    try {
-      stats = await fetchTraceStats(taskId);
-    } catch (e) {
-      error = e instanceof Error ? e.message : "Failed to load trace";
-    } finally {
-      loading = false;
-    }
-  });
-
-  const phases = $derived(stats ? parsePhases(stats.byCategory) : []);
-  const sortedRanks = $derived(
-    stats
-      ? Object.keys(stats.byRank)
-          .map(Number)
-          .sort((a, b) => a - b)
-      : [],
-  );
-  const nodeCount = $derived(sortedRanks.length || 1);
-</script>
-
-<div class="min-h-screen bg-exo-dark-gray text-white">
-  <HeaderNav showHome={true} />
-  <div class="max-w-7xl mx-auto px-4 lg:px-8 py-6 space-y-6">
-    <div class="flex items-center justify-between gap-4 flex-wrap">
-      <div>
-        <h1
-          class="text-2xl font-mono tracking-[0.2em] uppercase text-exo-yellow"
-        >
-          Trace
-        </h1>
-        <p class="text-sm text-exo-light-gray font-mono truncate max-w-lg">
-          {taskId}
-        </p>
-      </div>
-      <div class="flex items-center gap-3">
-        <a
-          href="#/traces"
-          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-3 py-1.5 rounded"
-        >
-          All Traces
-        </a>
-        <button
-          type="button"
-          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-3 py-1.5 rounded"
-          onclick={downloadTrace}
-          disabled={loading || !!error}
-        >
-          Download
-        </button>
-        <button
-          type="button"
-          class="text-xs font-mono text-exo-dark-gray bg-exo-yellow hover:bg-exo-yellow/90 transition-colors uppercase px-3 py-1.5 rounded font-semibold"
-          onclick={openInPerfetto}
-          disabled={loading || !!error}
-        >
-          View Trace
-        </button>
-      </div>
-    </div>
-
-    {#if loading}
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray"
-      >
-        <div class="text-sm">Loading trace data...</div>
-      </div>
-    {:else if error}
-      <div
-        class="rounded border border-red-500/30 bg-red-500/10 p-6 text-center text-red-400"
-      >
-        <div class="text-sm">{error}</div>
-      </div>
-    {:else if stats}
-      <!-- Wall Time Summary -->
-      <div
-        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-2"
-      >
-        <h2
-          class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
-        >
-          Summary
-        </h2>
-        <div class="text-3xl font-mono text-exo-yellow">
-          {formatDuration(stats.totalWallTimeUs)}
-        </div>
-        <div class="text-xs text-exo-light-gray">Total wall time</div>
-      </div>
-
-      <!-- By Phase -->
-      {#if phases.length > 0}
-        <div
-          class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-4"
-        >
-          <h2
-            class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
-          >
-            By Phase <span class="text-exo-light-gray/50">(avg per node)</span>
-          </h2>
-          <div class="space-y-4">
-            {#each phases as phase}
-              {@const normalizedTotal = phase.totalUs / nodeCount}
-              {@const normalizedStepCount = phase.stepCount / nodeCount}
-              <div class="space-y-2">
-                <div class="flex items-center justify-between">
-                  <span class="text-sm font-mono text-white">{phase.name}</span>
-                  <span class="text-sm font-mono">
-                    <span class="text-exo-yellow"
-                      >{formatDuration(normalizedTotal)}</span
-                    >
-                    <span class="text-exo-light-gray ml-2">
-                      ({normalizedStepCount} steps, {formatDuration(
-                        normalizedTotal / normalizedStepCount,
-                      )}/step)
-                    </span>
-                  </span>
-                </div>
-                {#if phase.subcategories.length > 0}
-                  <div class="pl-4 space-y-1.5">
-                    {#each phase.subcategories as subcat}
-                      {@const normalizedSubcat =
-                        subcat.stats.totalUs / nodeCount}
-                      {@const pct = formatPercentage(
-                        normalizedSubcat,
-                        normalizedTotal,
-                      )}
-                      {@const perStep = normalizedSubcat / normalizedStepCount}
-                      <div
-                        class="flex items-center justify-between text-xs font-mono"
-                      >
-                        <span class="text-exo-light-gray">{subcat.name}</span>
-                        <span class="text-white">
-                          {formatDuration(normalizedSubcat)}
-                          <span class="text-exo-light-gray ml-2">({pct})</span>
-                          <span class="text-exo-light-gray/60 ml-2"
-                            >{formatDuration(perStep)}/step</span
-                          >
-                        </span>
-                      </div>
-                      <!-- Progress bar -->
-                      <div
-                        class="relative h-1.5 bg-exo-black/60 rounded-sm overflow-hidden"
-                      >
-                        <div
-                          class="absolute inset-y-0 left-0 bg-gradient-to-r from-exo-yellow to-exo-yellow/70 transition-all duration-300"
-                          style="width: {pct}"
-                        ></div>
-                      </div>
-                    {/each}
-                  </div>
-                {/if}
-              </div>
-            {/each}
-          </div>
-        </div>
-      {/if}
-
-      <!-- By Rank -->
-      {#if sortedRanks.length > 0}
-        <div
-          class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-4"
-        >
-          <h2
-            class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
-          >
-            By Rank
-          </h2>
-          <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
-            {#each sortedRanks as rank}
-              {@const rankStats = stats.byRank[rank]}
-              {@const rankPhases = parsePhases(rankStats.byCategory)}
-              <div
-                class="rounded border border-exo-medium-gray/20 bg-exo-dark-gray/60 p-3 space-y-3"
-              >
-                <div class="text-sm font-mono text-exo-yellow">
-                  Rank {rank}
-                </div>
-                <div class="space-y-2">
-                  {#each rankPhases as phase}
-                    <div class="space-y-1">
-                      <div class="flex items-center justify-between text-xs">
-                        <span class="font-mono text-exo-light-gray"
-                          >{phase.name}</span
-                        >
-                        <span class="font-mono text-white">
-                          {formatDuration(phase.totalUs)}
-                          <span class="text-exo-light-gray/50 ml-1">
-                            ({phase.stepCount}x)
-                          </span>
-                        </span>
-                      </div>
-                      {#if phase.subcategories.length > 0}
-                        <div class="pl-2 space-y-0.5">
-                          {#each phase.subcategories as subcat}
-                            {@const pct = formatPercentage(
-                              subcat.stats.totalUs,
-                              phase.totalUs,
-                            )}
-                            {@const perStep =
-                              subcat.stats.totalUs / phase.stepCount}
-                            <div
-                              class="flex items-center justify-between text-[10px] font-mono"
-                            >
-                              <span class="text-exo-light-gray/70"
-                                >{subcat.name}</span
-                              >
-                              <span class="text-exo-light-gray">
-                                {formatDuration(subcat.stats.totalUs)}
-                                <span class="text-exo-light-gray/50"
-                                  >({pct})</span
-                                >
-                                <span class="text-exo-light-gray/30 ml-1"
-                                  >{formatDuration(perStep)}/step</span
-                                >
-                              </span>
-                            </div>
-                          {/each}
-                        </div>
-                      {/if}
-                    </div>
-                  {/each}
-                </div>
-              </div>
-            {/each}
-          </div>
-        </div>
-      {/if}
-    {/if}
-  </div>
-</div>
--- a/flake.lock
+++ b/flake.lock
@@ -21,9 +21,7 @@
          "nixpkgs"
        ],
        "purescript-overlay": "purescript-overlay",
-        "pyproject-nix": [
-          "pyproject-nix"
-        ]
+        "pyproject-nix": "pyproject-nix"
      },
      "locked": {
        "lastModified": 1765953015,
@@ -151,44 +149,19 @@
        "type": "github"
      }
    },
-    "pyproject-build-systems": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "pyproject-nix": [
-          "pyproject-nix"
-        ],
-        "uv2nix": [
-          "uv2nix"
-        ]
-      },
-      "locked": {
-        "lastModified": 1763662255,
-        "narHash": "sha256-4bocaOyLa3AfiS8KrWjZQYu+IAta05u3gYZzZ6zXbT0=",
-        "owner": "pyproject-nix",
-        "repo": "build-system-pkgs",
-        "rev": "042904167604c681a090c07eb6967b4dd4dae88c",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "build-system-pkgs",
-        "type": "github"
-      }
-    },
    "pyproject-nix": {
      "inputs": {
        "nixpkgs": [
+          "dream2nix",
          "nixpkgs"
        ]
      },
      "locked": {
-        "lastModified": 1764134915,
-        "narHash": "sha256-xaKvtPx6YAnA3HQVp5LwyYG1MaN4LLehpQI8xEdBvBY=",
+        "lastModified": 1763017646,
+        "narHash": "sha256-Z+R2lveIp6Skn1VPH3taQIuMhABg1IizJd8oVdmdHsQ=",
        "owner": "pyproject-nix",
        "repo": "pyproject.nix",
-        "rev": "2c8df1383b32e5443c921f61224b198a2282a657",
+        "rev": "47bd6f296502842643078d66128f7b5e5370790c",
        "type": "github"
      },
      "original": {
@@ -205,10 +178,7 @@
        "flake-parts": "flake-parts",
        "nixpkgs": "nixpkgs",
        "nixpkgs-swift": "nixpkgs-swift",
-        "pyproject-build-systems": "pyproject-build-systems",
-        "pyproject-nix": "pyproject-nix",
-        "treefmt-nix": "treefmt-nix",
-        "uv2nix": "uv2nix"
+        "treefmt-nix": "treefmt-nix"
      }
    },
    "rust-analyzer-src": {
@@ -269,29 +239,6 @@
        "repo": "treefmt-nix",
        "type": "github"
      }
-    },
-    "uv2nix": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "pyproject-nix": [
-          "pyproject-nix"
-        ]
-      },
-      "locked": {
-        "lastModified": 1767701098,
-        "narHash": "sha256-CJhKZnWb3gumR9oTRjFvCg/6lYTGbZRU7xtvcyWIRwU=",
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "rev": "9d357f0d2ce6f5f35ec7959d7e704452352eb4da",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "type": "github"
-      }
    }
  },
  "root": "root",
--- a/flake.nix
+++ b/flake.nix
@@ -24,26 +24,6 @@
    dream2nix = {
      url = "github:nix-community/dream2nix";
      inputs.nixpkgs.follows = "nixpkgs";
-      inputs.pyproject-nix.follows = "pyproject-nix";
-    };
-
-    # Python packaging with uv2nix
-    pyproject-nix = {
-      url = "github:pyproject-nix/pyproject.nix";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-
-    uv2nix = {
-      url = "github:pyproject-nix/uv2nix";
-      inputs.pyproject-nix.follows = "pyproject-nix";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-
-    pyproject-build-systems = {
-      url = "github:pyproject-nix/build-system-pkgs";
-      inputs.pyproject-nix.follows = "pyproject-nix";
-      inputs.uv2nix.follows = "uv2nix";
-      inputs.nixpkgs.follows = "nixpkgs";
    };

    # Pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
@@ -68,7 +48,6 @@
        inputs.treefmt-nix.flakeModule
        ./dashboard/parts.nix
        ./rust/parts.nix
-        ./python/parts.nix
      ];

      perSystem =
@@ -79,11 +58,6 @@
          pkgsSwift = import inputs.nixpkgs-swift { inherit system; };
        in
        {
-          # Allow unfree for metal-toolchain (needed for Darwin Metal packages)
-          _module.args.pkgs = import inputs.nixpkgs {
-            inherit system;
-            config.allowUnfreePredicate = pkg: (pkg.pname or "") == "metal-toolchain";
-          };
          treefmt = {
            projectRootFile = "flake.nix";
            programs = {
@@ -105,24 +79,14 @@
                enable = true;
                package = pkgsSwift.swiftPackages.swift-format;
              };
-              shfmt.enable = true;
            };
          };

-          packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin (
-            let
-              uvLock = builtins.fromTOML (builtins.readFile ./uv.lock);
-              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx") uvLock.package);
-              uvLockMlxVersion = mlxPackage.version;
-            in
-            {
-              metal-toolchain = pkgs.callPackage ./nix/metal-toolchain.nix { };
-              mlx = pkgs.callPackage ./nix/mlx.nix {
-                metal-toolchain = self'.packages.metal-toolchain;
-                inherit uvLockMlxVersion;
-              };
-            }
-          );
+          checks.lint = pkgs.runCommand "lint-check" { } ''
+            export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
+            ${pkgs.ruff}/bin/ruff check ${inputs.self}/
+            touch $out
+          '';

          devShells.default = with pkgs; pkgs.mkShell {
            inputsFrom = [ self'.checks.cargo-build ];
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 export NIX_CONFIG := "extra-experimental-features = nix-command flakes"

 fmt:
-    treefmt || nix fmt
+    nix fmt

 lint:
    uv run ruff check --fix
--- a/nix/darwin-build-fixes.patch
+++ b/nix/darwin-build-fixes.patch
@@ -1,79 +0,0 @@
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 0ed30932..d8528132 100644
--- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -177,11 +177,7 @@ if(MLX_BUILD_METAL)
-     add_compile_definitions(MLX_METAL_DEBUG)
-   endif()
-
-  # Throw an error if xcrun not found
-  execute_process(
-    COMMAND zsh "-c" "/usr/bin/xcrun -sdk macosx --show-sdk-version"
-    OUTPUT_VARIABLE MACOS_SDK_VERSION
-    OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ERROR_IS_FATAL ANY)
-+  set(MACOS_SDK_VERSION @sdkVersion@)
-
-   if(${MACOS_SDK_VERSION} LESS 14.0)
-     message(
-@@ -199,11 +195,8 @@ if(MLX_BUILD_METAL)
-     endif()
-     set(XCRUN_FLAGS "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
-   endif()
-  execute_process(
-    COMMAND
-      zsh "-c"
-      "echo \"__METAL_VERSION__\" | xcrun -sdk macosx metal ${XCRUN_FLAGS} -E -x metal -P - | tail -1 | tr -d '\n'"
-    OUTPUT_VARIABLE MLX_METAL_VERSION COMMAND_ERROR_IS_FATAL ANY)
-+  set(
-+    MLX_METAL_VERSION @metalVersion@)
-   FetchContent_Declare(metal_cpp URL ${METAL_CPP_URL})
-   FetchContent_MakeAvailable(metal_cpp)
-   target_include_directories(
-diff --git a/cmake/extension.cmake b/cmake/extension.cmake
-index 13db804a..5b385132 100644
--- a/cmake/extension.cmake
-+++ b/cmake/extension.cmake
-@@ -36,7 +36,7 @@ macro(mlx_build_metallib)
-   add_custom_command(
-     OUTPUT ${MTLLIB_BUILD_TARGET}
-     COMMAND
-      xcrun -sdk macosx metal
-+      metal -fmodules-cache-path=${CMAKE_BINARY_DIR}/metal-cache
-       "$<LIST:TRANSFORM,${MTLLIB_INCLUDE_DIRS},PREPEND,-I>"
-       ${MTLLIB_COMPILE_OPTIONS} ${MTLLIB_SOURCES} -o ${MTLLIB_BUILD_TARGET}
-     DEPENDS ${MTLLIB_DEPS} ${MTLLIB_SOURCES}
-diff --git a/mlx/backend/metal/kernels/CMakeLists.txt b/mlx/backend/metal/kernels/CMakeLists.txt
-index 262b0495..5c7446ad 100644
--- a/mlx/backend/metal/kernels/CMakeLists.txt
-+++ b/mlx/backend/metal/kernels/CMakeLists.txt
-@@ -29,7 +29,7 @@ function(build_kernel_base TARGET SRCFILE DEPS)
-                     "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
-   endif()
-   add_custom_command(
-    COMMAND xcrun -sdk macosx metal ${METAL_FLAGS} -c ${SRCFILE}
-+    COMMAND metal -fmodules-cache-path=${CMAKE_BINARY_DIR}/metal-cache ${METAL_FLAGS} -c ${SRCFILE}
-             -I${PROJECT_SOURCE_DIR} -o ${TARGET}.air
-     DEPENDS ${SRCFILE} ${DEPS} ${BASE_HEADERS}
-     OUTPUT ${TARGET}.air
-@@ -170,7 +170,7 @@ endif()
-
- add_custom_command(
-   OUTPUT ${MLX_METAL_PATH}/mlx.metallib
-  COMMAND xcrun -sdk macosx metallib ${KERNEL_AIR} -o
-+  COMMAND metallib ${KERNEL_AIR} -o
-           ${MLX_METAL_PATH}/mlx.metallib
-   DEPENDS ${KERNEL_AIR}
-   COMMENT "Building mlx.metallib"
-diff --git a/mlx/backend/metal/make_compiled_preamble.sh b/mlx/backend/metal/make_compiled_preamble.sh
-index bb55ed3a..94ea7dd7 100644
--- a/mlx/backend/metal/make_compiled_preamble.sh
-+++ b/mlx/backend/metal/make_compiled_preamble.sh
-@@ -31,7 +31,7 @@ OUTPUT_FILE=${OUTPUT_DIR}/${SRC_NAME}.cpp
- mkdir -p "$OUTPUT_DIR"
-
- # Use the metal compiler to get a list of headers (with depth)
-CCC="xcrun -sdk macosx metal -x metal"
-+CCC="metal -x metal -fmodules-cache-path=${OUTPUT_DIR}/metal-cache"
- HDRS=$( $CCC -I"$SRC_DIR" -I"$JIT_INCLUDES" -DMLX_METAL_JIT -E -P -CC -C -H "$INPUT_FILE" $CFLAGS -w 2>&1 1>/dev/null )
-
- # Remove any included system frameworks (for MetalPerformancePrimitive headers)
--- a/nix/metal-toolchain.nix
+++ b/nix/metal-toolchain.nix
@@ -1,56 +0,0 @@
-{ lib, stdenvNoCC, requireFile, nix }:
-
-let
-  narFile = requireFile {
-    name = "metal-toolchain-17C48.nar";
-    message = ''
-      The Metal Toolchain NAR must be available.
-
-      If you have cachix configured for exo.cachix.org, this should be automatic.
-
-      Otherwise:
-        1. Install Xcode 26+ from the App Store
-        2. Run: xcodebuild -downloadComponent MetalToolchain
-        3. Export the toolchain:
-           hdiutil attach "$(find /System/Library/AssetsV2/com_apple_MobileAsset_MetalToolchain -name '*.dmg' | head -1)" -mountpoint /tmp/metal-dmg
-           cp -R /tmp/metal-dmg/Metal.xctoolchain /tmp/metal-export
-           hdiutil detach /tmp/metal-dmg
-        4. Create NAR and add to store:
-           nix nar pack /tmp/metal-export > /tmp/metal-toolchain-17C48.nar
-           nix store add --mode flat /tmp/metal-toolchain-17C48.nar
-    '';
-    hash = "sha256-ayR5mXN4sZAddwKEG2OszGRF93k9ZFc7H0yi2xbylQw=";
-  };
-in
-stdenvNoCC.mkDerivation {
-  pname = "metal-toolchain";
-  version = "17C48";
-
-  dontUnpack = true;
-  dontBuild = true;
-  dontFixup = true;
-
-  nativeBuildInputs = [ nix ];
-
-  installPhase = ''
-    runHook preInstall
-
-    nix-store --restore $out < ${narFile}
-
-    # Create bin directory with symlinks for PATH
-    mkdir -p $out/bin
-    ln -s $out/usr/bin/metal $out/bin/metal
-    ln -s $out/usr/bin/metallib $out/bin/metallib
-
-    runHook postInstall
-  '';
-
-  # Metal language version for CMake (from: echo __METAL_VERSION__ | metal -E -x metal -P -)
-  passthru.metalVersion = "400";
-
-  meta = {
-    description = "Apple Metal compiler toolchain";
-    platforms = [ "aarch64-darwin" ];
-    license = lib.licenses.unfree;
-  };
-}
--- a/nix/mlx.nix
+++ b/nix/mlx.nix
@@ -1,158 +0,0 @@
-{ stdenv
-, lib
-, fetchFromGitHub
-, replaceVars
-, fetchzip
-, cmake
-, nlohmann_json
-, apple-sdk_26
-, metal-toolchain
-, runCommand
-, fmt
-, python313Packages
-, uvLockMlxVersion
-}:
-
-assert stdenv.isDarwin;
-
-let
-  python = python313Packages.python;
-
-  # Static dependencies included directly during compilation
-  gguf-tools = fetchFromGitHub {
-    owner = "antirez";
-    repo = "gguf-tools";
-    rev = "8fa6eb65236618e28fd7710a0fba565f7faa1848";
-    hash = "sha256-15FvyPOFqTOr5vdWQoPnZz+mYH919++EtghjozDlnSA=";
-  };
-
-  metal_cpp = fetchzip {
-    url = "https://developer.apple.com/metal/cpp/files/metal-cpp_26.zip";
-    hash = "sha256-7n2eI2lw/S+Us6l7YPAATKwcIbRRpaQ8VmES7S8ZjY8=";
-  };
-
-  nanobind = fetchFromGitHub {
-    owner = "wjakob";
-    repo = "nanobind";
-    rev = "v2.10.2";
-    hash = "sha256-io44YhN+VpfHFWyvvLWSanRgbzA0whK8WlDNRi3hahU=";
-    fetchSubmodules = true;
-  };
-
-  mlx = stdenv.mkDerivation rec {
-    pname = "mlx";
-    version = let v = "0.30.4"; in
-      assert v == uvLockMlxVersion || throw "MLX version mismatch: nix/mlx.nix has ${v} but uv.lock has ${uvLockMlxVersion}. Update both the version and hash in nix/mlx.nix.";
-      v;
-    pyproject = true;
-
-    src = fetchFromGitHub {
-      owner = "ml-explore";
-      repo = "mlx";
-      tag = "v${version}";
-      hash = "sha256-OJk6jPlbaSlsUdk3ADz3tWcRzTWXRof3/q8Soe1AO6w=";
-    };
-
-    patches = [
-      (replaceVars ./darwin-build-fixes.patch {
-        sdkVersion = apple-sdk_26.version;
-        metalVersion = metal-toolchain.metalVersion;
-      })
-    ];
-
-    postPatch = ''
-      substituteInPlace mlx/backend/cpu/jit_compiler.cpp \
-        --replace-fail "g++" "$CXX"
-    '';
-
-    dontUseCmakeConfigure = true;
-
-    enableParallelBuilding = true;
-
-    # Allows multiple cores to be used in Python builds.
-    postUnpack = ''
-      export MAKEFLAGS+="''${enableParallelBuilding:+-j$NIX_BUILD_CORES}"
-    '';
-
-    # Updates the wrong fetcher rev attribute
-    passthru.skipBulkUpdate = true;
-
-    env = {
-      DEV_RELEASE = 1;
-      CMAKE_ARGS = toString [
-        (lib.cmakeBool "USE_SYSTEM_FMT" true)
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_GGUFLIB" "${gguf-tools}")
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_JSON" "${nlohmann_json.src}")
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_NANOBIND" "${nanobind}")
-        (lib.cmakeBool "FETCHCONTENT_FULLY_DISCONNECTED" true)
-        (lib.cmakeBool "MLX_BUILD_METAL" true)
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_METAL_CPP" "${metal_cpp}")
-        (lib.cmakeOptionType "string" "CMAKE_OSX_DEPLOYMENT_TARGET" "${apple-sdk_26.version}")
-        (lib.cmakeOptionType "filepath" "CMAKE_OSX_SYSROOT" "${apple-sdk_26.passthru.sdkroot}")
-      ];
-      SDKROOT = apple-sdk_26.passthru.sdkroot;
-      MACOSX_DEPLOYMENT_TARGET = apple-sdk_26.version;
-    };
-
-    build-system = [
-      python313Packages.setuptools
-    ];
-
-    nativeBuildInputs = [
-      cmake
-      metal-toolchain
-      python313Packages.pypaBuildHook
-      python313Packages.pypaInstallHook
-      python313Packages.setuptools
-      python313Packages.typing-extensions
-      python313Packages.wheel
-      python313Packages.cmake
-      python313Packages.ninja
-    ];
-
-    buildInputs = [
-      fmt
-      gguf-tools
-      python313Packages.nanobind
-      python313Packages.pybind11
-      apple-sdk_26
-    ];
-
-    # Tests require Metal GPU access which isn't available in the Nix sandbox.
-    # To run tests, build with: nix build --option sandbox false .#mlx.passthru.tests.mlxTest
-    doCheck = false;
-
-    pythonImportsCheck = [ "mlx" ];
-
-    passthru.tests = {
-      # Runs example scripts to verify MLX works. Requires --option sandbox false
-      # since Metal GPU access is needed.
-      mlxTest =
-        runCommand "run-mlx-examples"
-          {
-            buildInputs = [ mlx ];
-            nativeBuildInputs = [ python ];
-          }
-          ''
-            cp ${src}/examples/python/logistic_regression.py .
-            ${python.interpreter} logistic_regression.py
-            rm logistic_regression.py
-
-            cp ${src}/examples/python/linear_regression.py .
-            ${python.interpreter} linear_regression.py
-            rm linear_regression.py
-
-            touch $out
-          '';
-    };
-
-    meta = {
-      homepage = "https://github.com/ml-explore/mlx";
-      description = "Array framework for Apple silicon";
-      changelog = "https://github.com/ml-explore/mlx/releases/tag/${src.tag}";
-      license = lib.licenses.mit;
-      platforms = [ "aarch64-darwin" ];
-    };
-  };
-in
-mlx
--- a/packaging/pyinstaller/exo.spec
+++ b/packaging/pyinstaller/exo.spec
@@ -10,7 +10,6 @@ PROJECT_ROOT = Path.cwd()
 SOURCE_ROOT = PROJECT_ROOT / "src"
 ENTRYPOINT = SOURCE_ROOT / "exo" / "__main__.py"
 DASHBOARD_DIR = PROJECT_ROOT / "dashboard" / "build"
-RESOURCES_DIR = PROJECT_ROOT / "resources"
 EXO_SHARED_MODELS_DIR = SOURCE_ROOT / "exo" / "shared" / "models"

 if not ENTRYPOINT.is_file():
@@ -19,9 +18,6 @@ if not ENTRYPOINT.is_file():
 if not DASHBOARD_DIR.is_dir():
    raise SystemExit(f"Dashboard assets are missing: {DASHBOARD_DIR}")

-if not RESOURCES_DIR.is_dir():
-    raise SystemExit(f"Resource assets are missing: {RESOURCES_DIR}")
-
 if not EXO_SHARED_MODELS_DIR.is_dir():
    raise SystemExit(f"Shared model assets are missing: {EXO_SHARED_MODELS_DIR}")

@@ -62,7 +58,6 @@ HIDDEN_IMPORTS = sorted(

 DATAS: list[tuple[str, str]] = [
    (str(DASHBOARD_DIR), "dashboard"),
-    (str(RESOURCES_DIR), "resources"),
    (str(MLX_LIB_DIR), "mlx/lib"),
    (str(EXO_SHARED_MODELS_DIR), "exo/shared/models"),
 ]
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,9 +17,9 @@ dependencies = [
    "loguru>=0.7.3",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
-    "mlx==0.30.4; sys_platform == 'darwin'",
-    "mlx[cpu]==0.30.4; sys_platform == 'linux'",
-    "mlx-lm",
+    "mlx==0.30.3; sys_platform == 'darwin'",
+    "mlx[cpu]==0.30.3; sys_platform == 'linux'",
+    "mlx-lm @ git+https://github.com/AlexCheema/mlx-lm.git@fix-transformers-5.0.0rc2",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
    "hypercorn>=0.18.0",
    "openai-harmony>=0.0.8",
@@ -63,7 +63,6 @@ members = [

 [tool.uv.sources]
 exo_pyo3_bindings = { workspace = true }
-mlx-lm = { git = "https://github.com/ml-explore/mlx-lm", branch = "main" }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
 # mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -1,95 +0,0 @@
-{ inputs, ... }:
-{
-  perSystem =
-    { config, self', pkgs, lib, system, ... }:
-    let
-      # Load workspace from uv.lock
-      workspace = inputs.uv2nix.lib.workspace.loadWorkspace {
-        workspaceRoot = inputs.self;
-      };
-
-      # Create overlay from workspace
-      # Use wheels from PyPI for most packages; we override mlx with our pure Nix Metal build
-      overlay = workspace.mkPyprojectOverlay { sourcePreference = "wheel"; };
-
-      # Override overlay to inject Nix-built components
-      exoOverlay = final: prev: {
-        # Replace workspace exo_pyo3_bindings with Nix-built wheel
-        exo-pyo3-bindings = pkgs.stdenv.mkDerivation {
-          pname = "exo-pyo3-bindings";
-          version = "0.1.0";
-          src = self'.packages.exo_pyo3_bindings;
-          # Install from pre-built wheel
-          nativeBuildInputs = [ final.pyprojectWheelHook ];
-          dontStrip = true;
-        };
-      };
-
-      python = pkgs.python313;
-
-      # Overlay to provide build systems and custom packages
-      buildSystemsOverlay = final: prev: {
-        # Use our pure Nix-built MLX with Metal support
-        mlx = self'.packages.mlx;
-
-        # mlx-lm is a git dependency that needs setuptools
-        mlx-lm = prev.mlx-lm.overrideAttrs (old: {
-          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
-            final.setuptools
-          ];
-        });
-      };
-
-      pythonSet = (pkgs.callPackage inputs.pyproject-nix.build.packages {
-        inherit python;
-      }).overrideScope (
-        lib.composeManyExtensions [
-          inputs.pyproject-build-systems.overlays.default
-          overlay
-          exoOverlay
-          buildSystemsOverlay
-        ]
-      );
-      exoVenv = pythonSet.mkVirtualEnv "exo-env" workspace.deps.default;
-
-      # Virtual environment with dev dependencies for testing
-      testVenv = pythonSet.mkVirtualEnv "exo-test-env" (
-        workspace.deps.default // {
-          exo = [ "dev" ]; # Include pytest, pytest-asyncio, pytest-env
-        }
-      );
-
-      exoPackage = pkgs.runCommand "exo"
-        {
-          nativeBuildInputs = [ pkgs.makeWrapper ];
-        }
-        ''
-          mkdir -p $out/bin
-
-          # Create wrapper scripts
-          for script in exo exo-master exo-worker; do
-            makeWrapper ${exoVenv}/bin/$script $out/bin/$script \
-              --set EXO_DASHBOARD_DIR ${self'.packages.dashboard} \
-              --set EXO_RESOURCES_DIR ${inputs.self + "/resources"} \
-              ${lib.optionalString pkgs.stdenv.isDarwin "--prefix PATH : ${pkgs.macmon}/bin"}
-          done
-        '';
-    in
-    {
-      # Python package only available on macOS (requires MLX/Metal)
-      packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin {
-        exo = exoPackage;
-        # Test environment for running pytest outside of Nix sandbox (needs GPU access)
-        exo-test-env = testVenv;
-      };
-
-      checks = {
-        # Ruff linting (works on all platforms)
-        lint = pkgs.runCommand "ruff-lint" { } ''
-          export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
-          ${pkgs.ruff}/bin/ruff check ${inputs.self}/
-          touch $out
-        '';
-      };
-    };
-}
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-Krea-dev-4bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 15475325472
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 5950704160
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-Krea-dev-8bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 21426029632
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 11901408320
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-Krea-dev"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 33327437952
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 23802816640
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-dev-4bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 15475325472
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 5950704160
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-dev-8bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 21426029632
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 11901408320
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-dev"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 33327437952
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 23802816640
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-schnell-4bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 15470210592
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 5945589280
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-schnell-8bit"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 21415799872
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 11891178560
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
@@ -1,45 +0,0 @@
-model_id = "exolabs/FLUX.1-schnell"
-n_layers = 57
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-
-[storage_size]
-in_bytes = 33306978432
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
-
-[[components]]
-component_name = "text_encoder_2"
-component_path = "text_encoder_2/"
-n_layers = 24
-can_shard = false
-safetensors_index_filename = "model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 9524621312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 57
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 23782357120
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
@@ -1,36 +0,0 @@
-model_id = "exolabs/Qwen-Image-4bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-uses_cfg = true
-
-[storage_size]
-in_bytes = 26799533856
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 10215200544
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
@@ -1,36 +0,0 @@
-model_id = "exolabs/Qwen-Image-8bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-uses_cfg = true
-
-[storage_size]
-in_bytes = 37014734400
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 20430401088
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
@@ -1,36 +0,0 @@
-model_id = "exolabs/Qwen-Image-Edit-2509-4bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-uses_cfg = true
-
-[storage_size]
-in_bytes = 26799533856
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 10215200544
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
@@ -1,36 +0,0 @@
-model_id = "exolabs/Qwen-Image-Edit-2509-8bit"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-uses_cfg = true
-
-[storage_size]
-in_bytes = 37014734400
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 20430401088
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
@@ -1,36 +0,0 @@
-model_id = "exolabs/Qwen-Image-Edit-2509"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["ImageToImage"]
-uses_cfg = true
-
-[storage_size]
-in_bytes = 57445135488
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 40860802176
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image.toml
@@ -1,36 +0,0 @@
-model_id = "exolabs/Qwen-Image"
-n_layers = 60
-hidden_size = 1
-supports_tensor = false
-tasks = ["TextToImage"]
-uses_cfg = true
-
-[storage_size]
-in_bytes = 57445135488
-
-[[components]]
-component_name = "text_encoder"
-component_path = "text_encoder/"
-n_layers = 12
-can_shard = false
-
-[components.storage_size]
-in_bytes = 16584333312
-
-[[components]]
-component_name = "transformer"
-component_path = "transformer/"
-n_layers = 60
-can_shard = true
-safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
-
-[components.storage_size]
-in_bytes = 40860802176
-
-[[components]]
-component_name = "vae"
-component_path = "vae/"
-can_shard = false
-
-[components.storage_size]
-in_bytes = 0
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/DeepSeek-V3.1-4bit"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "deepseek"
-quantization = "4bit"
-base_model = "DeepSeek V3.1"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 405874409472
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/DeepSeek-V3.1-8bit"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "deepseek"
-quantization = "8bit"
-base_model = "DeepSeek V3.1"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 765577920512
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.5-Air-8bit"
-n_layers = 46
-hidden_size = 4096
-supports_tensor = false
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM 4.5 Air"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 122406567936
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.5-Air-bf16"
-n_layers = 46
-hidden_size = 4096
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "bf16"
-base_model = "GLM 4.5 Air"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 229780750336
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-4bit"
-n_layers = 91
-hidden_size = 5120
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "4bit"
-base_model = "GLM 4.7"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 198556925568
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-6bit"
-n_layers = 91
-hidden_size = 5120
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "6bit"
-base_model = "GLM 4.7"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 286737579648
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-8bit-gs32"
-n_layers = 91
-hidden_size = 5120
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM 4.7"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 396963397248
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-4bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "4bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 19327352832
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-5bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "5bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 22548578304
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-6bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "6bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 26843545600
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-4.7-Flash-8bit"
-n_layers = 47
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 34359738368
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Kimi-K2-Instruct-4bit"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "kimi"
-quantization = "4bit"
-base_model = "Kimi K2"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 620622774272
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Kimi-K2-Thinking"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "kimi"
-quantization = ""
-base_model = "Kimi K2"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 706522120192
--- a/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Kimi-K2.5"
-n_layers = 61
-hidden_size = 7168
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "kimi"
-quantization = ""
-base_model = "Kimi K2.5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 662498705408
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
-n_layers = 16
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.2 1B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 729808896
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
-n_layers = 28
-hidden_size = 3072
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.2 3B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 1863319552
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
-n_layers = 28
-hidden_size = 3072
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "8bit"
-base_model = "Llama 3.2 3B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 3501195264
--- a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
-n_layers = 80
-hidden_size = 8192
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.3 70B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 40652242944
--- a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
-n_layers = 80
-hidden_size = 8192
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "8bit"
-base_model = "Llama 3.3 70B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 76799803392
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
-n_layers = 80
-hidden_size = 8192
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.1 70B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 40652242944
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
-n_layers = 32
-hidden_size = 4096
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "4bit"
-base_model = "Llama 3.1 8B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 4637851648
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
-n_layers = 32
-hidden_size = 4096
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "8bit"
-base_model = "Llama 3.1 8B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 8954839040
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
-n_layers = 32
-hidden_size = 4096
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "bf16"
-base_model = "Llama 3.1 8B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 16882073600
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/MiniMax-M2.1-3bit"
-n_layers = 61
-hidden_size = 3072
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "minimax"
-quantization = "3bit"
-base_model = "MiniMax M2.1"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 100086644736
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/MiniMax-M2.1-8bit"
-n_layers = 61
-hidden_size = 3072
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "minimax"
-quantization = "8bit"
-base_model = "MiniMax M2.1"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 242986745856
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-0.6B-4bit"
-n_layers = 28
-hidden_size = 1024
-supports_tensor = false
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "4bit"
-base_model = "Qwen3 0.6B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 342884352
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-0.6B-8bit"
-n_layers = 28
-hidden_size = 1024
-supports_tensor = false
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "8bit"
-base_model = "Qwen3 0.6B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 698351616
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
-n_layers = 94
-hidden_size = 4096
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "4bit"
-base_model = "Qwen3 235B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 141733920768
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
-n_layers = 94
-hidden_size = 4096
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "8bit"
-base_model = "Qwen3 235B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 268435456000
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-30B-A3B-4bit"
-n_layers = 48
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "4bit"
-base_model = "Qwen3 30B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 17612931072
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-30B-A3B-8bit"
-n_layers = 48
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "8bit"
-base_model = "Qwen3 30B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 33279705088
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
-n_layers = 62
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "4bit"
-base_model = "Qwen3 Coder 480B"
-capabilities = ["text", "code"]
-
-[storage_size]
-in_bytes = 289910292480
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
-n_layers = 62
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "8bit"
-base_model = "Qwen3 Coder 480B"
-capabilities = ["text", "code"]
-
-[storage_size]
-in_bytes = 579820584960
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
-n_layers = 48
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "4bit"
-base_model = "Qwen3 Next 80B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 46976204800
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
-n_layers = 48
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "8bit"
-base_model = "Qwen3 Next 80B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 88814387200
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
-n_layers = 48
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "4bit"
-base_model = "Qwen3 Next 80B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 47080074240
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
-n_layers = 48
-hidden_size = 2048
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "qwen"
-quantization = "8bit"
-base_model = "Qwen3 Next 80B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 88814387200
--- a/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
-n_layers = 36
-hidden_size = 2880
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "gpt-oss"
-quantization = "MXFP4-Q8"
-base_model = "GPT-OSS 120B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 70652212224
--- a/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/gpt-oss-20b-MXFP4-Q8"
-n_layers = 24
-hidden_size = 2880
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "gpt-oss"
-quantization = "MXFP4-Q8"
-base_model = "GPT-OSS 20B"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 12025908224
--- a/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml
+++ b/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
-n_layers = 80
-hidden_size = 8192
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "llama"
-quantization = "fp16"
-base_model = "Llama 3.3 70B"
-capabilities = ["text"]
-
-[storage_size]
-in_bytes = 144383672320
--- a/src/exo/download/coordinator.py
+++ b/src/exo/download/coordinator.py
@@ -1,5 +1,4 @@
 import asyncio
-import socket
 from dataclasses import dataclass, field
 from typing import Iterator

@@ -61,37 +60,10 @@ class DownloadCoordinator:

    async def run(self) -> None:
        logger.info("Starting DownloadCoordinator")
-        self._test_internet_connection()
        async with self._tg as tg:
            tg.start_soon(self._command_processor)
            tg.start_soon(self._forward_events)
            tg.start_soon(self._emit_existing_download_progress)
-            tg.start_soon(self._check_internet_connection)
-
-    def _test_internet_connection(self) -> None:
-        try:
-            socket.create_connection(("1.1.1.1", 443), timeout=3).close()
-            self.shard_downloader.set_internet_connection(True)
-        except OSError:
-            self.shard_downloader.set_internet_connection(False)
-        logger.debug(
-            f"Internet connectivity: {self.shard_downloader.internet_connection}"
-        )
-
-    async def _check_internet_connection(self) -> None:
-        first_connection = True
-        while True:
-            await asyncio.sleep(10)
-
-            # Assume that internet connection is set to False on 443 errors.
-            if self.shard_downloader.internet_connection:
-                continue
-
-            self._test_internet_connection()
-
-            if first_connection and self.shard_downloader.internet_connection:
-                first_connection = False
-                self._tg.start_soon(self._emit_existing_download_progress)

    def shutdown(self) -> None:
        self._tg.cancel_scope.cancel()
@@ -269,7 +241,7 @@ class DownloadCoordinator:
    async def _emit_existing_download_progress(self) -> None:
        try:
            while True:
-                logger.debug(
+                logger.info(
                    "DownloadCoordinator: Fetching and emitting existing download progress..."
                )
                async for (
@@ -302,10 +274,10 @@ class DownloadCoordinator:
                    await self.event_sender.send(
                        NodeDownloadProgress(download_progress=status)
                    )
-                logger.debug(
+                logger.info(
                    "DownloadCoordinator: Done emitting existing download progress."
                )
-                await anyio.sleep(60)
+                await anyio.sleep(5 * 60)  # 5 minutes
        except Exception as e:
            logger.error(
                f"DownloadCoordinator: Error emitting existing download progress: {e}"
--- a/src/exo/download/download_utils.py
+++ b/src/exo/download/download_utils.py
@@ -49,10 +49,6 @@ class HuggingFaceAuthenticationError(Exception):
    """Raised when HuggingFace returns 401/403 for a model download."""


-class HuggingFaceRateLimitError(Exception):
-    """429 Huggingface code"""
-
-
 async def _build_auth_error_message(status_code: int, model_id: ModelId) -> str:
    token = await get_hf_token()
    if status_code == 401 and token is None:
@@ -125,20 +121,11 @@ async def ensure_models_dir() -> Path:


 async def delete_model(model_id: ModelId) -> bool:
-    models_dir = await ensure_models_dir()
-    model_dir = models_dir / model_id.normalize()
-    cache_dir = models_dir / "caches" / model_id.normalize()
-
-    deleted = False
-    if await aios.path.exists(model_dir):
-        await asyncio.to_thread(shutil.rmtree, model_dir, ignore_errors=False)
-        deleted = True
-
-    # Also clear cache
-    if await aios.path.exists(cache_dir):
-        await asyncio.to_thread(shutil.rmtree, cache_dir, ignore_errors=False)
-
-    return deleted
+    model_dir = await ensure_models_dir() / model_id.normalize()
+    if not await aios.path.exists(model_dir):
+        return False
+    await asyncio.to_thread(shutil.rmtree, model_dir, ignore_errors=False)
+    return True


 async def seed_models(seed_dir: str | Path):
@@ -158,76 +145,37 @@ async def seed_models(seed_dir: str | Path):
                    logger.error(traceback.format_exc())


-_fetched_file_lists_this_session: set[str] = set()
-
-
 async def fetch_file_list_with_cache(
-    model_id: ModelId,
-    revision: str = "main",
-    recursive: bool = False,
-    skip_internet: bool = False,
-    on_connection_lost: Callable[[], None] = lambda: None,
+    model_id: ModelId, revision: str = "main", recursive: bool = False
 ) -> list[FileListEntry]:
    target_dir = (await ensure_models_dir()) / "caches" / model_id.normalize()
    await aios.makedirs(target_dir, exist_ok=True)
    cache_file = target_dir / f"{model_id.normalize()}--{revision}--file_list.json"
-    cache_key = f"{model_id.normalize()}--{revision}"
-
-    if cache_key in _fetched_file_lists_this_session and await aios.path.exists(
-        cache_file
-    ):
+    if await aios.path.exists(cache_file):
        async with aiofiles.open(cache_file, "r") as f:
            return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
-
-    if skip_internet:
-        if await aios.path.exists(cache_file):
-            async with aiofiles.open(cache_file, "r") as f:
-                return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
-        raise FileNotFoundError(
-            f"No internet connection and no cached file list for {model_id}"
-        )
-
-    try:
-        file_list = await fetch_file_list_with_retry(
-            model_id,
-            revision,
-            recursive=recursive,
-            on_connection_lost=on_connection_lost,
-        )
-        async with aiofiles.open(cache_file, "w") as f:
-            await f.write(
-                TypeAdapter(list[FileListEntry]).dump_json(file_list).decode()
-            )
-        _fetched_file_lists_this_session.add(cache_key)
-        return file_list
-    except Exception as e:
-        if await aios.path.exists(cache_file):
-            logger.warning(
-                f"Failed to fetch file list for {model_id}, using cached data: {e}"
-            )
-            async with aiofiles.open(cache_file, "r") as f:
-                return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
-        raise FileNotFoundError(f"Failed to fetch file list for {model_id}: {e}") from e
+    file_list = await fetch_file_list_with_retry(
+        model_id, revision, recursive=recursive
+    )
+    await aios.makedirs(cache_file.parent, exist_ok=True)
+    async with aiofiles.open(cache_file, "w") as f:
+        await f.write(TypeAdapter(list[FileListEntry]).dump_json(file_list).decode())
+    return file_list


 async def fetch_file_list_with_retry(
-    model_id: ModelId,
-    revision: str = "main",
-    path: str = "",
-    recursive: bool = False,
-    on_connection_lost: Callable[[], None] = lambda: None,
+    model_id: ModelId, revision: str = "main", path: str = "", recursive: bool = False
 ) -> list[FileListEntry]:
-    n_attempts = 3
+    n_attempts = 30
    for attempt in range(n_attempts):
        try:
            return await _fetch_file_list(model_id, revision, path, recursive)
        except HuggingFaceAuthenticationError:
            raise
        except Exception as e:
-            on_connection_lost()
            if attempt == n_attempts - 1:
                raise e
-            await asyncio.sleep(2.0**attempt)
+            await asyncio.sleep(min(8, 0.1 * float(2.0 ** int(attempt))))
    raise Exception(
        f"Failed to fetch file list for {model_id=} {revision=} {path=} {recursive=}"
    )
@@ -247,11 +195,7 @@ async def _fetch_file_list(
        if response.status in [401, 403]:
            msg = await _build_auth_error_message(response.status, model_id)
            raise HuggingFaceAuthenticationError(msg)
-        elif response.status == 429:
-            raise HuggingFaceRateLimitError(
-                f"Couldn't download {model_id} because of HuggingFace rate limit."
-            )
-        elif response.status == 200:
+        if response.status == 200:
            data_json = await response.text()
            data = TypeAdapter(list[FileListEntry]).validate_json(data_json)
            files: list[FileListEntry] = []
@@ -284,7 +228,7 @@ def create_http_session(
    else:
        total_timeout = 1800
        connect_timeout = 60
-        sock_read_timeout = 60
+        sock_read_timeout = 1800
        sock_connect_timeout = 60

    ssl_context = ssl.create_default_context(
@@ -359,9 +303,8 @@ async def download_file_with_retry(
    path: str,
    target_dir: Path,
    on_progress: Callable[[int, int, bool], None] = lambda _, __, ___: None,
-    on_connection_lost: Callable[[], None] = lambda: None,
 ) -> Path:
-    n_attempts = 3
+    n_attempts = 30
    for attempt in range(n_attempts):
        try:
            return await _download_file(
@@ -369,19 +312,14 @@ async def download_file_with_retry(
            )
        except HuggingFaceAuthenticationError:
            raise
-        except HuggingFaceRateLimitError as e:
-            if attempt == n_attempts - 1:
+        except Exception as e:
+            if isinstance(e, FileNotFoundError) or attempt == n_attempts - 1:
                raise e
            logger.error(
                f"Download error on attempt {attempt}/{n_attempts} for {model_id=} {revision=} {path=} {target_dir=}"
            )
            logger.error(traceback.format_exc())
-            await asyncio.sleep(2.0**attempt)
-        except Exception as e:
-            on_connection_lost()
-            if attempt == n_attempts - 1:
-                raise e
-            break
+            await asyncio.sleep(min(8, 0.1 * (2.0**attempt)))
    raise Exception(
        f"Failed to download file {model_id=} {revision=} {path=} {target_dir=}"
    )
@@ -394,28 +332,8 @@ async def _download_file(
    target_dir: Path,
    on_progress: Callable[[int, int, bool], None] = lambda _, __, ___: None,
 ) -> Path:
-    target_path = target_dir / path
-
-    if await aios.path.exists(target_path):
-        local_size = (await aios.stat(target_path)).st_size
-
-        # Try to verify against remote, but allow offline operation
-        try:
-            remote_size, _ = await file_meta(model_id, revision, path)
-            if local_size != remote_size:
-                logger.info(
-                    f"File {path} size mismatch (local={local_size}, remote={remote_size}), re-downloading"
-                )
-                await aios.remove(target_path)
-            else:
-                return target_path
-        except Exception as e:
-            # Offline or network error - trust local file
-            logger.debug(
-                f"Could not verify {path} against remote (offline?): {e}, using local file"
-            )
-            return target_path
-
+    if await aios.path.exists(target_dir / path):
+        return target_dir / path
    await aios.makedirs((target_dir / path).parent, exist_ok=True)
    length, etag = await file_meta(model_id, revision, path)
    remote_hash = etag[:-5] if etag.endswith("-gzip") else etag
@@ -583,9 +501,7 @@ async def download_shard(
    on_progress: Callable[[ShardMetadata, RepoDownloadProgress], Awaitable[None]],
    max_parallel_downloads: int = 8,
    skip_download: bool = False,
-    skip_internet: bool = False,
    allow_patterns: list[str] | None = None,
-    on_connection_lost: Callable[[], None] = lambda: None,
 ) -> tuple[Path, RepoDownloadProgress]:
    if not skip_download:
        logger.debug(f"Downloading {shard.model_card.model_id=}")
@@ -605,11 +521,7 @@ async def download_shard(

    all_start_time = time.time()
    file_list = await fetch_file_list_with_cache(
-        shard.model_card.model_id,
-        revision,
-        recursive=True,
-        skip_internet=skip_internet,
-        on_connection_lost=on_connection_lost,
+        shard.model_card.model_id, revision, recursive=True
    )
    filtered_file_list = list(
        filter_repo_objects(
@@ -630,26 +542,17 @@ async def download_shard(
    async def on_progress_wrapper(
        file: FileListEntry, curr_bytes: int, total_bytes: int, is_renamed: bool
    ) -> None:
-        previous_progress = file_progress.get(file.path)
-
-        # Detect re-download: curr_bytes < previous downloaded means file was deleted and restarted
-        is_redownload = (
-            previous_progress is not None
-            and curr_bytes < previous_progress.downloaded.in_bytes
+        start_time = (
+            file_progress[file.path].start_time
+            if file.path in file_progress
+            else time.time()
+        )
+        downloaded_this_session = (
+            file_progress[file.path].downloaded_this_session.in_bytes
+            + (curr_bytes - file_progress[file.path].downloaded.in_bytes)
+            if file.path in file_progress
+            else curr_bytes
        )
-
-        if is_redownload or previous_progress is None:
-            # Fresh download or re-download: reset tracking
-            start_time = time.time()
-            downloaded_this_session = curr_bytes
-        else:
-            # Continuing download: accumulate
-            start_time = previous_progress.start_time
-            downloaded_this_session = (
-                previous_progress.downloaded_this_session.in_bytes
-                + (curr_bytes - previous_progress.downloaded.in_bytes)
-            )
-
        speed = (
            downloaded_this_session / (time.time() - start_time)
            if time.time() - start_time > 0
@@ -719,7 +622,6 @@ async def download_shard(
                lambda curr_bytes, total_bytes, is_renamed: schedule_progress(
                    file, curr_bytes, total_bytes, is_renamed
                ),
-                on_connection_lost=on_connection_lost,
            )

    if not skip_download:
--- a/src/exo/download/impl_shard_downloader.py
+++ b/src/exo/download/impl_shard_downloader.py
@@ -1,5 +1,4 @@
 import asyncio
-from asyncio import create_task
 from collections.abc import Awaitable
 from pathlib import Path
 from typing import AsyncIterator, Callable
@@ -8,7 +7,7 @@ from loguru import logger

 from exo.download.download_utils import RepoDownloadProgress, download_shard
 from exo.download.shard_downloader import ShardDownloader
-from exo.shared.models.model_cards import ModelCard, ModelId, get_model_cards
+from exo.shared.models.model_cards import MODEL_CARDS, ModelCard, ModelId
 from exo.shared.types.worker.shards import (
    PipelineShardMetadata,
    ShardMetadata,
@@ -22,7 +21,7 @@ def exo_shard_downloader(max_parallel_downloads: int = 8) -> ShardDownloader:


 async def build_base_shard(model_id: ModelId) -> ShardMetadata:
-    model_card = await ModelCard.load(model_id)
+    model_card = await ModelCard.from_hf(model_id)
    return PipelineShardMetadata(
        model_card=model_card,
        device_rank=0,
@@ -50,10 +49,6 @@ class SingletonShardDownloader(ShardDownloader):
        self.shard_downloader = shard_downloader
        self.active_downloads: dict[ShardMetadata, asyncio.Task[Path]] = {}

-    def set_internet_connection(self, value: bool) -> None:
-        self.internet_connection = value
-        self.shard_downloader.set_internet_connection(value)
-
    def on_progress(
        self,
        callback: Callable[[ShardMetadata, RepoDownloadProgress], Awaitable[None]],
@@ -90,10 +85,6 @@ class CachedShardDownloader(ShardDownloader):
        self.shard_downloader = shard_downloader
        self.cache: dict[tuple[str, ShardMetadata], Path] = {}

-    def set_internet_connection(self, value: bool) -> None:
-        self.internet_connection = value
-        self.shard_downloader.set_internet_connection(value)
-
    def on_progress(
        self,
        callback: Callable[[ShardMetadata, RepoDownloadProgress], Awaitable[None]],
@@ -151,8 +142,6 @@ class ResumableShardDownloader(ShardDownloader):
            self.on_progress_wrapper,
            max_parallel_downloads=self.max_parallel_downloads,
            allow_patterns=allow_patterns,
-            skip_internet=not self.internet_connection,
-            on_connection_lost=lambda: self.set_internet_connection(False),
        )
        return target_dir

@@ -165,31 +154,21 @@ class ResumableShardDownloader(ShardDownloader):
            """Helper coroutine that builds the shard for a model and gets its download status."""
            shard = await build_full_shard(model_id)
            return await download_shard(
-                shard,
-                self.on_progress_wrapper,
-                skip_download=True,
-                skip_internet=not self.internet_connection,
-                on_connection_lost=lambda: self.set_internet_connection(False),
+                shard, self.on_progress_wrapper, skip_download=True
            )

-        semaphore = asyncio.Semaphore(self.max_parallel_downloads)
-
-        async def download_with_semaphore(
-            model_card: ModelCard,
-        ) -> tuple[Path, RepoDownloadProgress]:
-            async with semaphore:
-                return await _status_for_model(model_card.model_id)
-
+        # Kick off download status coroutines concurrently
        tasks = [
-            create_task(download_with_semaphore(model_card))
-            for model_card in await get_model_cards()
+            asyncio.create_task(_status_for_model(model_card.model_id))
+            for model_card in MODEL_CARDS.values()
        ]

        for task in asyncio.as_completed(tasks):
            try:
                yield await task
+            # TODO: except Exception
            except Exception as e:
-                logger.warning(f"Error downloading shard: {type(e).__name__}")
+                logger.error("Error downloading shard:", e)

    async def get_shard_download_status_for_shard(
        self, shard: ShardMetadata
--- a/src/exo/download/shard_downloader.py
+++ b/src/exo/download/shard_downloader.py
@@ -16,11 +16,6 @@ from exo.shared.types.worker.shards import (

 # TODO: the PipelineShardMetadata getting reinstantiated is a bit messy. Should this be a classmethod?
 class ShardDownloader(ABC):
-    internet_connection: bool = False
-
-    def set_internet_connection(self, value: bool) -> None:
-        self.internet_connection = value
-
    @abstractmethod
    async def ensure_shard(
        self, shard: ShardMetadata, config_only: bool = False
--- a/src/exo/download/tests/init.py
+++ b/src/exo/download/tests/init.py
--- a/src/exo/download/tests/test_download_verification.py
+++ b/src/exo/download/tests/test_download_verification.py
@@ -1,451 +0,0 @@
-"""Tests for download verification and cache behavior."""
-
-import time
-from collections.abc import AsyncIterator
-from datetime import timedelta
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import aiofiles
-import aiofiles.os as aios
-import pytest
-from pydantic import TypeAdapter
-
-from exo.download.download_utils import (
-    delete_model,
-    fetch_file_list_with_cache,
-)
-from exo.shared.types.common import ModelId
-from exo.shared.types.memory import Memory
-from exo.shared.types.worker.downloads import FileListEntry, RepoFileDownloadProgress
-
-
-@pytest.fixture
-def model_id() -> ModelId:
-    return ModelId("test-org/test-model")
-
-
-@pytest.fixture
-async def temp_models_dir(tmp_path: Path) -> AsyncIterator[Path]:
-    """Set up a temporary models directory for testing."""
-    models_dir = tmp_path / "models"
-    await aios.makedirs(models_dir, exist_ok=True)
-    with patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir):
-        yield models_dir
-
-
-class TestFileVerification:
-    """Tests for file size verification in _download_file."""
-
-    async def test_redownload_when_file_size_changes_upstream(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test that files with mismatched sizes are re-downloaded."""
-        # Import inside test to allow patching
-        from exo.download.download_utils import (
-            _download_file,  # pyright: ignore[reportPrivateUsage]
-        )
-
-        target_dir = tmp_path / "downloads"
-        await aios.makedirs(target_dir, exist_ok=True)
-
-        # Create a local file with wrong size
-        local_file = target_dir / "test.safetensors"
-        async with aiofiles.open(local_file, "wb") as f:
-            await f.write(b"local content")  # 13 bytes
-
-        remote_size = 1000  # Different from local
-        remote_hash = "abc123"
-
-        with (
-            patch(
-                "exo.download.download_utils.file_meta",
-                new_callable=AsyncMock,
-                return_value=(remote_size, remote_hash),
-            ) as mock_file_meta,
-            patch(
-                "exo.download.download_utils.create_http_session"
-            ) as mock_session_factory,
-        ):
-            # Set up mock HTTP response for re-download
-            mock_response = MagicMock()
-            mock_response.status = 200
-            mock_response.content.read = AsyncMock(  # pyright: ignore[reportAny]
-                side_effect=[b"x" * remote_size, b""]
-            )
-
-            mock_session = MagicMock()
-            mock_session.get.return_value.__aenter__ = AsyncMock(  # pyright: ignore[reportAny]
-                return_value=mock_response
-            )
-            mock_session.get.return_value.__aexit__ = AsyncMock(  # pyright: ignore[reportAny]
-                return_value=None
-            )
-            mock_session_factory.return_value.__aenter__ = AsyncMock(  # pyright: ignore[reportAny]
-                return_value=mock_session
-            )
-            mock_session_factory.return_value.__aexit__ = AsyncMock(  # pyright: ignore[reportAny]
-                return_value=None
-            )
-
-            # Mock calc_hash to return the expected hash
-            with patch(
-                "exo.download.download_utils.calc_hash",
-                new_callable=AsyncMock,
-                return_value=remote_hash,
-            ):
-                await _download_file(model_id, "main", "test.safetensors", target_dir)
-
-            # file_meta should be called twice: once for verification, once for download
-            assert mock_file_meta.call_count == 2
-
-    async def test_skip_download_when_file_size_matches(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test that files with matching sizes are not re-downloaded."""
-        from exo.download.download_utils import (
-            _download_file,  # pyright: ignore[reportPrivateUsage]
-        )
-
-        target_dir = tmp_path / "downloads"
-        await aios.makedirs(target_dir, exist_ok=True)
-
-        # Create a local file
-        local_file = target_dir / "test.safetensors"
-        local_content = b"local content"
-        async with aiofiles.open(local_file, "wb") as f:
-            await f.write(local_content)
-
-        remote_size = len(local_content)  # Same as local
-        remote_hash = "abc123"
-
-        with (
-            patch(
-                "exo.download.download_utils.file_meta",
-                new_callable=AsyncMock,
-                return_value=(remote_size, remote_hash),
-            ) as mock_file_meta,
-            patch(
-                "exo.download.download_utils.create_http_session"
-            ) as mock_session_factory,
-        ):
-            result = await _download_file(
-                model_id, "main", "test.safetensors", target_dir
-            )
-
-            # Should return immediately without downloading
-            assert result == local_file
-            mock_file_meta.assert_called_once()
-            mock_session_factory.assert_not_called()
-
-    async def test_offline_fallback_uses_local_file(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test that local files are used when network is unavailable."""
-        from exo.download.download_utils import (
-            _download_file,  # pyright: ignore[reportPrivateUsage]
-        )
-
-        target_dir = tmp_path / "downloads"
-        await aios.makedirs(target_dir, exist_ok=True)
-
-        # Create a local file
-        local_file = target_dir / "test.safetensors"
-        async with aiofiles.open(local_file, "wb") as f:
-            await f.write(b"local content")
-
-        with (
-            patch(
-                "exo.download.download_utils.file_meta",
-                new_callable=AsyncMock,
-                side_effect=Exception("Network error"),
-            ),
-            patch(
-                "exo.download.download_utils.create_http_session"
-            ) as mock_session_factory,
-        ):
-            result = await _download_file(
-                model_id, "main", "test.safetensors", target_dir
-            )
-
-            # Should return local file without attempting download
-            assert result == local_file
-            mock_session_factory.assert_not_called()
-
-
-class TestFileListCache:
-    """Tests for file list caching behavior."""
-
-    async def test_fetch_fresh_and_update_cache(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test that fresh data is fetched and cache is updated."""
-        models_dir = tmp_path / "models"
-
-        file_list = [
-            FileListEntry(type="file", path="model.safetensors", size=1000),
-            FileListEntry(type="file", path="config.json", size=100),
-        ]
-
-        with (
-            patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir),
-            patch(
-                "exo.download.download_utils.fetch_file_list_with_retry",
-                new_callable=AsyncMock,
-                return_value=file_list,
-            ) as mock_fetch,
-        ):
-            result = await fetch_file_list_with_cache(model_id, "main")
-
-            assert result == file_list
-            mock_fetch.assert_called_once()
-
-            # Verify cache was written
-            cache_file = (
-                models_dir
-                / "caches"
-                / model_id.normalize()
-                / f"{model_id.normalize()}--main--file_list.json"
-            )
-            assert await aios.path.exists(cache_file)
-
-            async with aiofiles.open(cache_file, "r") as f:
-                cached_data = TypeAdapter(list[FileListEntry]).validate_json(
-                    await f.read()
-                )
-            assert cached_data == file_list
-
-    async def test_fallback_to_cache_when_fetch_fails(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test that cached data is used when fetch fails."""
-        models_dir = tmp_path / "models"
-        cache_dir = models_dir / "caches" / model_id.normalize()
-        await aios.makedirs(cache_dir, exist_ok=True)
-
-        # Create cache file
-        cached_file_list = [
-            FileListEntry(type="file", path="model.safetensors", size=1000),
-        ]
-        cache_file = cache_dir / f"{model_id.normalize()}--main--file_list.json"
-        async with aiofiles.open(cache_file, "w") as f:
-            await f.write(
-                TypeAdapter(list[FileListEntry]).dump_json(cached_file_list).decode()
-            )
-
-        with (
-            patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir),
-            patch(
-                "exo.download.download_utils.fetch_file_list_with_retry",
-                new_callable=AsyncMock,
-                side_effect=Exception("Network error"),
-            ),
-        ):
-            result = await fetch_file_list_with_cache(model_id, "main")
-
-            assert result == cached_file_list
-
-    async def test_error_propagates_when_no_cache(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test that errors propagate when fetch fails and no cache exists."""
-        models_dir = tmp_path / "models"
-
-        with (
-            patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir),
-            patch(
-                "exo.download.download_utils.fetch_file_list_with_retry",
-                new_callable=AsyncMock,
-                side_effect=Exception("Network error"),
-            ),
-            pytest.raises(Exception, match="Network error"),
-        ):
-            await fetch_file_list_with_cache(model_id, "main")
-
-
-class TestModelDeletion:
-    """Tests for model deletion including cache cleanup."""
-
-    async def test_delete_model_clears_cache(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test that deleting a model also deletes its cache."""
-        models_dir = tmp_path / "models"
-        model_dir = models_dir / model_id.normalize()
-        cache_dir = models_dir / "caches" / model_id.normalize()
-
-        # Create model and cache directories
-        await aios.makedirs(model_dir, exist_ok=True)
-        await aios.makedirs(cache_dir, exist_ok=True)
-
-        # Add some files
-        async with aiofiles.open(model_dir / "model.safetensors", "w") as f:
-            await f.write("model data")
-        async with aiofiles.open(cache_dir / "file_list.json", "w") as f:
-            await f.write("[]")
-
-        with patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir):
-            result = await delete_model(model_id)
-
-            assert result is True
-            assert not await aios.path.exists(model_dir)
-            assert not await aios.path.exists(cache_dir)
-
-    async def test_delete_model_only_cache_exists(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test deleting when only cache exists (model already deleted)."""
-        models_dir = tmp_path / "models"
-        cache_dir = models_dir / "caches" / model_id.normalize()
-
-        # Only create cache directory
-        await aios.makedirs(cache_dir, exist_ok=True)
-        async with aiofiles.open(cache_dir / "file_list.json", "w") as f:
-            await f.write("[]")
-
-        with patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir):
-            result = await delete_model(model_id)
-
-            # Returns False because model dir didn't exist
-            assert result is False
-            # But cache should still be cleaned up
-            assert not await aios.path.exists(cache_dir)
-
-    async def test_delete_nonexistent_model(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Test deleting a model that doesn't exist."""
-        models_dir = tmp_path / "models"
-        await aios.makedirs(models_dir, exist_ok=True)
-
-        with patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir):
-            result = await delete_model(model_id)
-
-            assert result is False
-
-
-class TestProgressResetOnRedownload:
-    """Tests for progress tracking when files are re-downloaded."""
-
-    async def test_progress_resets_correctly_on_redownload(
-        self, model_id: ModelId
-    ) -> None:
-        """Test that progress tracking resets when a file is re-downloaded.
-
-        When a file is deleted and re-downloaded (due to size mismatch),
-        the progress tracking should reset rather than calculating negative
-        downloaded_this_session values.
-        """
-        # Simulate file_progress dict as it exists in download_shard
-        file_progress: dict[str, RepoFileDownloadProgress] = {}
-
-        # Initialize with old file progress (simulating existing large file)
-        old_file_size = 1_500_000_000  # 1.5 GB
-        file_progress["model.safetensors"] = RepoFileDownloadProgress(
-            repo_id=model_id,
-            repo_revision="main",
-            file_path="model.safetensors",
-            downloaded=Memory.from_bytes(old_file_size),
-            downloaded_this_session=Memory.from_bytes(0),
-            total=Memory.from_bytes(old_file_size),
-            speed=0,
-            eta=timedelta(0),
-            status="not_started",
-            start_time=time.time() - 10,  # Started 10 seconds ago
-        )
-
-        # Simulate the logic from on_progress_wrapper after re-download starts
-        # This is the exact logic from the fixed on_progress_wrapper
-        curr_bytes = 100_000  # 100 KB - new download just started
-        previous_progress = file_progress.get("model.safetensors")
-
-        # Detect re-download: curr_bytes < previous downloaded
-        is_redownload = (
-            previous_progress is not None
-            and curr_bytes < previous_progress.downloaded.in_bytes
-        )
-
-        if is_redownload or previous_progress is None:
-            # Fresh download or re-download: reset tracking
-            start_time = time.time()
-            downloaded_this_session = curr_bytes
-        else:
-            # Continuing download: accumulate
-            start_time = previous_progress.start_time
-            downloaded_this_session = (
-                previous_progress.downloaded_this_session.in_bytes
-                + (curr_bytes - previous_progress.downloaded.in_bytes)
-            )
-
-        # Key assertions
-        assert is_redownload is True, "Should detect re-download scenario"
-        assert downloaded_this_session == curr_bytes, (
-            "downloaded_this_session should equal curr_bytes on re-download"
-        )
-        assert downloaded_this_session > 0, (
-            "downloaded_this_session should be positive, not negative"
-        )
-
-        # Calculate speed (should be positive)
-        elapsed = time.time() - start_time
-        speed = downloaded_this_session / elapsed if elapsed > 0 else 0
-        assert speed >= 0, "Speed should be non-negative"
-
-    async def test_progress_accumulates_on_continuing_download(
-        self, model_id: ModelId
-    ) -> None:
-        """Test that progress accumulates correctly for continuing downloads.
-
-        When a download continues from where it left off (resume),
-        the progress should accumulate correctly.
-        """
-        file_progress: dict[str, RepoFileDownloadProgress] = {}
-
-        # Initialize with partial download progress
-        initial_downloaded = 500_000  # 500 KB already downloaded
-        start_time = time.time() - 5  # Started 5 seconds ago
-        file_progress["model.safetensors"] = RepoFileDownloadProgress(
-            repo_id=model_id,
-            repo_revision="main",
-            file_path="model.safetensors",
-            downloaded=Memory.from_bytes(initial_downloaded),
-            downloaded_this_session=Memory.from_bytes(initial_downloaded),
-            total=Memory.from_bytes(1_000_000),
-            speed=100_000,
-            eta=timedelta(seconds=5),
-            status="in_progress",
-            start_time=start_time,
-        )
-
-        # Progress callback with more bytes downloaded
-        curr_bytes = 600_000  # 600 KB - continuing download
-        previous_progress = file_progress.get("model.safetensors")
-
-        # This is NOT a re-download (curr_bytes > previous downloaded)
-        is_redownload = (
-            previous_progress is not None
-            and curr_bytes < previous_progress.downloaded.in_bytes
-        )
-
-        if is_redownload or previous_progress is None:
-            downloaded_this_session = curr_bytes
-            used_start_time = time.time()
-        else:
-            used_start_time = previous_progress.start_time
-            downloaded_this_session = (
-                previous_progress.downloaded_this_session.in_bytes
-                + (curr_bytes - previous_progress.downloaded.in_bytes)
-            )
-
-        # Key assertions
-        assert is_redownload is False, (
-            "Should NOT detect re-download for continuing download"
-        )
-        assert used_start_time == start_time, "Should preserve original start_time"
-        expected_session = initial_downloaded + (curr_bytes - initial_downloaded)
-        assert downloaded_this_session == expected_session, (
-            f"Should accumulate: {downloaded_this_session} == {expected_session}"
-        )
-        assert downloaded_this_session == 600_000, (
-            "downloaded_this_session should equal total downloaded so far"
-        )
--- a/src/exo/main.py
+++ b/src/exo/main.py
@@ -90,6 +90,7 @@ class Node:
            worker = Worker(
                node_id,
                session_id,
+                connection_message_receiver=router.receiver(topics.CONNECTION_MESSAGES),
                global_event_receiver=router.receiver(topics.GLOBAL_EVENTS),
                local_event_sender=router.sender(topics.LOCAL_EVENTS),
                command_sender=router.sender(topics.COMMANDS),
@@ -226,6 +227,9 @@ class Node:
                        self.worker = Worker(
                            self.node_id,
                            result.session_id,
+                            connection_message_receiver=self.router.receiver(
+                                topics.CONNECTION_MESSAGES
+                            ),
                            global_event_receiver=self.router.receiver(
                                topics.GLOBAL_EVENTS
                            ),
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
ciaranbor	409fa80600	Fix cancellation during async step	2026-01-26 17:37:24 +00:00
ciaranbor	5a94c21daa	Skip final rank async send upon cancellation	2026-01-26 15:55:47 +00:00
ciaranbor	56ec049321	Handle cancellation completion in dashboard	2026-01-26 10:23:54 +00:00
ciaranbor	b477f88ace	Handle cancellation signal in diffusion runner	2026-01-26 10:23:54 +00:00
ciaranbor	4ea6e32f7b	Refactor duplicate image generation and image editing runner logic. Add cancellation checker to inject into model inference	2026-01-26 10:23:54 +00:00
ciaranbor	49c5345e93	Add generation cancellation button to UI	2026-01-26 10:22:21 +00:00
Evan	ea593075d7	api cancellation closing the http request to the api now - sends a cancellation from the api - writes that cancellation in the master - worker plans off the cancellation - runner observes that cancellation after every generation step (+1 communication per token) - cancellation happens synchronously to prevent gpu locks	2026-01-24 21:50:50 +00:00