Compare commits

..

61 Commits

Author SHA1 Message Date
Evan
53aec2fe25 implement mlx-lm tool calling 2026-01-21 00:27:20 +00:00
rltakashige
758464703d Fix GPT OSS tensor sharding with upstream MLX LM (#1223)
## Motivation
MLX LM has given GPT OSS a shard method, but MLX does not have a
matching update.

2026-01-20 18:24:54 +00:00
rltakashige
9e2179c848 Register original layer in CustomMlxLayer (#1229)
## Motivation
Kimi K2 Thinking Pipeline RDMA was broken before.

## Why It Works
No clue tbh

## Test Plan

### Manual Testing
Kimi K2 Thinking and GPT OSS work at the same time on Pipeline RDMA.
Needs exo bench to check more thoroughly.

### Automated Testing
Layer composition tests still pass.
2026-01-20 18:20:01 +00:00
Evan Quiney
22b5d836ef swap all instances of model_id: str for model_id: ModelId (#1221)
This change uses the stronger typed ModelId, and introduces some
convenience methods. It also cleans up some code left over from #1204.

## Changes

`model_id: str -> model_id: ModelId`
`repo_id: str -> model_id: ModelId`

Introduces methods on ModelId, in particular ModelId.normalize() to
replace `/` with `--`.
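
A minimal sketch of the idea (the exact fields and methods of exo's
`ModelId` may differ):

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class ModelId:
    value: str  # e.g. "mlx-community/Llama-3.2-3B-Instruct-4bit"

    def normalize(self) -> str:
        # Filesystem-safe form: "mlx-community--Llama-3.2-3B-Instruct-4bit"
        return self.value.replace("/", "--")
```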

This PR did introduce some circular imports, so it moves some code
around to limit them.

## Test Plan

Tests still pass, types still check. As this is about metadata, I
haven't tested inference.
2026-01-20 17:38:06 +00:00
Alex Cheema
ea9c6d6bdf Remove dead local paths code from download_shard (#1227)
## Motivation

The `download_progress_for_local_path` function and the "Handle local
paths" code block in `download_shard` are dead code that cannot be
reached in normal usage. The code checks if `model_id` (e.g.,
"mlx-community/Llama-3.2-3B-Instruct-4bit") exists as a filesystem path,
but model IDs are constrained to HuggingFace repo format and there's no
API pathway to pass local paths.

## Changes

- Removed `download_progress_for_local_path()` function (45 lines)
- Removed the "Handle local paths" block in `download_shard()` (7 lines)

## Why It Works

This code was added in PR #669 as part of a "feature-local-models"
branch, but the feature was never fully integrated. The check
`aios.path.exists(str(shard.model_card.model_id))` would only return
true if a directory literally named
"mlx-community/Llama-3.2-3B-Instruct-4bit" existed in the cwd, which
doesn't happen in practice. Offline caching is already handled by
`fetch_file_list_with_cache`.

## Test Plan

### Manual Testing
- Run exo normally and verify downloads still work

### Automated Testing
- Existing tests pass (this code had no test coverage)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 17:07:27 +00:00
Alex Cheema
4ea66d427b Reduce download log spam (#1225)
## Motivation

When `skip_download=True`, exo was logging a lot of unnecessary messages during periodic download status checks. This resulted in spammy logs that made it hard to see important messages.

## Changes

- Only log "Downloading ... with allow_patterns=..." when actually downloading (not when skip_download is true)
- Changed periodic download progress check logs from INFO to DEBUG level

## Why It Works

The `skip_download=True` parameter is used when checking download status without actually downloading. By guarding the log behind `if not skip_download:`, we avoid logging on every status check. Changing the periodic emitting logs to DEBUG level reduces noise while still keeping them available for debugging.
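
A minimal sketch of the guard, with hypothetical names:

```python
import logging

logger = logging.getLogger(__name__)

def report_download(model_id: str, allow_patterns: list[str], skip_download: bool) -> None:
    # Status polls pass skip_download=True and stay quiet; a real download logs once.
    if not skip_download:
        logger.info("Downloading %s with allow_patterns=%s", model_id, allow_patterns)
```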

## Test Plan

### Manual Testing
- Run exo and observe that logs are less spammy during normal operation
- Use -v or -vv flags to see DEBUG logs when needed

### Automated Testing
- Existing tests cover this code path
2026-01-20 16:57:05 +00:00
rltakashige
8b709e68b2 Mark slow tests as slow (#1220)
2026-01-20 15:03:46 +00:00
Evan Quiney
4da6eeb11f fix a test broken by #1204 (#1219)
bad merge broke a test - fix it
2026-01-20 14:56:20 +00:00
Evan
3d2eee4884 quiet localhost log
this log is just noise - remove it
2026-01-20 14:51:26 +00:00
Evan
116558839e don't clear mdns discovered connections
the pinger currently removes mdns-discovered connections - these systems
should be independent
2026-01-20 14:46:20 +00:00
Evan Quiney
d4f551c602 Simplify model cards (#1204)
## Motivation

We have a lot of unneeded data in the model card - let's just keep the
necessary stuff and add back more data when we need it.

## Test Plan

EXO still runs! (pipeline on 2)

Co-authored-by: rltakashige <rl.takashige@gmail.com>
2026-01-20 11:01:19 +00:00
Alex Cheema
176ab5ba40 Add GLM-4.7-Flash model cards (4bit, 5bit, 6bit, 8bit) (#1214)
## Motivation

Add support for GLM-4.7-Flash, a lighter variant of GLM-4.7 with the
`glm4_moe_lite` architecture. These models are smaller and faster while
maintaining good performance.

## Changes

1. **Added 4 new model cards** for GLM-4.7-Flash variants:
   - `glm-4.7-flash-4bit` (~18 GB)
   - `glm-4.7-flash-5bit` (~21 GB)
   - `glm-4.7-flash-6bit` (~25 GB)
   - `glm-4.7-flash-8bit` (~32 GB)

   All variants have:
   - `n_layers`: 47 (vs 91 in GLM-4.7)
   - `hidden_size`: 2048 (vs 5120 in GLM-4.7)
   - `supports_tensor`: True (native `shard()` method)

2. **Bumped mlx from 0.30.1 to 0.30.3** - required by mlx-lm 0.30.4

3. **Updated mlx-lm from 0.30.2 to 0.30.4** - adds `glm4_moe_lite`
architecture support

4. **Added type ignores** in `auto_parallel.py` for stricter type
annotations in new mlx-lm

5. **Fixed EOS token IDs** for GLM-4.7-Flash - uses different tokenizer
with IDs `[154820, 154827, 154829]` vs other GLM models' `[151336,
151329, 151338]`

6. **Renamed `MLX_IBV_DEVICES` to `MLX_JACCL_DEVICES`** - env var name
changed in new mlx

## Why It Works

The model cards follow the same pattern as existing GLM-4.7 models.
Tensor parallel support is enabled because GLM-4.7-Flash implements the
native `shard()` method in mlx-lm 0.30.4, which is automatically
detected in `auto_parallel.py`.

GLM-4.7-Flash uses a new tokenizer with different special token IDs.
Without the correct EOS tokens, generation wouldn't stop properly.

## Test Plan

### Manual Testing
Tested generation with GLM-4.7-Flash-4bit - now correctly stops at EOS
tokens.

### Automated Testing
- `basedpyright`: 0 errors
- `ruff check`: All checks passed
- `pytest`: 162/162 tests pass (excluding pre-existing
`test_distributed_fix.py` timeout failures)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 03:58:09 +00:00
rltakashige
f5e6aa82d2 Load layers individually (#1211)
## Motivation

Certain models hang at model loading in tensor parallel. 

Hopefully closes #1205 

## Changes

- Load layer by layer for tensor parallel sharding
- Move eval_with_timeout to auto_parallel.py to resolve circular import.

## Why It Works

The naive fix is to load the model with lazy=False and then shard for
tensor parallel. However, this requires the entire model to be loaded
into memory.

Instead, we can load layer by layer and shard after loading. This adds a
small memory overhead, but it is negligible.

I tried loading layer by layer after the sharding, and this allowed
model loading but got stuck at warming up.
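
A hedged sketch of the layer-by-layer approach; `model.layers`, the
weight-key prefix, and `shard_layer` are stand-ins for whatever the real
auto_parallel code uses:

```python
import mlx.core as mx
import mlx.nn as nn

def load_and_shard(model: nn.Module, weights: dict[str, mx.array], shard_layer) -> None:
    for i, layer in enumerate(model.layers):
        prefix = f"layers.{i}."  # key prefix varies by model family
        layer_weights = [(k[len(prefix):], v) for k, v in weights.items() if k.startswith(prefix)]
        layer.load_weights(layer_weights, strict=False)
        mx.eval(layer.parameters())  # materialize just this layer's weights
        shard_layer(layer)           # shard before moving on, keeping peak memory small
```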

## Test Plan

### Manual Testing
GPT OSS loads with TP and FAST SYNCH. Kimi does too.

### Automated Testing
We need to run a suite of exo_bench before merging this!
2026-01-20 03:26:51 +00:00
Alex Cheema
39f0ed6018 Prepend <think> tag to stream for thinking models like GLM-4.7 (#1186)
## Motivation

For thinking models like GLM-4.7, the `<think>` tag is inserted by the
tokenizer's `apply_chat_template()` into the **prompt** (input). The
model generates tokens starting *after* this tag, so `<think>` never
appears in the streamed output. The frontend expects
`<think>...</think>` tags to extract and display thinking content.

**Log evidence:**
```
[gMASK]<sop><|system|>...<|user|>...<|assistant|><think>
```
The prompt ends with `<think>`, so the model generates content after it,
never returning the opening tag.

## Changes

- Added `detect_thinking_prompt_suffix()` helper function in
`utils_mlx.py` to detect if a prompt ends with `<think>` tag
- Added `parse_thinking_models()` generator wrapper in `runner.py` that
prepends the thinking tag to the output stream
- Modified the main generation loop to use the thinking wrapper for
non-GptOssModel models when a thinking prefix is detected
- Updated test mocks to handle the new `apply_chat_template` call

## Why It Works

The solution follows the same pattern as `parse_gpt_oss()` - a generator
wrapper that transforms the output stream. When the chat template ends
with `<think>`, we prepend this tag to the first generated token so the
frontend receives the complete `<think>...</think>` structure it
expects.
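
A simplified sketch of the wrapper pattern (the real
`parse_thinking_models` in `runner.py` handles more cases):

```python
from typing import Iterator

def prepend_think_tag(tokens: Iterator[str]) -> Iterator[str]:
    first = True
    for tok in tokens:
        if first:
            # Re-attach the tag the chat template consumed in the prompt.
            yield "<think>" + tok
            first = False
        else:
            yield tok
```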

## Test Plan

### Manual Testing
- Run exo: `uv run exo`
- Send a chat request to GLM-4.7:
  ```bash
curl http://localhost:52415/v1/chat/completions -H "Content-Type:
application/json" -d '{
    "model": "mlx-community/GLM-4.7-8bit-gs32",
    "messages": [{"role": "user", "content": "What is 2+2?"}],
    "stream": true
  }'
  ```
- Verify the streamed response starts with `<think>` tag
- Verify the frontend dashboard correctly shows the thinking section
collapsed

### Automated Testing
- All 72 worker tests pass: `uv run pytest src/exo/worker/`
- Type checker passes: `uv run basedpyright`
- Linter passes: `uv run ruff check`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Ryuichi Leo Takashige <leo@exolabs.net>
2026-01-19 19:44:51 +00:00
Alex Cheema
ee43b598fe Split NodePerformanceProfile into granular state mappings (#1209)
## Motivation

The current `NodePerformanceProfile` is a monolithic object where every
update (even 1-second memory updates) replaces the entire profile,
touching unrelated data. Different fields update at vastly different
frequencies:

| Data | Update Frequency |
|------|------------------|
| Memory, System | 1 second |
| Thunderbolt | 5 seconds |
| Network interfaces | 10 seconds |
| Friendly name | 60 seconds |
| Model/Chip ID | Once at startup |

## Changes

Split into separate state mappings so each data type updates
independently:

- `node_identities`: Static and slow-changing data (model_id, chip_id,
friendly_name)
- `node_memory`: RAM and swap usage
- `node_system`: GPU usage, temperature, power, CPU metrics
- `node_network`: Network interface information
- `node_thunderbolt`: Thunderbolt interface identifiers

Added a backwards-compatible `node_profiles` property that reconstructs
`NodePerformanceProfile` from the granular mappings for dashboard
compatibility.

**Files modified:**
- `src/exo/shared/types/profiling.py` - Added `NodeIdentity`,
`NodeNetworkInfo`, `NodeThunderboltInfo` types
- `src/exo/shared/types/state.py` - Added 5 new mappings +
`node_profiles` property
- `src/exo/shared/apply.py` - Updated `apply_node_gathered_info` and
`apply_node_timed_out`

## Why It Works

Each info type now writes only to its specific mapping, avoiding
unnecessary updates to unrelated data. The `MacThunderboltConnections`
handler reads from `node_thunderbolt` instead of the old `node_profiles`
for RDMA connection mapping. The backwards-compatible property ensures
the dashboard continues to work unchanged.
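
A hedged sketch of the backwards-compatible property, with simplified
stand-in types (the real definitions live in `profiling.py` and
`state.py`):

```python
from dataclasses import dataclass

@dataclass
class NodeIdentity:
    model_id: str
    chip_id: str
    friendly_name: str

@dataclass
class NodeMemory:
    ram_used: int
    swap_used: int

@dataclass
class NodePerformanceProfile:
    identity: NodeIdentity
    memory: NodeMemory | None

class State:
    def __init__(self) -> None:
        self.node_identities: dict[str, NodeIdentity] = {}
        self.node_memory: dict[str, NodeMemory] = {}

    @property
    def node_profiles(self) -> dict[str, NodePerformanceProfile]:
        # Rebuild the old monolithic profile from the granular mappings so
        # existing dashboard consumers keep working unchanged.
        return {
            node_id: NodePerformanceProfile(identity=ident, memory=self.node_memory.get(node_id))
            for node_id, ident in self.node_identities.items()
        }
```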

## Test Plan

### Manual Testing
- Start exo and verify dashboard shows node info
- Verify memory/GPU updates stream correctly
- Check that node timeout properly cleans up all mappings

### Automated Testing
- All 162 existing tests pass
- basedpyright: 0 errors
- ruff check: All checks passed
- nix fmt: Applied

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 18:24:15 +00:00
rltakashige
5fd55594c9 Wrap pipeline models for explicit mx.depends between cache and logits (#1206)
## Motivation

GPU timeouts often when prompt size > profile_step_size. It also happens
for seemingly random models.

## Changes

- Add mx.depends so the cache depends on the logits.
- All-gather at the model level rather than the layer level, reducing
the amount of data sent.

## Why It Works

mlx_lm's prefill loop only evaluates the cache state, not the logits.
When the prompt is longer than prefill_step_size, the all_gather is
never evaluated, causing a GPU timeout.
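
A minimal sketch of the fix, assuming `mx.depends(inputs, dependencies)`
as in recent MLX releases:

```python
import mlx.core as mx

def tie_cache_to_logits(cache_state: list[mx.array], logits: mx.array) -> list[mx.array]:
    # Whoever evaluates the returned cache arrays (e.g. mlx_lm's prefill
    # loop) now also forces the logits - and the all_gather inside them -
    # to be computed.
    return mx.depends(cache_state, [logits])
```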

## Test Plan


### Automated Testing
Added failing test cases and then resolved them.
2026-01-19 17:49:42 +00:00
Jake Hillion
5ab1f8b3e2 NetworkSetupHelper: detect stale startup script content
The daemonAlreadyInstalled() function checked that the startup script
file existed and validated plist properties, but did not compare the
actual script content. If the setupScript constant was updated in a new
app version, the stale on-disk script would not be detected or replaced.

Added a guard clause that reads the installed script from disk and
compares it against the expected setupScript content (with whitespace
normalization). When content differs, the function returns false,
triggering the reinstallation flow with an admin privileges prompt.

Test plan:
- Installed on a cluster that had the previous network config. Got the
  popup asking for permissions. After accepting I could run Kimi K2
  Thinking Tensor RDMA on all 4 nodes.
2026-01-19 17:36:15 +00:00
Evan Quiney
2202685c3e refactor all information sources (including ipless rdma discovery) (#928)
## Motivation

Information gathering is tightly coupled to MacMon - we should start
generalizing our information sources so we can add more in future.

## Changes

Added a new system to gather any information. Currently, it is attached
to the Worker - though this is mostly to keep the data processing logic
simple. It could be made independent quite easily.

I also refactored topology to include different kinds of connections as
we can gather RDMA connections without having a pre-existing socket
connection, and made the relevant placement updates. We should no longer
need the network locations script in the app.

Other sources of information now include:
- static node information like "model" and "chip" (macos, "Unknown"
fallback)
- device friendly name (macos, falls back to device hostname)
- network interfaces + ips (cross platform)
- thunderbolt interfaces (macos)
- thunderbolt connections (macos)
- RAM usage (cross platform)
- per-device configuration written to EXO_HOME/config.toml

## Limitations

Model and Chip are not cross platform concepts.

We do not differentiate between unified and non-unified memory systems.

A lot of this data collection is based on simple timers. Watching the SC
store on macos is the correct way to gather some of this information,
but requires a detour into rust for macos.

## Why It Works

The InfoGatherer is a generic subsystem which returns a union of metric
datatypes. It writes them to an event, which is applied to state. It is
currently re-spawned with the worker so each cluster receives the
correct information.

As for topology, macOS identifies TB ports with a uuid in
SPThunderboltDataType, and also stores remote uuids if it can find them.
These changes read that data with the system_profiler, hopefully not so
often as to cause notable performance impacts (though this should be
tuned) but frequently enough for moderate responsiveness.
As we can identify TB connections between devices without needing ips
attached to each interface, we can remove the network setup script
(almost) completely.

## Test Plan

### Manual Testing
Spawn RDMA instances without enabling DHCP on the RDMA interfaces.

### Automated Testing
Updated the current master and shared tests to cover the topology
refactor and new events.

---------

Co-authored-by: Sami Khan <smsak99@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Jake Hillion <jake@hillion.co.uk>
2026-01-19 16:58:09 +00:00
Andrei Onel
ce3ad391b1 Update README.md with some changes from release 1.0.61 (#1157)
Updated README.md with documentation for four new features:

- added a "Benchmarking" section documenting the exo-bench tool for
measuring model performance across different placement configurations
- documented the custom namespace feature for cluster isolation in the
macOS app section
- added a "Configuration Options" subsection explaining the --no-worker
CLI flag for coordinator-only nodes
- added a "File Locations (Linux)" subsection documenting XDG Base
Directory Specification compliance on Linux systems

Issue #930
2026-01-19 16:43:18 +00:00
Jake Hillion
fb0151630d shard_downloader: make on_progress callback async
The on_progress callback was synchronous but always invoked from async
contexts, forcing the use of send_nowait() which could raise WouldBlock
if the channel buffer was full, potentially dropping progress updates.

Changed the callback type from `Callable[[ShardMetadata,
RepoDownloadProgress], None]` to return a coroutine, updated all
implementations to be async, and replaced send_nowait() with await
send() in the worker's download progress handler.

This allows proper backpressure handling when sending download progress
events through the channel, eliminating the "Footgun!" that was
previously documented in the code.
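
A sketch of the new callback shape, assuming anyio 4's generic memory
object streams (exo types elided to `Any`):

```python
from typing import Any, Awaitable, Callable

import anyio

# Old: Callable[[ShardMetadata, RepoDownloadProgress], None]
# New: the callback returns an awaitable so the caller can apply backpressure.
OnProgress = Callable[[Any, Any], Awaitable[None]]

send_channel, receive_channel = anyio.create_memory_object_stream[Any](16)

async def on_progress(shard: Any, progress: Any) -> None:
    # await send() blocks while the buffer is full instead of raising
    # WouldBlock the way send_nowait() could, so no update is dropped.
    await send_channel.send((shard, progress))
```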

Test plan:
- Built a DMG and ran it on one node. All existing models showed as
  downloaded.
- Downloaded a new model. The progress bar on the download page worked.
- Downloaded another new model. The progress bar on the home page
  worked.
2026-01-19 16:19:37 +00:00
Alex Cheema
346b13e2c9 Enhance LaTeX rendering in dashboard markdown (#1197)
## Motivation

When models output LaTeX-formatted math proofs, the dashboard was not
rendering them correctly. Issues included:
- `\documentclass`, `\begin{document}`, `\usepackage` showing as raw
text
- `$...$` inline math with complex expressions (like `\frac`, `\ldots`)
not rendering due to markdown escaping backslashes
- `\begin{align*}...\end{align*}` and other math environments showing as
raw text
- `\emph{...}`, `\textbf{...}` LaTeX formatting commands not being
converted
- `$\require{...}$` (MathJax-specific) causing KaTeX errors
- `\begin{proof}...\end{proof}` showing as raw text

## Changes

Enhanced `MarkdownContent.svelte` with comprehensive LaTeX support:

**Math extraction before markdown processing:**
- Extract `$...$`, `$$...$$`, `\(...\)`, `\[...\]` into placeholders
before markdown processes the text
- Use alphanumeric placeholders (`MATHPLACEHOLDERINLINE0END`) that won't
be interpreted as HTML tags
- Restore and render with KaTeX after markdown processing

**LaTeX document command removal:**
- Strip `\documentclass{...}`, `\usepackage{...}`, `\begin{document}`,
`\end{document}`
- Strip `\maketitle`, `\title{...}`, `\author{...}`, `\date{...}`
- Strip `\require{...}` (MathJax-specific, not KaTeX)
- Replace `tikzpicture` environments with `[diagram]` placeholder
- Strip `\label{...}` cross-reference commands

**LaTeX math environments:**
- Convert `\begin{align*}`, `\begin{equation}`, `\begin{gather}`, etc.
to display math blocks

**LaTeX text formatting:**
- `\emph{...}` and `\textit{...}` → `<em>...</em>`
- `\textbf{...}` → `<strong>...</strong>`
- `\texttt{...}` → `<code>...</code>`
- `\underline{...}` → `<u>...</u>`

**LaTeX environments styling:**
- `\begin{proof}...\end{proof}` → styled proof block with QED symbol
- `\begin{theorem}`, `\begin{lemma}`, etc. → styled theorem blocks

**Display math enhancements:**
- Wrapped in styled container with subtle gold border
- "LaTeX" label and copy button appear on hover
- Dark theme KaTeX color overrides for better readability
- Custom scrollbar for overflow

## Why It Works

The key insight is that markdown processing was escaping backslashes in
LaTeX before KaTeX could see them. By extracting all math expressions
into alphanumeric placeholders *before* markdown runs, then restoring
them *after*, the LaTeX content passes through to KaTeX unmodified.

Using purely alphanumeric placeholders like `MATHPLACEHOLDERINLINE0END`
instead of `<<MATH_INLINE_0>>` prevents markdown from interpreting them
as HTML tags and stripping them.
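
The real code is in `MarkdownContent.svelte`; here is a
language-agnostic sketch of the placeholder round-trip, shown in Python
and covering only `$...$` for brevity:

```python
import re

def extract_math(text: str) -> tuple[str, list[str]]:
    stored: list[str] = []

    def stash(m: re.Match[str]) -> str:
        stored.append(m.group(0))
        # Purely alphanumeric, so markdown won't treat it as an HTML tag.
        return f"MATHPLACEHOLDERINLINE{len(stored) - 1}END"

    return re.sub(r"\$[^$\n]+\$", stash, text), stored

def restore_math(html: str, stored: list[str]) -> str:
    for i, expr in enumerate(stored):
        # In the real component this is where KaTeX renders `expr`.
        html = html.replace(f"MATHPLACEHOLDERINLINE{i}END", expr)
    return html
```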

## Test Plan

### Manual Testing
- Hardware: Any machine with the dashboard
- What you did:
  - Ask model to "write a proof in latex"
  - Verify inline math like `$x \in S$` renders correctly
- Verify display math like `\begin{align*}...\end{align*}` renders as
block
  - Verify `\documentclass`, `\begin{document}` are stripped (not shown)
  - Verify `\emph{...}` converts to italics
  - Verify copy button works on display math blocks
- Test edge cases: `$5` (currency) stays as text, `\$50` (escaped)
becomes `$50`

Before:
<img width="799" height="637" alt="Screenshot 2026-01-19 at 11 51 22 AM"
src="https://github.com/user-attachments/assets/62a705b8-b3c2-47b8-afd0-5d0c1b240e44"
/>

After:
<img width="809" height="642" alt="Screenshot 2026-01-19 at 11 46 58 AM"
src="https://github.com/user-attachments/assets/4f35fa1d-333c-4285-bc68-58a50f8f148e"
/>


### Automated Testing
- Dashboard builds successfully with `npm run build`
- Existing functionality preserved

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 14:50:41 +00:00
rltakashige
ea0588429b Custom mlx layer composition (#1201)
## Motivation

With a single pipeline layer, PipelineFirstLayer gets composed with
PipelineLastLayer.

## Test Plan

### Automated Testing
Made failing tests. Fixed them!
2026-01-19 12:36:25 +00:00
rltakashige
73b3f87e07 Set swa_idx and ga_idx for single layer (#1202)
## Motivation

The layer types list does not contain either "sliding_attention" or
"full_attention" for pipeline parallel (single layer).


## Test Plan

### Manual Testing
Manually tested a single layer of GPT OSS. It doesn't crash.

2026-01-19 12:31:11 +00:00
Evan Quiney
746589ba6b tidy: remove context manager from api (#1199) 2026-01-19 11:58:13 +00:00
rltakashige
f82f862fd7 Fix several issues with placement (#1200)
## Motivation

Uneven placements were causing issues for some users with lopsided
setups. While fixing, I ran into another issue with impossible
allocation of memory.

## Changes

- Allocate at least 1 layer per device.
- Catch overallocation of memory with an error.


## Test Plan

### Manual Testing
Tested that GPT OSS is placed correctly.

### Automated Testing
Added breaking tests in the first commit. Resolved with new placement
algorithm in the second one.
2026-01-19 11:52:35 +00:00
Alex Cheema
7ff937d8a1 Add dashboard screenshots to README (#1185)
## Motivation

The README showcases exo's features and benchmarks but doesn't show what
the dashboard actually looks like. Adding a screenshot helps users
understand what they'll get when they run exo.

## Changes

- Added dashboard screenshot to `docs/imgs/dashboard-cluster-view.png`:
Shows the cluster topology view with 4 × 512GB M3 Ultra Mac Studio
running DeepSeek v3.1 (8-bit) and Kimi-K2-Thinking (4-bit)
- Added a new "Dashboard" section to README.md below Features,
displaying the screenshot with caption

## Why It Works

Visual documentation helps users understand what exo offers before they
install it. The screenshot demonstrates the cluster management
capabilities.

## Test Plan

### Manual Testing
- Verified image renders correctly in GitHub markdown preview

### Automated Testing
- N/A - documentation only change

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 10:43:27 +00:00
Evan Quiney
d19bf02404 re-raise exceptions in the runner (#1198)
## Motivation

Runners that crash can swallow errors - we should re-raise. Also the
exception handler annoyed me.

## Changes

The try: except in the runner's chat now re-raises.
2026-01-19 10:35:23 +00:00
rltakashige
618cee5223 Resolve test event ordering flakiness (#1194)
## Motivation

mp sender occasionally does not have time to flush its events before
collect() is called, making the event ordering test fail.

## Changes

- Replace mp_channel with simple collector for event ordering test
- Also suppress warning for <frozen importlib._bootstrap>:488 <frozen
importlib._bootstrap>:488: DeprecationWarning: builtin type SwigPyObject
has no __module__ attribute


## Test Plan

### Automated Testing
Ran the test 100 times without it failing.
2026-01-18 20:33:20 +00:00
Antonio Lujano Luna
9c29eb7d48 Add proxy and custom SSL certificate support for corporate networks (#1189)
Support HTTPS_PROXY/HTTP_PROXY environment variables for proxy
configuration and SSL_CERT_FILE for custom CA certificates, enabling use
in corporate environments with SSL inspection.

## Motivation
Users in corporate environments often need to route traffic through HTTP
proxies and use custom CA certificates for SSL inspection. Without this
support, exo cannot download models in these network configurations.

## Changes
- Added `HTTPS_PROXY`/`HTTP_PROXY` environment variable support to
`create_http_session()` in `download_utils.py`
- Added `SSL_CERT_FILE` environment variable support for custom CA
certificate bundles, falling back to certifi's default bundle

## Why It Works
- `aiohttp.ClientSession` natively supports the `proxy` parameter for
routing requests through HTTP proxies
- `ssl.create_default_context(cafile=...)` accepts a custom CA bundle
path, allowing corporate CAs to be trusted
- Using environment variables is consistent with the codebase's existing
configuration patterns (e.g., `EXO_HOME`, `HF_ENDPOINT`)
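
A minimal sketch of the environment-driven setup (the real version is
`create_http_session()` in `download_utils.py`):

```python
import os
import ssl

import aiohttp
import certifi

def build_ssl_and_proxy() -> tuple[ssl.SSLContext, str | None]:
    # SSL_CERT_FILE points at a corporate CA bundle; otherwise use certifi's.
    cafile = os.environ.get("SSL_CERT_FILE") or certifi.where()
    ssl_ctx = ssl.create_default_context(cafile=cafile)
    proxy = os.environ.get("HTTPS_PROXY") or os.environ.get("HTTP_PROXY")
    return ssl_ctx, proxy

async def fetch(url: str) -> bytes:
    ssl_ctx, proxy = build_ssl_and_proxy()
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=ssl_ctx)) as session:
        # aiohttp routes the request through the proxy when one is passed.
        async with session.get(url, proxy=proxy) as resp:
            return await resp.read()
```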

## Test Plan
### Manual Testing
- Set `HTTPS_PROXY` environment variable and verified model downloads
route through proxy
- Set `SSL_CERT_FILE` to custom CA bundle and verified SSL verification
succeeds with corporate SSL inspection

### Automated Testing
- No automated tests added; this change is configuration-only and does
not alter existing behavior when environment variables are unset
2026-01-18 12:05:50 +00:00
Alex Cheema
c5158bee53 Add pre-commit checks documentation to AGENTS.md (#1184)
## Motivation

CI failures can be avoided by running checks locally before committing.
This adds clear documentation to AGENTS.md so that AI agents (and
humans) know exactly which checks must pass before pushing code.

## Changes

Added a new "Pre-Commit Checks (REQUIRED)" section to AGENTS.md that:
- Lists all 4 required checks (basedpyright, ruff, nix fmt, pytest)
- Provides a one-liner to run all checks in sequence
- Notes that `nix fmt` changes must be staged before committing
- Explains that CI runs `nix flake check` which verifies everything

## Why It Works

Clear documentation prevents CI failures by ensuring contributors run
checks locally first. The one-liner command makes it easy to run all
checks before committing.

## Test Plan

### Manual Testing
- Verified the documented commands work correctly

### Automated Testing
- N/A - documentation only change

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 21:50:24 +00:00
rltakashige
5c8a237940 Handle model timeouts (#1177)
- Add eval with a timeout.
- Add fast synch flag

## Motivation

Because of the experimental FAST SYNCH flag, some models may not work.
This PR catches when this occurs and allows users to specify a run
without fast synch.

## Changes

- Adds a flag to enable or disable fast synch (--fast-synch and
--no-fast-synch)
- Adds a heuristic timeout
- Reduces exo_bench default timeout to 10 minutes.

## Why It Works

The heuristic timeout assumes normal loading times on Mac devices: 60
seconds plus (model size in GB) / 5. For example, DeepSeek takes up to
120 seconds to load on tensor parallel, so its timeout is set to
60 + 120 = 180s.

We could raise this value if necessary.
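
The heuristic in code form, using the numbers above:

```python
def load_timeout_seconds(model_size_gb: float) -> float:
    # 60s base plus 1s per 5 GB of weights; a ~600 GB model gets
    # 60 + 120 = 180s.
    return 60 + model_size_gb / 5
```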

## Test Plan

### Manual Testing
Catches that GPT OSS fails to load in Tensor RDMA.
Can launch with the --no-fast-synch flag to run GPT OSS.

**GPT OSS 20B**
TP with fast synch
<img width="3064" height="456" alt="image"
src="https://github.com/user-attachments/assets/f6e25cd8-8621-4e99-99fe-292ee05c4035"
/>

TP without fast synch
<img width="3098" height="496" alt="image"
src="https://github.com/user-attachments/assets/d36453d9-6686-4cfe-aa7c-a7d458369d4d"
/>
[Note: the performance is really not great as fast synch is off]

(As a sanity check)
PP with fast synch
<img width="3124" height="496" alt="image"
src="https://github.com/user-attachments/assets/e97d4547-c6fa-483d-badb-4b371b900b4c"
/>

PP without fast synch
<img width="3078" height="508" alt="image"
src="https://github.com/user-attachments/assets/b2e20dfd-4b0e-4295-8a92-417dfe745c28"
/>

PP without RDMA
<img width="3070" height="498" alt="image"
src="https://github.com/user-attachments/assets/a8509d68-0aef-4cda-bca5-a67d39a0801e"
/>

TP without RDMA
<img width="3068" height="496" alt="image"
src="https://github.com/user-attachments/assets/b5691429-89f4-4369-bcf2-8fde2ad7154a"
/>
2026-01-16 20:25:12 +00:00
rltakashige
745343c705 Return error responses for Chat Completions (#1173)
- Error chunks
- Use error handling in exo_bench.py

## Motivation

Return when an error occurs so that generation stops. Adding timeouts is
a separate TODO for model loading and chat completions.

## Changes

- Return HTTP exceptions as JSON responses in an OpenAI compatible
format.
- Context manager for generation to catch and return error messages.
- Use error handling in exo_bench.py.

## Test Plan

### Manual Testing
Manually tested that exo_bench returns on failures within and outside
generation

2026-01-16 19:24:37 +00:00
Alex Cheema
5e28664c41 Fix draft release detection (attempt 3) (#1176)
## Motivation

Previous fix still failed in CI. Suspecting permissions issue with
GITHUB_TOKEN not being able to see draft releases via API.

## Changes

1. Add explicit `permissions: contents: write` to the job
2. Use `gh release list` first to check if draft exists (this uses a
different code path that might work better)
3. Add debug echo statements

## Test Plan

Delete v1.0.63 tag and re-push after merging.

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 17:26:06 +00:00
Alex Cheema
ae0a804ccb Fix draft release detection query (#1175)
## Motivation

Fixes the draft release detection that failed on the v1.0.63 release
attempt.

## Changes

The jq query was piped to `head -1` which truncated multi-line JSON
output to just `{`, causing the empty check to fail.

Changed to use `first // empty` in jq instead.

## Test Plan

Tested locally:
```bash
GITHUB_REF_NAME="v1.0.63"
gh api repos/exo-explore/exo/releases --jq "[.[] | select(.draft == true) | select(.name == \"$GITHUB_REF_NAME\")] | first // empty"
# Returns the full draft release JSON (2711 chars)
```

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 17:05:24 +00:00
Alex Cheema
07cf2c1aa1 Add GitHub releases with Sparkle release notes integration (#1172)
## Motivation

Closes #1140

Currently releases are uploaded to S3 for Sparkle updates but there's no
GitHub Release created, and Sparkle update dialogs don't show release
notes. Users have no visibility into what changed.

## Changes

- Added release workflow documentation comment at top of `build-app.yml`
- Added "Fetch release notes for Sparkle" step that converts markdown
from draft GitHub release to HTML
- Added "Inject release notes into appcast" step that embeds HTML in
appcast.xml with CDATA
- Added "Publish GitHub Release" step that attaches DMG and publishes
the draft

## Why It Works

- Sparkle's `<description>` tag supports HTML wrapped in CDATA for
rendering in update dialogs
- GitHub's markdown API (`/markdown`) converts the release notes to HTML
with proper formatting
- Draft releases allow writing polished notes before the build, then the
workflow publishes them automatically
- The workflow fails if no draft release exists, ensuring release notes
are always provided

## Test Plan

### Manual Testing
1. Create a draft GitHub release for a new tag with markdown release
notes
2. Push the tag to trigger the workflow
3. Verify the GitHub release is published with DMG attached
4. Download appcast.xml from S3 and verify
`<description><![CDATA[...]]></description>` contains HTML
5. Test Sparkle update dialog on macOS to confirm release notes appear

### Automated Testing
No automated tests added - this is CI workflow configuration.

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 16:47:33 +00:00
Evan
83c5285a80 reduce logs
previous commits' logs were too verbose; this tones them down a bit
2026-01-16 14:05:47 +00:00
Evan Quiney
39ee2bf7bd switch from synchronous threaded pinging to an async implementation (#1170)
still seeing churn in our networking - let's properly rate limit it

## changes

added an httpx client with a max connections limit, using a persistent AsyncClient

## testing

deployed on cluster, discovery VASTLY more stable (the only deleted
edges were those discovered by mdns)
2026-01-16 13:20:03 +00:00
Sami Khan
991adfbd6f fix local network warning (#1136)
## Motivation

Local network warning banner was showing on fresh install even though
mDNS was working. The check would fail before the user had a chance to
grant permission via the macOS prompt.

## Changes

- Added `hasWorkedBefore` flag persisted in UserDefaults
- Only show warning if permission previously worked but now doesn't

## Why It Works

On fresh install, the check may fail (no permission yet), but
`hasWorkedBefore` is false so no warning shows. Once the user grants
permission and a check succeeds, we record it. Future failures (zombie
permission after restart) will show the warning since `hasWorkedBefore`
is now true.

## Test Plan

### Manual Testing
Run locally

### Automated Testing
N/A
2026-01-16 13:10:50 +00:00
rltakashige
4b3de6b984 Fix exo bench for transformers 5.x (#1168)
## Motivation
Prompt Sizer was broken because transformers 5.x tokenizers return
BatchEncodings, which are essentially a dictionary of `{input_ids: [...]}`
instead of the bare list of input ids.
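
A hedged sketch of the compatibility handling (helper name hypothetical):

```python
from typing import Any

def to_input_ids(encoded: Any) -> list[int]:
    # transformers 5.x returns a BatchEncoding mapping; older versions can
    # return the bare list of token ids.
    try:
        return list(encoded["input_ids"])
    except TypeError:
        return list(encoded)
```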

## Test Plan

### Manual Testing
Tested that exo bench runs as expected.

2026-01-16 12:39:22 +00:00
Evan
c8de3b90ea quiet rust logs
rust logs were too verbose - now only warnings propagate to python

entirely happy not to merge this and to clean up rust logging instead,
but this felt saner right now
2026-01-16 12:34:28 +00:00
Sami Khan
6e6567a802 resolve issue #1070 (#1076)
## Motivation

https://github.com/exo-explore/exo/issues/1070

## Changes

Added check in ChatForm.svelte to reset selectedChatModel when it no
longer matches any running instance.

## Why It Works

The $effect now detects when the selected model is stale (not in
availableModels()) and resets to the first available model.

## Test Plan

### Manual Testing

1. Create instance of Model A → Delete it → Create instance of Model B →
Chat
2. Verify request goes to Model B (not Model A)

---------

Co-authored-by: Alex Cheema <41707476+AlexCheema@users.noreply.github.com>
2026-01-15 20:00:41 +00:00
rltakashige
a735dad667 Parse GPT OSS in runner (#1160)
## Motivation

Simplification of the API, moving model-specific code into the runner.

## Test Plan

### Manual Testing
Tested that GPT OSS outputs are parsed correctly on the dashboard.

2026-01-15 19:53:55 +00:00
rltakashige
aaf4e36bc3 FIX GPT OSS (#1165)
## Motivation

Adds several unmerged fixes for GPT OSS.
Also adds GPT OSS 20B MXFP4 Q8 instead of Q4 for numerical stability (as
this is unstable for MLX LM too).


## Test Plan

### Manual Testing
Manually tested. No further gibberish responses.

### Automated Testing
Ran EXO Bench - pipeline, tensor and single node work on both 20B and
120B models
2026-01-15 19:20:17 +00:00
Evan Quiney
3e623ccf0d up http timeout to 3 seconds and retry on BadStatusLine (#1164)
we're seeing a lot of network churn - perhaps this is a connection
timing out issue? let's also retry after a second

## testing
none yet

---------

Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 18:15:12 +00:00
Evan Quiney
c22dad8a7d dashboard: add peer: true to package lock (#1162)
this happens every time i run npm install - let's upstream it

## testing
dashboard builds and renders
2026-01-15 17:01:43 +00:00
Evan
4bc4d50685 rust: remove dead code
the system custodian has been made unnecessary with the swift app - we
can remove it

## testing
everything still builds
2026-01-15 16:51:46 +00:00
Jake Hillion
e0aab46fd8 model_cards.py: clean up commented out code
Clean up the commented out code and make sure the comments are unified.
Carrying around the commented out code means people making changes to
model_cards are supposed to update it, but that's not clear and won't be
picked up by type checking etc. Drop it for now - it's in the git
history.

Also make the rest of the comments a bit more uniform, and place
comments about a specific model card inside the model card (instead of
above) so they don't get lost when code is added/moved around.

Test plan:
- my eyes
2026-01-15 13:21:58 +00:00
Evan Quiney
82ba42bae9 add glm-47, minimax-m21 (#1147)
Adds support for GLM 4.7 and MiniMax M2.1.

Manual testing:
Tensor + Pipeline execution of both models.

Closes #1141 and #1142
2026-01-14 16:33:17 +00:00
Jake Hillion
3671528fa4 nix: add dashboard build with dream2nix
Continue working towards a fully Nix based build by building the
dashboard with Nix. Continuing the theme of using the existing lock
files, use dream2nix to parse the lock file and build the tree of
dependency derivations.

dream2nix doesn't like the bundleDependencies, so we apply a small patch
to the lock file that drops all dependencies that are bundled. This
should ideally be contributed upstream but that can be done later.

Use this new dashboard build in the build-app CI workflow, meaning
future macOS apps will include this reproducible dashboard.

Test plan:
- Built a DMG, shipped to a cluster, loaded in a browser with no cache
  and the dashboard looks good.

- Directory layout is as expected:
```
$ nix build .#dashboard
$ find result/
...
result/_app/immutable/entry
result/_app/immutable/entry/app.CTPAnMjf.js
result/_app/immutable/entry/start.fUSEa-2O.js
result/_app/immutable/nodes
result/_app/immutable/nodes/3.DqQr1Obm.js
result/_app/immutable/nodes/0.DgEY44RO.js
result/_app/immutable/nodes/2.BjZg_lJh.js
result/_app/immutable/nodes/1.D6vGUYYT.js
result/_app/env.js
result/_app/version.json
result/exo-logo.png
result/favicon.ico
result/index.html
```
2026-01-14 15:58:16 +01:00
Jake Hillion
e6434ec446 nix: add Rust builds with crane and fenix
The Rust workspace lacked Nix build support, making it difficult to
build packages reproducibly or run checks in CI.

Added a flake-parts module at rust/parts.nix that uses crane for Rust
builds and fenix for the nightly toolchain. The source filter isolates
rust/ and root Cargo files to prevent Python/docs changes from
triggering Rust rebuilds. Exports packages (system_custodian,
exo_pyo3_bindings wheel, exo-rust-workspace) and checks (cargo-nextest,
cargo-doc) for all three target platforms.

The devShell now uses inputsFrom to inherit build dependencies from the
workspace package, removing the need for manual pkg-config/openssl setup.

Test plan:
- Ran `nix flake check` successfully
- Built `nix build ".#checks.x86_64-linux.cargo-nextest"` and tests pass
- Built `nix build ".#exo_pyo3_bindings"` and wheel is produced
2026-01-14 11:52:29 +00:00
Jake Hillion
bdb43e1dbb nix: drop noisy echos from devshell
Drop all the printing when entering a devshell. It's annoying, and not a
super accurate description of how to develop exo anyway.
2026-01-14 10:04:57 +00:00
Jake Hillion
e4a01e2b0e chore(deps): nix lock file maintenance
Update nix flake inputs. Add a second input as Swift is currently broken
in nixpkgs on Linux for `swift-format` as we want `nix fmt` to continue
being reproducible everywhere.
2026-01-13 19:57:14 +01:00
Evan Quiney
1200a7db64 Add tensor sharding for GPT-OSS (#1144)
## Motivation

GPT OSS did not previously support tensor sharding

## Changes

Add GPT sharding support in tensor_auto_parallel.
Code is mostly @rltakashige's

## Test Plan

### Manual Testing
Tested GPT-OSS - MLX Fast Sync causes issues in Tensor RDMA - this is a general problem at the moment.
2026-01-13 17:25:52 +00:00
Evan Quiney
47ceb54bc1 up the rlimit (#1148)
Fixes #1117 

Manual testing:
Launched 100 instances. worked. yay.
2026-01-13 15:00:54 +00:00
Jake Hillion
f8112fdf25 nix: convert to flake-parts
Preparing to add a flake-parts module for Rust builds. The flake-utils
library doesn't support the module system needed for cleanly separating
the Rust build configuration.

Converted from flake-utils to flake-parts, switching to the treefmt-nix
flakeModule import pattern. The devShell and formatter outputs remain
functionally equivalent.

Test plan:
- Ran `nix flake check` successfully
- Verified `nix develop` provides the same environment
2026-01-13 15:06:44 +01:00
Alex Cheema
e388f59480 docs: add AGENTS.md for AI coding agents guidance (#1132)
## Motivation

Add documentation to help AI coding agents (Claude Code, Cursor, GitHub
Copilot, etc.) understand the exo codebase and contribute effectively.

## Changes

- Add `AGENTS.md` with guidance for AI agents working on the codebase
- Add symlink `CLAUDE.md -> AGENTS.md` for backwards compatibility with
Claude Code

## Why It Works

`AGENTS.md` is becoming a standard convention for AI agent instructions.
The symlink ensures Claude Code (which looks for `CLAUDE.md`) continues
to work while supporting the broader `AGENTS.md` convention.

## Test Plan

### Manual Testing
- Verified symlink works correctly

### Automated Testing
- N/A (documentation only)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-13 13:05:47 +00:00
Alex Cheema
e5e74e1eef Upgrade mlx-lm to 0.30.2 with transformers 5.x compatibility (#1125)
## Motivation

Upgrade mlx-lm to version 0.30.2 which requires transformers 5.0.0rc2 as
a prerelease dependency. This enables support for newer models like Kimi
K2 Thinking while maintaining compatibility with existing models.

The transformers 5.x release includes breaking changes that affect
custom tokenizers like Kimi's TikTokenTokenizer, requiring compatibility
fixes.

## Changes

### Core Changes
- **mlx-lm upgrade**: Bump to 0.30.2 with locked exact versions for
mlx/mlx-lm to prevent breaking changes
- **transformers 5.x compatibility**: Enable prerelease transformers
dependency

### Kimi K2 Tokenizer Fixes
- Add `bytes_to_unicode` monkey-patch to restore function moved in
transformers 5.0.0rc2
- Load `TikTokenTokenizer` directly instead of via `AutoTokenizer` to
bypass transformers 5.x bug with `auto_map` fallback
- Patch `encode()` to use tiktoken directly with `allowed_special="all"`
to handle special tokens from chat templates

### Other Changes
- Dashboard: Show disk usage for completed model downloads
- CI: Add `workflow_dispatch` trigger to build-app workflow
- Docs: Add basic API documentation

### Testing
- Add comprehensive tokenizer unit tests for all supported models
- Tests verify encode/decode, special token handling, and chat template
encoding

## Why It Works

**bytes_to_unicode issue**: transformers 5.0.0rc2 moved
`bytes_to_unicode` from `transformers.models.gpt2.tokenization_gpt2` to
`transformers.convert_slow_tokenizer`. Kimi's `tokenization_kimi.py`
imports from the old location. The monkey-patch restores it at module
load time.
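
A sketch of such a monkey-patch, using the module paths named above
(exo's actual patch may differ):

```python
import transformers.convert_slow_tokenizer as new_home
import transformers.models.gpt2.tokenization_gpt2 as old_home

# Restore the symbol Kimi's tokenization_kimi.py still imports from the
# pre-5.x location.
if not hasattr(old_home, "bytes_to_unicode"):
    old_home.bytes_to_unicode = new_home.bytes_to_unicode
```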

**AutoTokenizer issue**: transformers 5.x has a bug where
`tokenizer_class_from_name('TikTokenTokenizer')` returns `None` for
custom tokenizers with `auto_map`. Loading the tokenizer directly
bypasses this.

**encode() issue**: transformers 5.x's `pad()` method fails for slow
tokenizers. Using tiktoken's encode directly with
`allowed_special="all"` avoids this path and properly handles special
tokens like `<|im_user|>` from chat templates.

## Test Plan

### Manual Testing
- Hardware: 2x Mac Studios connected via Thunderbolt 5 (mike22 and
james21)
- Tested Kimi K2 Thinking, GPT-OSS-120B, GPT-OSS-20B, LLama-3.1-8B-bf16,
and qwen3-30B-A3B-8bit models with pipeline parallelism across both
nodes
- Verified warmup inference completes successfully
- Verified chat completions work with special tokens

### Automated Testing
- Added `test_tokenizers.py` with 31 tests covering:
  - Basic encode/decode for all model families (deepseek, kimi, llama, qwen, gpt-oss, glm)
  - Special token encoding (critical for chat templates)
  - Chat template application and encoding
  - Kimi-specific and GLM-specific edge cases
- All tests pass: `uv run pytest src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py`

### Failing Tests
RDMA with all models.

---------

Co-authored-by: Evan <evanev7@gmail.com>
2026-01-13 12:06:04 +00:00
Jake Hillion
b968d6f0a0 ci: remove old commented out job 2026-01-13 12:42:04 +01:00
Jake Hillion
3bfffd9b4f ci: build all Nix outputs on all platforms and push to cachix
The CI was only running `nix flake check` on ubuntu-latest, missing
builds for other platforms and not caching packages or devShells.

Added a matrix-based `nix-build` job that runs on macos-26 (aarch64-darwin),
ubuntu-latest (x86_64-linux), and ubuntu-24.04-arm (aarch64-linux). Each
job enumerates all packages and devShells via `nix flake show --json`,
builds them in a single `nix build` call for parallelization, then runs
`nix flake check`. The cachix-action pushes all built outputs automatically.

This ensures all Nix outputs are built and cached for every supported
platform, speeding up local development and CI runs.

Test plan:
- Tested jq enumeration command locally, correctly outputs devShell paths
- Verified xargs pipeline works with the enumerated outputs
2026-01-13 12:37:12 +01:00
Jake Hillion
007eb80029 nix: enable cachix
Enable cachix and push to it in the pipeline.yml workflow. This won't
cache a huge amount yet but will automatically extend our caching as we
build more of the repo with Nix in CI. It can also be used by local
users by accepting our cache to improve the speed of local builds.

Test plan:
- CI
2026-01-12 17:24:59 +01:00
Jake Hillion
8d7b6789b3 dashboard: show disk usage for completed models
The downloads dashboard showed "Completed" for finished model downloads
but provided no indication of how much disk space each model or the
total models on a node were using.

Added total_bytes field to DownloadCompleted type so the size is
preserved when a download completes. Updated the dashboard to display
the model size next to "Completed" status (e.g., "Completed (251.1GB)")
and a total disk usage line below the model count for each node (e.g.,
"502.2GB on disk").

Test plan:
- Ran unit tests for download apply and planning logic
- Type checked all modified files with basedpyright
2026-01-12 16:34:29 +01:00
173 changed files with 7362 additions and 28725 deletions

View File

@@ -1,5 +1,16 @@
 name: Build EXO macOS DMG
+# Release workflow:
+# 1. Create a draft GitHub Release with the tag name (e.g. v1.0.0) and write release notes in markdown
+# 2. Push the tag: git tag v1.0.0 && git push origin v1.0.0
+# 3. This workflow builds, signs, and notarizes the DMG
+# 4. Release notes are embedded in appcast.xml for Sparkle (rendered as markdown)
+# 5. DMG and appcast.xml are uploaded to S3
+# 6. The draft GitHub Release is published with the DMG attached
+#
+# For alpha releases (e.g. v1.0.0-alpha.1): draft release and notes are optional.
+# If no draft exists, a release is auto-created with generated notes.
 on:
   workflow_dispatch:
   push:
@@ -11,8 +22,10 @@ on:
 jobs:
   build-macos-app:
     runs-on: "macos-26"
+    permissions:
+      contents: write
     env:
-      SPARKLE_VERSION: 2.8.1
+      SPARKLE_VERSION: 2.9.0-beta.1
       SPARKLE_DOWNLOAD_PREFIX: ${{ secrets.SPARKLE_DOWNLOAD_PREFIX }}
       SPARKLE_FEED_URL: ${{ secrets.SPARKLE_FEED_URL }}
       SPARKLE_ED25519_PUBLIC: ${{ secrets.SPARKLE_ED25519_PUBLIC }}
@@ -87,6 +100,52 @@ jobs:
             exit 1
           fi
+      - name: Fetch and validate release notes
+        if: github.ref_type == 'tag'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Find draft release by name using gh release list (more reliable with default token)
+          echo "Looking for draft release named '$GITHUB_REF_NAME'..."
+          DRAFT_EXISTS=$(gh release list --json name,isDraft --jq ".[] | select(.isDraft == true) | select(.name == \"$GITHUB_REF_NAME\") | .name" 2>/dev/null || echo "")
+          if [[ -z "$DRAFT_EXISTS" ]]; then
+            if [[ "$IS_ALPHA" == "true" ]]; then
+              echo "No draft release found for alpha tag $GITHUB_REF_NAME (optional for alphas)"
+              echo "HAS_RELEASE_NOTES=false" >> $GITHUB_ENV
+              exit 0
+            fi
+            echo "ERROR: No draft release found for tag $GITHUB_REF_NAME"
+            echo "Please create a draft release with release notes before pushing the tag."
+            exit 1
+          fi
+          # Fetch full release details via API to get body and ID
+          echo "Found draft release, fetching details..."
+          RELEASE_JSON=$(gh api repos/${{ github.repository }}/releases --jq ".[] | select(.draft == true) | select(.name == \"$GITHUB_REF_NAME\")" 2>/dev/null || echo "")
+          # Extract release notes
+          NOTES=$(echo "$RELEASE_JSON" | jq -r '.body // ""')
+          if [[ -z "$NOTES" || "$NOTES" == "null" ]]; then
+            if [[ "$IS_ALPHA" == "true" ]]; then
+              echo "Draft release has no notes (optional for alphas)"
+              echo "HAS_RELEASE_NOTES=false" >> $GITHUB_ENV
+              exit 0
+            fi
+            echo "ERROR: Draft release exists but has no release notes"
+            echo "Please add release notes to the draft release before pushing the tag."
+            exit 1
+          fi
+          # Save release ID for later publishing
+          RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id')
+          echo "DRAFT_RELEASE_ID=$RELEASE_ID" >> $GITHUB_ENV
+          echo "HAS_RELEASE_NOTES=true" >> $GITHUB_ENV
+          echo "Found draft release (ID: $RELEASE_ID), saving release notes..."
+          echo "$NOTES" > /tmp/release_notes.md
+          echo "RELEASE_NOTES_FILE=/tmp/release_notes.md" >> $GITHUB_ENV
       # ============================================================
       # Install dependencies
       # ============================================================
@@ -113,11 +172,22 @@ jobs:
           uv python install
           uv sync --locked
+      - name: Install Nix
+        uses: cachix/install-nix-action@v31
+        with:
+          nix_path: nixpkgs=channel:nixos-unstable
+      - name: Configure Cachix
+        uses: cachix/cachix-action@v14
+        with:
+          name: exo
+          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
       - name: Build dashboard
         run: |
-          cd dashboard
-          npm ci
-          npm run build
+          DASHBOARD_OUT=$(nix build .#dashboard --print-build-logs --no-link --print-out-paths)
+          mkdir -p dashboard/build
+          cp -r "$DASHBOARD_OUT"/* dashboard/build/
       - name: Install Sparkle CLI
         run: |
- name: Install Sparkle CLI
run: |
@@ -293,6 +363,28 @@ jobs:
             $CHANNEL_FLAG \
             .
+      - name: Inject release notes into appcast
+        if: github.ref_type == 'tag' && env.HAS_RELEASE_NOTES == 'true'
+        env:
+          RELEASE_VERSION: ${{ env.RELEASE_VERSION }}
+        run: |
+          # Inject markdown release notes with sparkle:format="markdown" (Sparkle 2.9+)
+          export NOTES=$(cat "$RELEASE_NOTES_FILE")
+          # Insert description after the enclosure tag for this version
+          awk '
+            /<enclosure[^>]*>/ && index($0, ENVIRON["RELEASE_VERSION"]) {
+              print
+              print " <description sparkle:format=\"markdown\"><![CDATA["
+              print ENVIRON["NOTES"]
+              print " ]]></description>"
+              next
+            }
+            { print }
+          ' output/appcast.xml > output/appcast.xml.tmp && mv output/appcast.xml.tmp output/appcast.xml
+          echo "Injected markdown release notes for version $RELEASE_VERSION"
       # ============================================================
       # Upload artifacts
       # ============================================================
# ============================================================
# Upload artifacts
# ============================================================
@@ -325,3 +417,26 @@ jobs:
             aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-latest.dmg"
             aws s3 cp appcast.xml "s3://${SPARKLE_S3_BUCKET}/${PREFIX}appcast.xml" --content-type application/xml --cache-control no-cache
           fi
+      - name: Publish GitHub Release
+        if: github.ref_type == 'tag'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          DMG_PATH="output/EXO-${RELEASE_VERSION}.dmg"
+          if [[ "$HAS_RELEASE_NOTES" == "true" ]]; then
+            # Update the draft release with the tag and upload DMG
+            gh api --method PATCH "repos/${{ github.repository }}/releases/$DRAFT_RELEASE_ID" \
+              -f tag_name="$GITHUB_REF_NAME" \
+              -F draft=false
+            gh release upload "$GITHUB_REF_NAME" "$DMG_PATH" --clobber
+            echo "Published release $GITHUB_REF_NAME with DMG attached"
+          else
+            # Alpha without draft release - create one with auto-generated notes
+            gh release create "$GITHUB_REF_NAME" "$DMG_PATH" \
+              --title "$GITHUB_REF_NAME" \
+              --generate-notes \
+              --prerelease
+            echo "Created alpha release $GITHUB_REF_NAME with auto-generated notes"
+          fi

View File

@@ -20,6 +20,12 @@ jobs:
         with:
           nix_path: nixpkgs=channel:nixos-unstable
+      - uses: cachix/cachix-action@v14
+        name: Configure Cachix
+        with:
+          name: exo
+          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
       - name: Configure git user
         run: |
           git config --local user.email "github-actions@users.noreply.github.com"
@@ -88,9 +94,19 @@ jobs:
       - uses: ./.github/actions/typecheck
-  nix-flake-check:
-    name: Check Nix flake
-    runs-on: ubuntu-latest
+  nix:
+    name: Build and check (${{ matrix.system }})
+    runs-on: ${{ matrix.runner }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - runner: macos-26
+            system: aarch64-darwin
+          - runner: ubuntu-latest
+            system: x86_64-linux
+          - runner: ubuntu-24.04-arm
+            system: aarch64-linux
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -101,83 +117,20 @@ jobs:
         with:
           nix_path: nixpkgs=channel:nixos-unstable
-      - name: Run nix flake check
-        run: |
-          nix flake check
-        shell: bash
+      - uses: cachix/cachix-action@v14
+        name: Configure Cachix
+        with:
+          name: exo
+          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
-  # ci:
-  #   needs: typecheck
-  #   runs-on: ubuntu-latest
-  #   permissions:
-  #     contents: read
-  #   env:
-  #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #   steps:
-  #     - name: Checkout repository
-  #       uses: actions/checkout@v4
-  #       with:
-  #         fetch-depth: 0
-  #         token: ${{ secrets.GITHUB_TOKEN }}
-  #         lfs: true
-  #
-  #     - name: Configure git user
-  #       run: |
-  #         git config --local user.email "github-actions@users.noreply.github.com"
-  #         git config --local user.name "github-actions bot"
-  #       shell: bash
-  #
-  #     - name: Pull LFS files
-  #       run: |
-  #         echo "Pulling Git LFS files..."
-  #         git lfs pull
-  #       shell: bash
-  #
-  #     - name: Setup EXO_HOME and API_PORT
-  #       run: |
-  #         EXO_HOME=$(mktemp -d -t exo-ci-XXXXXXXX)
-  #         # Generate random port (macOS compatible method)
-  #         API_PORT=$((49152 + RANDOM % (65535 - 49152 + 1)))
-  #         echo "EXO_HOME=$EXO_HOME" >> $GITHUB_ENV
-  #         echo "API_PORT=$API_PORT" >> $GITHUB_ENV
-  #         echo "Created EXO_HOME: $EXO_HOME"
-  #         echo "Generated API_PORT: $API_PORT"
-  #       shell: bash
-  #
-  #     - name: Setup Nix Environment
-  #       run: |
-  #         echo "Checking for nix installation..."
-  #
-  #         # Check if nix binary exists directly
-  #         if [ -f /nix/var/nix/profiles/default/bin/nix ]; then
-  #           echo "Found nix binary at /nix/var/nix/profiles/default/bin/nix"
-  #           export PATH="/nix/var/nix/profiles/default/bin:$PATH"
-  #           echo "PATH=$PATH" >> $GITHUB_ENV
-  #           nix --version
-  #         elif [ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]; then
-  #           echo "Found nix profile script, sourcing..."
-  #           source /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
-  #           nix --version
-  #         elif command -v nix >/dev/null 2>&1; then
-  #           echo "Nix already in PATH"
-  #           nix --version
-  #         else
-  #           echo "Nix not found. Debugging info:"
-  #           echo "Contents of /nix/var/nix/profiles/default/:"
-  #           ls -la /nix/var/nix/profiles/default/ 2>/dev/null || echo "Directory not found"
-  #           echo "Contents of /nix/var/nix/profiles/default/bin/:"
-  #           ls -la /nix/var/nix/profiles/default/bin/ 2>/dev/null || echo "Directory not found"
-  #           exit 1
-  #         fi
-  #       shell: bash
-  #
-  #     - uses: ./.github/actions/lint-check
-  #
-  #     - uses: ./.github/actions/unit-test
-  #
-  #     - name: Cleanup EXO_HOME
-  #       run: |
-  #         echo "Cleaning up EXO_HOME: $EXO_HOME"
-  #         rm -rf "$EXO_HOME"
-  #       shell: bash
-  #       if: always()
+      - name: Build all Nix outputs
+        run: |
+          nix flake show --json | jq -r '
+            [
+              (.packages."${{ matrix.system }}" // {} | keys[] | ".#packages.${{ matrix.system }}.\(.)"),
+              (.devShells."${{ matrix.system }}" // {} | keys[] | ".#devShells.${{ matrix.system }}.\(.)")
+            ] | .[]
+          ' | xargs nix build
+      - name: Run nix flake check
+        run: nix flake check

View File

@@ -0,0 +1,156 @@
"""Type stubs for mlx_lm.models.deepseek_v3"""
from dataclasses import dataclass
from typing import Any, Dict, Optional
import mlx.core as mx
import mlx.nn as nn
from .base import BaseModelArgs
from .switch_layers import SwitchGLU
@dataclass
class ModelArgs(BaseModelArgs):
model_type: str
vocab_size: int
hidden_size: int
intermediate_size: int
moe_intermediate_size: int
num_hidden_layers: int
num_attention_heads: int
num_key_value_heads: int
n_shared_experts: Optional[int]
n_routed_experts: Optional[int]
routed_scaling_factor: float
kv_lora_rank: int
q_lora_rank: Optional[int]
qk_rope_head_dim: int
v_head_dim: int
qk_nope_head_dim: int
topk_method: str
scoring_func: str
norm_topk_prob: bool
n_group: int
topk_group: int
num_experts_per_tok: int
moe_layer_freq: int
first_k_dense_replace: int
max_position_embeddings: int
rms_norm_eps: float
rope_theta: float
rope_scaling: Optional[Dict[str, Any]]
attention_bias: bool
class DeepseekV3Attention(nn.Module):
config: ModelArgs
hidden_size: int
num_heads: int
max_position_embeddings: int
rope_theta: float
q_lora_rank: Optional[int]
qk_rope_head_dim: int
kv_lora_rank: int
v_head_dim: int
qk_nope_head_dim: int
q_head_dim: int
scale: float
q_proj: nn.Linear
q_a_proj: nn.Linear
q_a_layernorm: nn.RMSNorm
q_b_proj: nn.Linear
kv_a_proj_with_mqa: nn.Linear
kv_a_layernorm: nn.RMSNorm
kv_b_proj: nn.Linear
o_proj: nn.Linear
rope: Any
def __init__(self, config: ModelArgs) -> None: ...
def __call__(
self,
x: mx.array,
mask: Optional[mx.array] = None,
cache: Optional[Any] = None,
) -> mx.array: ...
class DeepseekV3MLP(nn.Module):
config: ModelArgs
hidden_size: int
intermediate_size: int
gate_proj: nn.Linear
up_proj: nn.Linear
down_proj: nn.Linear
def __init__(
self,
config: ModelArgs,
hidden_size: Optional[int] = None,
intermediate_size: Optional[int] = None,
) -> None: ...
def __call__(self, x: mx.array) -> mx.array: ...
class MoEGate(nn.Module):
config: ModelArgs
top_k: int
norm_topk_prob: bool
n_routed_experts: Optional[int]
routed_scaling_factor: float
n_group: int
topk_group: int
weight: mx.array
e_score_correction_bias: mx.array
def __init__(self, config: ModelArgs) -> None: ...
def __call__(self, x: mx.array) -> tuple[mx.array, mx.array]: ...
class DeepseekV3MoE(nn.Module):
config: ModelArgs
num_experts_per_tok: int
switch_mlp: SwitchGLU
gate: MoEGate
shared_experts: DeepseekV3MLP
sharding_group: Optional[mx.distributed.Group]
def __init__(self, config: ModelArgs) -> None: ...
def __call__(self, x: mx.array) -> mx.array: ...
class DeepseekV3DecoderLayer(nn.Module):
self_attn: DeepseekV3Attention
mlp: DeepseekV3MLP | DeepseekV3MoE
input_layernorm: nn.RMSNorm
post_attention_layernorm: nn.RMSNorm
def __init__(self, config: ModelArgs, layer_idx: int) -> None: ...
def __call__(
self,
x: mx.array,
mask: Optional[mx.array] = None,
cache: Optional[Any] = None,
) -> mx.array: ...
class DeepseekV3Model(nn.Module):
vocab_size: int
embed_tokens: nn.Embedding
layers: list[DeepseekV3DecoderLayer]
norm: nn.RMSNorm
def __init__(self, config: ModelArgs) -> None: ...
def __call__(
self,
x: mx.array,
cache: Optional[Any] = None,
) -> mx.array: ...
class Model(nn.Module):
model_type: str
model: DeepseekV3Model
lm_head: nn.Linear
def __init__(self, config: ModelArgs) -> None: ...
def __call__(
self,
inputs: mx.array,
cache: Optional[Any] = None,
) -> mx.array: ...
def sanitize(self, weights: dict[str, Any]) -> dict[str, Any]: ...
@property
def layers(self) -> list[DeepseekV3DecoderLayer]: ...

View File

@@ -57,6 +57,11 @@ class SwiGLU(nn.Module):
def __call__(self, x, gate): ...
class SwitchGLU(nn.Module):
gate_proj: SwitchLinear
up_proj: SwitchLinear
down_proj: SwitchLinear
activation: SwiGLU
def __init__(
self,
input_dims: int,

View File

@@ -4,6 +4,7 @@ This type stub file was generated by pyright.
from functools import partial
from pathlib import Path
from typing import Any
from transformers import PreTrainedTokenizerFast
@@ -103,37 +104,55 @@ class TokenizerWrapper:
Accessing any attribute other than the ``detokenizer`` is forwarded to the
huggingface tokenizer.
"""
def __init__(self, tokenizer, detokenizer_class=..., eos_token_ids=...) -> None: ...
def add_eos_token(self, token: str): # -> None:
...
@property
def has_thinking(self): # -> bool:
...
@property
def think_start(self): # -> str | None:
...
@property
def think_end(self): # -> str | None:
...
@property
def has_tool_calling(self): # -> bool:
...
@property
def tool_call_start(self): # -> str | None:
...
@property
def tool_call_end(self): # -> str | None:
...
@property
def detokenizer(self): # -> NaiveStreamingDetokenizer:
"""
Get a stateful streaming detokenizer.
"""
def __getattr__(self, attr): # -> set[Any] | Any:
...
def __setattr__(self, attr, value): # -> None:
...
_tokenizer: PreTrainedTokenizerFast
eos_token_id: int | None
eos_token: str | None
bos_token_id: int | None
bos_token: str | None
vocab_size: int
all_special_tokens: list[str]
def __init__(
self,
tokenizer: Any,
detokenizer_class: Any = ...,
eos_token_ids: list[int] | None = ...,
chat_template: Any = ...,
tool_parser: Any = ...,
tool_call_start: str | None = ...,
tool_call_end: str | None = ...,
) -> None: ...
def encode(self, text: str, **kwargs: Any) -> list[int]: ...
def decode(self, token_ids: list[int], **kwargs: Any) -> str: ...
def apply_chat_template(
self,
messages: list[dict[str, Any]],
tokenize: bool = False,
add_generation_prompt: bool = False,
tools: Any = None,
**kwargs: Any,
) -> str: ...
def get_vocab(self) -> dict[str, int]: ...
def add_eos_token(self, token: str) -> None: ...
@property
def has_thinking(self) -> bool: ...
@property
def think_start(self) -> str | None: ...
@property
def think_end(self) -> str | None: ...
@property
def has_tool_calling(self) -> bool: ...
@property
def tool_call_start(self) -> str | None: ...
@property
def tool_call_end(self) -> str | None: ...
@property
def detokenizer(self) -> NaiveStreamingDetokenizer:
"""Get a stateful streaming detokenizer."""
def __getattr__(self, attr: str) -> Any: ...
def __setattr__(self, attr: str, value: Any) -> None: ...
class NewlineTokenizer(PreTrainedTokenizerFast):
"""A tokenizer that replaces newlines with <n> and <n> with new line."""
@@ -146,18 +165,11 @@ class NewlineTokenizer(PreTrainedTokenizerFast):
def batch_decode(self, *args, **kwargs): # -> list[str]:
...
def load_tokenizer(
def load(
model_path: Path,
tokenizer_config_extra=...,
return_tokenizer=...,
eos_token_ids=...,
) -> (
TokenizerWrapper
| type[SPMStreamingDetokenizer]
| partial[SPMStreamingDetokenizer]
| type[BPEStreamingDetokenizer]
| type[NaiveStreamingDetokenizer]
):
tokenizer_config_extra: dict[str, Any] | None = None,
eos_token_ids: list[int] | int | None = None,
) -> TokenizerWrapper:
"""Load a huggingface tokenizer and try to infer the type of streaming
detokenizer to use.
@@ -165,4 +177,7 @@ def load_tokenizer(
a Hugging Face repo ID.
"""
def no_bos_or_eos(sequence: list, bos: int, eos: int) -> list: ...
# Alias for backward compatibility
load_tokenizer = load
def no_bos_or_eos(sequence: list[int], bos: int, eos: int) -> list[int]: ...

121
AGENTS.md Normal file
View File

@@ -0,0 +1,121 @@
# AGENTS.md
This file provides guidance to AI coding agents when working with code in this repository.
## Project Overview
exo is a distributed AI inference system that connects multiple devices into a cluster. It enables running large language models across multiple machines using MLX as the inference backend and libp2p for peer-to-peer networking.
## Build & Run Commands
```bash
# Build the dashboard (required before running exo)
cd dashboard && npm install && npm run build && cd ..
# Run exo (starts both master and worker with API at http://localhost:52415)
uv run exo
# Run with verbose logging
uv run exo -v # or -vv for more verbose
# Run tests (excludes slow tests by default)
uv run pytest
# Run all tests including slow tests
uv run pytest -m ""
# Run a specific test file
uv run pytest src/exo/shared/tests/test_election.py
# Run a specific test function
uv run pytest src/exo/shared/tests/test_election.py::test_function_name
# Type checking (strict mode)
uv run basedpyright
# Linting
uv run ruff check
# Format code (using nix)
nix fmt
```
## Pre-Commit Checks (REQUIRED)
**IMPORTANT: Always run these checks before committing code. CI will fail if these don't pass.**
```bash
# 1. Type checking - MUST pass with 0 errors
uv run basedpyright
# 2. Linting - MUST pass
uv run ruff check
# 3. Formatting - MUST be applied
nix fmt
# 4. Tests - MUST pass
uv run pytest
```
Run all checks in sequence:
```bash
uv run basedpyright && uv run ruff check && nix fmt && uv run pytest
```
If `nix fmt` changes any files, stage them before committing. The CI runs `nix flake check`, which verifies formatting and linting and runs the Rust tests.
## Architecture
### Node Composition
A single exo `Node` (src/exo/main.py) runs multiple components:
- **Router**: libp2p-based pub/sub messaging via Rust bindings (exo_pyo3_bindings)
- **Worker**: Handles inference tasks, downloads models, manages runner processes
- **Master**: Coordinates cluster state, places model instances across nodes
- **Election**: Bully algorithm for master election
- **API**: FastAPI server for OpenAI-compatible chat completions
### Message Flow
Components communicate via typed pub/sub topics (src/exo/routing/topics.py); a minimal sketch follows the list:
- `GLOBAL_EVENTS`: Master broadcasts indexed events to all workers
- `LOCAL_EVENTS`: Workers send events to master for indexing
- `COMMANDS`: Workers/API send commands to master
- `ELECTION_MESSAGES`: Election protocol messages
- `CONNECTION_MESSAGES`: libp2p connection updates
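A minimal sketch of what a typed topic might look like; the names below are illustrative, and the real definitions live in src/exo/routing/topics.py:
```python
from dataclasses import dataclass
from typing import Generic, TypeVar

M = TypeVar("M")

@dataclass(frozen=True)
class Topic(Generic[M]):
    """Pairs a topic name with the message type it carries."""
    name: str
    message_type: type[M]

@dataclass(frozen=True)
class IndexedEvent:
    index: int    # position assigned by the master when indexing
    payload: str  # stand-in for the real event union

# Hypothetical instance mirroring the GLOBAL_EVENTS topic above
GLOBAL_EVENTS: Topic[IndexedEvent] = Topic("global_events", IndexedEvent)
```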
### Event Sourcing
The system uses event sourcing for state management (a sketch of the apply loop follows the list):
- `State` (src/exo/shared/types/state.py): Immutable state object
- `apply()` (src/exo/shared/apply.py): Pure function that applies events to state
- Master indexes events and broadcasts; workers apply indexed events
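A minimal sketch of this loop, with invented State and event types (the real ones live in src/exo/shared/types/ and src/exo/shared/apply.py):
```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class State:
    next_event_index: int = 0
    runners: tuple[str, ...] = ()

@dataclass(frozen=True)
class RunnerAdded:
    runner_id: str

def apply(state: State, event: RunnerAdded) -> State:
    # Pure function: returns a new immutable State rather than mutating.
    return replace(
        state,
        next_event_index=state.next_event_index + 1,
        runners=state.runners + (event.runner_id,),
    )

state = apply(State(), RunnerAdded(runner_id="runner-0"))
assert state.next_event_index == 1
```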
### Key Type Hierarchy
- `src/exo/shared/types/`: Pydantic models for all shared types
- `events.py`: Event types (discriminated union)
- `commands.py`: Command types
- `tasks.py`: Task types for worker execution
- `state.py`: Cluster state model
### Rust Components
Rust code in `rust/` provides:
- `networking`: libp2p networking (gossipsub, peer discovery)
- `exo_pyo3_bindings`: PyO3 bindings exposing Rust to Python
- `system_custodian`: System-level operations
### Dashboard
Svelte 5 + TypeScript frontend in `dashboard/`. Build output goes to `dashboard/build/` and is served by the API.
## Code Style Requirements
From .cursorrules (a toy example follows the list):
- Strict, exhaustive typing - never bypass the type-checker
- Use `Literal[...]` for enum-like sets, `typing.NewType` for primitives
- Pydantic models with `frozen=True` and `strict=True`
- Pure functions with injectable effect handlers for side-effects
- Descriptive names - no abbreviations or 3-letter acronyms
- Catch exceptions only where you can handle them meaningfully
- Use `@final` and immutability wherever applicable
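A toy model showing these rules together; the names are invented for illustration, not taken from the codebase:
```python
from typing import Literal, NewType, final

from pydantic import BaseModel, ConfigDict

NodeId = NewType("NodeId", str)  # NewType over a primitive, not a bare str

@final
class RunnerStatus(BaseModel):
    model_config = ConfigDict(frozen=True, strict=True)

    node_id: NodeId
    phase: Literal["loading", "ready", "failed"]  # enum-like set as Literal
```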
## Testing
Tests use pytest-asyncio with `asyncio_mode = "auto"`. Tests are in `tests/` subdirectories alongside the code they test. The `EXO_TESTS=1` env var is set during tests.
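For example, in auto mode an async test needs no explicit marker (an illustrative test, not one from the codebase):
```python
import asyncio

async def test_sleep_returns_none():
    # asyncio_mode = "auto" means no @pytest.mark.asyncio is required.
    assert await asyncio.sleep(0) is None
```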

1
CLAUDE.md Symbolic link
View File

@@ -0,0 +1 @@
AGENTS.md

1624
Cargo.lock generated
View File

File diff suppressed because it is too large Load Diff

View File

@@ -2,9 +2,7 @@
resolver = "3"
members = [
"rust/networking",
"rust/downloads",
"rust/exo_pyo3_bindings",
"rust/system_custodian",
"rust/util",
]
@@ -26,8 +24,6 @@ opt-level = 3
[workspace.dependencies]
## Crate members as common dependencies
networking = { path = "rust/networking" }
downloads = { path = "rust/downloads" }
system_custodian = { path = "rust/system_custodian" }
util = { path = "rust/util" }
# Proc-macro authoring tools

41
MISSED_THINGS.md Normal file
View File

@@ -0,0 +1,41 @@
# Missed things
[X] Log EXO_LIBP2P_NAMESPACE on start in exo/main.py
[X] Ordering of warmup was changed, which is wrong. It was changed to rank < n-1, then rank = n-1; it should be rank != 0, then rank = 0 (this matches the auto_parallel implementation). NOTE: we use a different convention from mlx-lm: our terminal rank is rank = n-1 whereas mlx-lm's is rank = 0, hence I can see why this was changed wrongly.
[X] Downloads keying by model_id not shard_metadata (worker/plan.py, worker/main.py).
[X] Fetching download status of all models on start
[X] Deduplication of tasks in plan_step.
[X] resolve_allow_patterns should just be wildcard now.
[] no mx_barrier in generate.py mlx_generate at the end.
[] cache assertion not needed in auto_parallel.py PipelineLastLayer.
[] GPTOSS support dropped in auto_parallel.py.
[] sharding changed "all-to-sharded" became _all_to_sharded in auto_parallel.py.
[] same as above with "sharded-to-all" became _sharded_to_all in auto_parallel.py.
[] Dropped support for Ministral3Model, DeepseekV32Model, Glm4MoeModel, Qwen3NextModel, GptOssModel in auto_parallel.py.
[] Dropped prefill/decode code in auto_parallel.py and utils_mlx.py.
[X] KV_CACHE_BITS should be None to disable quantized KV cache.
[] Dropped _set_nofile_limit in utils_mlx.py.
[] We have group as optional in load_mlx_items in utils_mlx.py.
[] Dropped add_missing_chat_templates for GptOss in load_mlx_items in utils_mlx.py.
[] Dropped model.make_cache in make_kv_cache in utils_mlx.py.
[X] We put cache limit back in utils_mlx.py.
[] topology.py remove_node removes the connections after checking whether the node is in self._node_id_to_rx_id_map. On beta_1 it checks afterwards, so it would remove stale connections, I guess?
[] Missing Glm 4.7 model cards (this isn't ready yet but should be picked up; probably create an issue... the blocker is that the transformers version doesn't support the tokenizer for Glm 4.7. rc-1 does, but we can't upgrade as it breaks other things.)
[] The try-except in _command_processor only catches ValueError. This was silently failing, leading to un-debuggable errors (we had a KeyError that was happening). Changed this to catch Exception instead of ValueError. See exo-v2 89ae38405e0052e3c22405daf094b065878aa873 and fb99fea69b5a39017efc90c5dad0072e677455f0.
[X] In placement.py, place_instance no longer looks at model_meta.supports_tensor and checks whether this tensor-parallel number of nodes is supported by the model's tensor dimensions.
[X] In placement.py, place_instance, we no longer have the special case to exclude DeepSeek v3.1 pipeline parallel (it doesn't work).
[] logger.warning("You have likely selected ibv for a single node instance; falling back to MlxRing") was changed to debug. At warning level it would spam, since it happens every time we query instance previews.
[X] In placement_utils.py, get_mlx_jaccl_coordinators, we no longer prioritise the Jaccl Coordinator IP. Now it picks the first one, which is unstable (the Jaccl coordinator over TB5 is unstable).

View File

@@ -27,13 +27,22 @@ exo connects all your devices into an AI cluster. Not only does exo enable runni
- **Tensor Parallelism**: exo supports sharding models for up to a 1.8x speedup on 2 devices and a 3.2x speedup on 4 devices.
- **MLX Support**: exo uses [MLX](https://github.com/ml-explore/mlx) as an inference backend and [MLX distributed](https://ml-explore.github.io/mlx/build/html/usage/distributed.html) for distributed communication.
## Dashboard
exo includes a built-in dashboard for managing your cluster and chatting with models.
<p align="center">
<img src="docs/imgs/dashboard-cluster-view.png" alt="exo dashboard - cluster view showing 4 x M3 Ultra Mac Studio with DeepSeek v3.1 and Kimi-K2-Thinking loaded" width="80%" />
</p>
<p align="center"><em>4 × 512GB M3 Ultra Mac Studio running DeepSeek v3.1 (8-bit) and Kimi-K2-Thinking (4-bit)</em></p>
## Benchmarks
<details>
<summary>Qwen3-235B (8-bit) on 4 × M3 Ultra Mac Studio with Tensor Parallel RDMA</summary>
<img src="docs/benchmarks/jeffgeerling/mac-studio-cluster-ai-full-1-qwen3-235b.jpeg" alt="Benchmark - Qwen3-235B (8-bit) on 4 × M3 Ultra Mac Studio with Tensor Parallel RDMA" width="80%" />
<p>
<strong>Source:</strong> <a href="https://www.jeffgeerling.com/blog/2025/15-tb-vram-on-mac-studio-rdma-over-thunderbolt-5">Jeff Geerling: 15 TB VRAM on Mac Studio RDMA over Thunderbolt5</a>
<strong>Source:</strong> <a href="https://www.jeffgeerling.com/blog/2025/15-tb-vram-on-mac-studio-rdma-over-thunderbolt-5">Jeff Geerling: 15 TB VRAM on Mac Studio RDMA over Thunderbolt 5</a>
</p>
</details>
@@ -41,7 +50,7 @@ exo connects all your devices into an AI cluster. Not only does exo enable runni
<summary>DeepSeek v3.1 671B (8-bit) on 4 × M3 Ultra Mac Studio with Tensor Parallel RDMA</summary>
<img src="docs/benchmarks/jeffgeerling/mac-studio-cluster-ai-full-2-deepseek-3.1-671b.jpeg" alt="Benchmark - DeepSeek v3.1 671B (8-bit) on 4 × M3 Ultra Mac Studio with Tensor Parallel RDMA" width="80%" />
<p>
<strong>Source:</strong> <a href="https://www.jeffgeerling.com/blog/2025/15-tb-vram-on-mac-studio-rdma-over-thunderbolt-5">Jeff Geerling: 15 TB VRAM on Mac Studio RDMA over Thunderbolt5</a>
<strong>Source:</strong> <a href="https://www.jeffgeerling.com/blog/2025/15-tb-vram-on-mac-studio-rdma-over-thunderbolt-5">Jeff Geerling: 15 TB VRAM on Mac Studio RDMA over Thunderbolt 5</a>
</p>
</details>
@@ -49,7 +58,7 @@ exo connects all your devices into an AI cluster. Not only does exo enable runni
<summary>Kimi K2 Thinking (native 4-bit) on 4 × M3 Ultra Mac Studio with Tensor Parallel RDMA</summary>
<img src="docs/benchmarks/jeffgeerling/mac-studio-cluster-ai-full-3-kimi-k2-thinking.jpeg" alt="Benchmark - Kimi K2 Thinking (native 4-bit) on 4 × M3 Ultra Mac Studio with Tensor Parallel RDMA" width="80%" />
<p>
<strong>Source:</strong> <a href="https://www.jeffgeerling.com/blog/2025/15-tb-vram-on-mac-studio-rdma-over-thunderbolt-5">Jeff Geerling: 15 TB VRAM on Mac Studio RDMA over Thunderbolt5</a>
<strong>Source:</strong> <a href="https://www.jeffgeerling.com/blog/2025/15-tb-vram-on-mac-studio-rdma-over-thunderbolt-5">Jeff Geerling: 15 TB VRAM on Mac Studio RDMA over Thunderbolt 5</a>
</p>
</details>
@@ -154,6 +163,24 @@ This starts the exo dashboard and API at http://localhost:52415/
**Important note for Linux users:** Currently, exo runs on CPU on Linux. GPU support for Linux platforms is under development. If you'd like to see support for your specific Linux hardware, please [search for existing feature requests](https://github.com/exo-explore/exo/issues) or create a new one.
**Configuration Options:**
- `--no-worker`: Run exo without the worker component. Useful for coordinator-only nodes that handle networking and orchestration but don't execute inference tasks. This is helpful for machines without sufficient GPU resources but with good network connectivity.
```bash
uv run exo --no-worker
```
**File Locations (Linux):**
exo follows the [XDG Base Directory Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) on Linux:
- **Configuration files**: `~/.config/exo/` (or `$XDG_CONFIG_HOME/exo/`)
- **Data files**: `~/.local/share/exo/` (or `$XDG_DATA_HOME/exo/`)
- **Cache files**: `~/.cache/exo/` (or `$XDG_CACHE_HOME/exo/`)
You can override these locations by setting the corresponding XDG environment variables.
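As a rough sketch, the lookup described above amounts to the following (illustrative Python, not exo's actual code):
```python
import os
from pathlib import Path

config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) / "exo"
data_dir = Path(os.environ.get("XDG_DATA_HOME", Path.home() / ".local" / "share")) / "exo"
cache_dir = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) / "exo"
```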
### macOS App
exo ships a macOS app that runs in the background on your Mac.
@@ -166,6 +193,19 @@ Download the latest build here: [EXO-latest.dmg](https://assets.exolabs.net/EXO-
The app will ask for permission to modify system settings and install a new Network profile. Improvements to this are being worked on.
**Custom Namespace for Cluster Isolation:**
The macOS app includes a custom namespace feature that allows you to isolate your exo cluster from others on the same network. This is configured through the `EXO_LIBP2P_NAMESPACE` setting:
- **Use cases**:
- Running multiple separate exo clusters on the same network
- Isolating development/testing clusters from production clusters
- Preventing accidental cluster joining
- **Configuration**: Access this setting in the app's Advanced settings (or set the `EXO_LIBP2P_NAMESPACE` environment variable when running from source)
The namespace is logged on startup for debugging purposes.
#### Uninstalling the macOS App
The recommended way to uninstall is through the app itself: click the menu bar icon → Advanced → Uninstall. This cleanly removes all system components.
@@ -312,6 +352,52 @@ For further details, see:
---
## Benchmarking
The `exo-bench` tool measures model prefill and token generation speed across different placement configurations. This helps you optimize model performance and validate improvements.
**Prerequisites:**
- Nodes should be running with `uv run exo` before benchmarking
- The tool uses the `/bench/chat/completions` endpoint
**Basic usage:**
```bash
uv run bench/exo_bench.py \
--model llama-3.2-1b \
--pp 128,256,512 \
--tg 128,256
```
**Key parameters:**
- `--model`: Model to benchmark (short ID or HuggingFace ID)
- `--pp`: Prompt size hints (comma-separated integers)
- `--tg`: Generation lengths (comma-separated integers)
- `--max-nodes`: Limit placements to N nodes (default: 4)
- `--instance-meta`: Filter by `ring`, `jaccl`, or `both` (default: both)
- `--sharding`: Filter by `pipeline`, `tensor`, or `both` (default: both)
- `--repeat`: Number of repetitions per configuration (default: 1)
- `--warmup`: Warmup runs per placement (default: 0)
- `--json-out`: Output file for results (default: bench/results.json)
**Example with filters:**
```bash
uv run bench/exo_bench.py \
--model llama-3.2-1b \
--pp 128,512 \
--tg 128 \
--max-nodes 2 \
--sharding tensor \
--repeat 3 \
--json-out my-results.json
```
The tool outputs performance metrics including prompt tokens per second (prompt_tps), generation tokens per second (generation_tps), and peak memory usage for each configuration.
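As a hedged sketch, reading those metrics back out of the JSON output might look like this; the exact schema is an assumption, so adjust the keys to whatever your results file actually contains:
```python
import json
from pathlib import Path

results = json.loads(Path("bench/results.json").read_text())
# Assumed shape: a list of per-configuration entries with tps fields.
for entry in results:
    print(entry.get("prompt_tps"), entry.get("generation_tps"), entry.get("peak_memory"))
```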
---
## Hardware Accelerator Support
On macOS, exo uses the GPU. On Linux, exo currently runs on CPU. We are working on extending hardware accelerator support. If you'd like support for a new hardware platform, please [search for an existing feature request](https://github.com/exo-explore/exo/issues) and add a thumbs up so we know what hardware is important to the community.
@@ -320,4 +406,4 @@ On macOS, exo uses the GPU. On Linux, exo currently runs on CPU. We are working
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to contribute to exo.
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to contribute to exo.

View File

@@ -19,6 +19,7 @@
25. Rethink retry logic
26. Task cancellation. When API http request gets cancelled, it should cancel corresponding task.
27. Log cleanup - per-module log filters and default to DEBUG log levels
28. Validate RDMA connections with ibv_devinfo in the info gatherer
Potential refactors:

View File

@@ -585,7 +585,7 @@
repositoryURL = "https://github.com/sparkle-project/Sparkle.git";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 2.8.1;
minimumVersion = 2.9.0-beta.1;
};
};
/* End XCRemoteSwiftPackageReference section */

View File

@@ -6,8 +6,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/sparkle-project/Sparkle.git",
"state" : {
"revision" : "5581748cef2bae787496fe6d61139aebe0a451f6",
"version" : "2.8.1"
"revision" : "e641adb41915a8409895e2e30666aa64e487b637",
"version" : "2.9.0-beta.1"
}
}
],

View File

@@ -56,6 +56,11 @@ struct ContentView: View {
}
private var shouldShowLocalNetworkWarning: Bool {
// Show warning if local network is not working and EXO is running.
// The checker uses a longer timeout on first launch to allow time for
// the permission prompt, so this correctly handles both:
// 1. User denied permission on first launch
// 2. Permission broke after restart (macOS TCC bug)
if case .notWorking = localNetworkChecker.status {
return controller.status != .stopped
}

View File

@@ -5,8 +5,8 @@ import os.log
/// Checks if the app's local network permission is actually functional.
///
/// macOS local network permission can appear enabled in System Preferences but not
/// actually work after a restart. This service detects this by creating a UDP
/// connection to the mDNS multicast address (224.0.0.251:5353).
/// actually work after a restart. This service uses NWConnection to mDNS multicast
/// to verify actual connectivity.
@MainActor
final class LocalNetworkChecker: ObservableObject {
enum Status: Equatable {
@@ -35,30 +35,43 @@ final class LocalNetworkChecker: ObservableObject {
}
private static let logger = Logger(subsystem: "io.exo.EXO", category: "LocalNetworkChecker")
private static let hasCompletedInitialCheckKey = "LocalNetworkChecker.hasCompletedInitialCheck"
@Published private(set) var status: Status = .unknown
@Published private(set) var lastConnectionState: String = "none"
private var connection: NWConnection?
private var checkTask: Task<Void, Never>?
/// Whether we've completed at least one check (stored in UserDefaults)
private var hasCompletedInitialCheck: Bool {
get { UserDefaults.standard.bool(forKey: Self.hasCompletedInitialCheckKey) }
set { UserDefaults.standard.set(newValue, forKey: Self.hasCompletedInitialCheckKey) }
}
/// Checks if local network access is working.
func check() {
checkTask?.cancel()
status = .checking
lastConnectionState = "connecting"
// Use longer timeout on first launch to allow time for permission prompt
let isFirstCheck = !hasCompletedInitialCheck
let timeout: UInt64 = isFirstCheck ? 30_000_000_000 : 3_000_000_000
checkTask = Task { [weak self] in
guard let self else { return }
let result = await self.performCheck()
Self.logger.info("Checking local network connectivity (first check: \(isFirstCheck))")
let result = await self.checkConnectivity(timeout: timeout)
self.status = result
self.hasCompletedInitialCheck = true
Self.logger.info("Local network check complete: \(result.displayText)")
}
}
private func performCheck() async -> Status {
Self.logger.info("Checking local network access via UDP multicast")
/// Checks connectivity using NWConnection to mDNS multicast.
/// The connection attempt triggers the permission prompt if not yet shown.
private func checkConnectivity(timeout: UInt64) async -> Status {
connection?.cancel()
connection = nil
@@ -84,22 +97,7 @@ final class LocalNetworkChecker: ObservableObject {
continuation.resume(returning: status)
}
conn.stateUpdateHandler = { [weak self] state in
let stateStr: String
switch state {
case .setup: stateStr = "setup"
case .preparing: stateStr = "preparing"
case .ready: stateStr = "ready"
case .waiting(let e): stateStr = "waiting(\(e))"
case .failed(let e): stateStr = "failed(\(e))"
case .cancelled: stateStr = "cancelled"
@unknown default: stateStr = "unknown"
}
Task { @MainActor in
self?.lastConnectionState = stateStr
}
conn.stateUpdateHandler = { state in
switch state {
case .ready:
resumeOnce(.working)
@@ -108,6 +106,7 @@ final class LocalNetworkChecker: ObservableObject {
if errorStr.contains("54") || errorStr.contains("ECONNRESET") {
resumeOnce(.notWorking(reason: "Connection blocked"))
}
// Otherwise keep waiting - might be showing permission prompt
case .failed(let error):
let errorStr = "\(error)"
if errorStr.contains("65") || errorStr.contains("EHOSTUNREACH")
@@ -127,7 +126,7 @@ final class LocalNetworkChecker: ObservableObject {
conn.start(queue: .main)
Task {
try? await Task.sleep(nanoseconds: 3_000_000_000)
try? await Task.sleep(nanoseconds: timeout)
let state = conn.state
switch state {
case .ready:

View File

@@ -6,7 +6,7 @@ enum NetworkSetupHelper {
private static let logger = Logger(subsystem: "io.exo.EXO", category: "NetworkSetup")
private static let daemonLabel = "io.exo.networksetup"
private static let scriptDestination =
"/Library/Application Support/EXO/disable_bridge_enable_dhcp.sh"
"/Library/Application Support/EXO/disable_bridge.sh"
private static let plistDestination = "/Library/LaunchDaemons/io.exo.networksetup.plist"
private static let requiredStartInterval: Int = 1791
@@ -28,35 +28,6 @@ enum NetworkSetupHelper {
# Remove Thunderbolt Bridge from VirtualNetworkInterfaces in preferences.plist
/usr/libexec/PlistBuddy -c "Delete :VirtualNetworkInterfaces:Bridge:bridge0" "$PREFS" 2>/dev/null || true
networksetup -listlocations | grep -q exo || {
networksetup -createlocation exo
}
networksetup -switchtolocation exo
networksetup -listallhardwareports \\
| awk -F': ' '/Hardware Port: / {print $2}' \\
| while IFS=":" read -r name; do
case "$name" in
"Ethernet Adapter"*)
;;
"Thunderbolt Bridge")
;;
"Thunderbolt "*)
networksetup -listallnetworkservices \\
| grep -q "EXO $name" \\
|| networksetup -createnetworkservice "EXO $name" "$name" 2>/dev/null \\
|| continue
networksetup -setdhcp "EXO $name"
;;
*)
networksetup -listallnetworkservices \\
| grep -q "$name" \\
|| networksetup -createnetworkservice "$name" "$name" 2>/dev/null \\
|| continue
;;
esac
done
networksetup -listnetworkservices | grep -q "Thunderbolt Bridge" && {
networksetup -setnetworkserviceenabled "Thunderbolt Bridge" off
} || true
@@ -141,6 +112,13 @@ enum NetworkSetupHelper {
let scriptExists = manager.fileExists(atPath: scriptDestination)
let plistExists = manager.fileExists(atPath: plistDestination)
guard scriptExists, plistExists else { return false }
guard
let installedScript = try? String(contentsOfFile: scriptDestination, encoding: .utf8),
installedScript.trimmingCharacters(in: .whitespacesAndNewlines)
== setupScript.trimmingCharacters(in: .whitespacesAndNewlines)
else {
return false
}
guard
let data = try? Data(contentsOf: URL(fileURLWithPath: plistDestination)),
let plist = try? PropertyListSerialization.propertyList(

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import argparse
import contextlib
import http.client
import json
import os
@@ -15,9 +16,6 @@ from urllib.parse import urlencode
from loguru import logger
from transformers import AutoTokenizer
from exo.shared.models.model_cards import MODEL_CARDS
from exo.shared.types.memory import Memory
class ExoHttpError(RuntimeError):
def __init__(self, status: int, reason: str, body_preview: str):
@@ -26,7 +24,7 @@ class ExoHttpError(RuntimeError):
class ExoClient:
def __init__(self, host: str, port: int, timeout_s: float = 2400.0):
def __init__(self, host: str, port: int, timeout_s: float = 600.0):
self.host = host
self.port = port
self.timeout_s = timeout_s
@@ -104,22 +102,46 @@ def runner_ready(runner: dict[str, Any]) -> bool:
return "RunnerReady" in runner
def runner_failed(runner: dict[str, Any]) -> bool:
return "RunnerFailed" in runner
def get_runner_failed_message(runner: dict[str, Any]) -> str | None:
if "RunnerFailed" in runner:
return runner["RunnerFailed"].get("errorMessage")
return None
def wait_for_instance_ready(
client: ExoClient, instance_id: str, timeout: float = 24000.0
) -> None:
start_time = time.time()
instance_existed = False
while time.time() - start_time < timeout:
state = client.request_json("GET", "/state")
instances = state.get("instances", {})
if instance_id not in instances:
if instance_existed:
# Instance was deleted after being created - likely due to runner failure
raise RuntimeError(
f"Instance {instance_id} was deleted (runner may have failed)"
)
time.sleep(0.1)
continue
instance_existed = True
instance = instances[instance_id]
runner_ids = runner_ids_from_instance(instance)
runners = state.get("runners", {})
# Check for failed runners first
for rid in runner_ids:
runner = runners.get(rid, {})
if runner_failed(runner):
error_msg = get_runner_failed_message(runner) or "Unknown error"
raise RuntimeError(f"Runner {rid} failed: {error_msg}")
if all(runner_ready(runners.get(rid, {})) for rid in runner_ids):
return
@@ -241,6 +263,9 @@ class PromptSizer:
ids = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True
)
# Fix for transformers 5.x
if hasattr(ids, "input_ids"):
ids = ids.input_ids
return int(len(ids))
return count_fn
@@ -296,6 +321,12 @@ def main() -> int:
default=4,
help="Only consider placements using <= this many nodes.",
)
ap.add_argument(
"--min-nodes",
type=int,
default=1,
help="Only consider placements using >= this many nodes.",
)
ap.add_argument(
"--instance-meta", choices=["ring", "jaccl", "both"], default="both"
)
@@ -317,7 +348,7 @@ def main() -> int:
help="Warmup runs per placement (uses first pp/tg).",
)
ap.add_argument(
"--timeout", type=float, default=2400.0, help="HTTP timeout (seconds)."
"--timeout", type=float, default=600.0, help="HTTP timeout (seconds)."
)
ap.add_argument(
"--json-out",
@@ -396,7 +427,7 @@ def main() -> int:
):
continue
if 0 < n <= args.max_nodes:
if args.min_nodes <= n <= args.max_nodes:
selected.append(p)
if not selected:
@@ -438,7 +469,13 @@ def main() -> int:
)
client.request_json("POST", "/instance", body={"instance": instance})
wait_for_instance_ready(client, instance_id)
try:
wait_for_instance_ready(client, instance_id)
except (RuntimeError, TimeoutError) as e:
logger.error(f"Failed to initialize placement: {e}")
with contextlib.suppress(ExoHttpError):
client.request_json("DELETE", f"/instance/{instance_id}")
continue
time.sleep(1)
@@ -450,17 +487,17 @@ def main() -> int:
logger.debug(f" warmup {i + 1}/{args.warmup} done")
for pp in pp_list:
if (
pp * n_nodes > 2048
and "ring" in instance_meta.lower()
and "tensor" in sharding.lower()
):
model_card = MODEL_CARDS[short_id]
if model_card.metadata.storage_size > Memory.from_gb(10):
logger.info(
f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
)
continue
# if (
# pp * n_nodes > 2048
# and "ring" in instance_meta.lower()
# and "tensor" in sharding.lower()
# ):
# model_card = MODEL_CARDS[short_id]
# if model_card.metadata.storage_size > Memory.from_gb(10):
# logger.info(
# f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
# )
# continue
for tg in tg_list:
runs: list[dict[str, Any]] = []
for r in range(args.repeat):

60
dashboard/dashboard.nix Normal file
View File

@@ -0,0 +1,60 @@
{ lib
, config
, dream2nix
, ...
}:
let
# Read and parse the lock file
rawLockFile = builtins.fromJSON (builtins.readFile "${config.deps.dashboardSrc}/package-lock.json");
# For packages with bundleDependencies, filter out deps that are bundled
# (bundled deps are inside the tarball, not separate lockfile entries)
fixedPackages = lib.mapAttrs
(path: entry:
if entry ? bundleDependencies && entry.bundleDependencies != [ ]
then entry // {
dependencies = lib.filterAttrs
(name: _: !(lib.elem name entry.bundleDependencies))
(entry.dependencies or { });
}
else entry
)
(rawLockFile.packages or { });
fixedLockFile = rawLockFile // { packages = fixedPackages; };
in
{
imports = [
dream2nix.modules.dream2nix.nodejs-package-lock-v3
dream2nix.modules.dream2nix.nodejs-granular-v3
];
name = "exo-dashboard";
version = "1.0.0";
mkDerivation = {
src = config.deps.dashboardSrc;
buildPhase = ''
runHook preBuild
npm run build
runHook postBuild
'';
installPhase = ''
runHook preInstall
cp -r build $out/build
runHook postInstall
'';
};
deps = { nixpkgs, ... }: {
inherit (nixpkgs) stdenv;
dashboardSrc = null; # Injected by parts.nix
};
nodejs-package-lock-v3 = {
# Don't use packageLockFile - provide the fixed lock content directly
packageLock = fixedLockFile;
};
}

44
dashboard/parts.nix Normal file
View File

@@ -0,0 +1,44 @@
{ inputs, ... }:
{
perSystem =
{ pkgs, lib, ... }:
let
# Filter source to only include dashboard directory
src = lib.cleanSourceWith {
src = inputs.self;
filter =
path: type:
let
baseName = builtins.baseNameOf path;
inDashboardDir =
(lib.hasInfix "/dashboard/" path)
|| (lib.hasSuffix "/dashboard" (builtins.dirOf path))
|| (baseName == "dashboard" && type == "directory");
in
inDashboardDir;
};
# Build the dashboard with dream2nix (includes node_modules in output)
dashboardFull = inputs.dream2nix.lib.evalModules {
packageSets.nixpkgs = pkgs;
modules = [
./dashboard.nix
{
paths.projectRoot = inputs.self;
paths.projectRootFile = "flake.nix";
paths.package = inputs.self + "/dashboard";
}
# Inject the filtered source
{
deps.dashboardSrc = lib.mkForce "${src}/dashboard";
}
];
};
in
{
# Extract just the static site from the full build
packages.dashboard = pkgs.runCommand "exo-dashboard" { } ''
cp -r ${dashboardFull}/build $out
'';
};
}

View File

@@ -60,12 +60,39 @@
return models;
});
// Auto-select the first available model if none is selected
// Track previous model IDs to detect newly added models (plain variable to avoid reactive loop)
let previousModelIds: Set<string> = new Set();
// Auto-select the first available model if none is selected, if current selection is stale, or if a new model is added
$effect(() => {
const models = availableModels();
if (models.length > 0 && !currentModel) {
setSelectedChatModel(models[0].id);
const currentModelIds = new Set(models.map(m => m.id));
if (models.length > 0) {
// Find newly added models (in current but not in previous)
const newModels = models.filter(m => !previousModelIds.has(m.id));
// If no model selected, select the first available
if (!currentModel) {
setSelectedChatModel(models[0].id);
}
// If current model is stale (no longer has a running instance), reset to first available
else if (!models.some(m => m.id === currentModel)) {
setSelectedChatModel(models[0].id);
}
// If a new model was just added, select it
else if (newModels.length > 0 && previousModelIds.size > 0) {
setSelectedChatModel(newModels[0].id);
}
} else {
// No instances running - clear the selected model
if (currentModel) {
setSelectedChatModel('');
}
}
// Update previous model IDs for next comparison
previousModelIds = currentModelIds;
});
function getInstanceModelId(instanceWrapped: unknown): string {

View File

@@ -53,62 +53,285 @@
marked.use({ renderer });
/**
* Preprocess LaTeX: convert \(...\) to $...$ and \[...\] to $$...$$
* Also protect code blocks from LaTeX processing
* Unescape HTML entities that marked may have escaped
*/
function unescapeHtmlEntities(text: string): string {
return text
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
// Unescape & last so double-escaped entities (e.g. &amp;quot;) are not unescaped twice
.replace(/&amp;/g, '&');
}
// Storage for math expressions extracted before markdown processing
const mathExpressions: Map<string, { content: string; displayMode: boolean }> = new Map();
let mathCounter = 0;
// Storage for HTML snippets that need protection from markdown
const htmlSnippets: Map<string, string> = new Map();
let htmlCounter = 0;
// Use alphanumeric placeholders that won't be interpreted as HTML tags
const MATH_PLACEHOLDER_PREFIX = 'MATHPLACEHOLDER';
const CODE_PLACEHOLDER_PREFIX = 'CODEPLACEHOLDER';
const HTML_PLACEHOLDER_PREFIX = 'HTMLPLACEHOLDER';
/**
* Preprocess LaTeX: extract math, handle LaTeX document commands, and protect content
*/
function preprocessLaTeX(text: string): string {
// Protect code blocks
// Reset storage
mathExpressions.clear();
mathCounter = 0;
htmlSnippets.clear();
htmlCounter = 0;
// Protect code blocks first
const codeBlocks: string[] = [];
let processed = text.replace(/```[\s\S]*?```|`[^`]+`/g, (match) => {
codeBlocks.push(match);
return `<<CODE_${codeBlocks.length - 1}>>`;
return `${CODE_PLACEHOLDER_PREFIX}${codeBlocks.length - 1}END`;
});
// Convert \(...\) to $...$
processed = processed.replace(/\\\((.+?)\\\)/g, '$$$1$');
// Convert \[...\] to $$...$$
processed = processed.replace(/\\\[([\s\S]*?)\\\]/g, '$$$$$1$$$$');
// Remove LaTeX document commands
processed = processed.replace(/\\documentclass(\[[^\]]*\])?\{[^}]*\}/g, '');
processed = processed.replace(/\\usepackage(\[[^\]]*\])?\{[^}]*\}/g, '');
processed = processed.replace(/\\begin\{document\}/g, '');
processed = processed.replace(/\\end\{document\}/g, '');
processed = processed.replace(/\\maketitle/g, '');
processed = processed.replace(/\\title\{[^}]*\}/g, '');
processed = processed.replace(/\\author\{[^}]*\}/g, '');
processed = processed.replace(/\\date\{[^}]*\}/g, '');
// Remove \require{...} commands (MathJax-specific, not supported by KaTeX)
processed = processed.replace(/\$\\require\{[^}]*\}\$/g, '');
processed = processed.replace(/\\require\{[^}]*\}/g, '');
// Remove unsupported LaTeX commands/environments (tikzpicture, figure, center, etc.)
processed = processed.replace(/\\begin\{tikzpicture\}[\s\S]*?\\end\{tikzpicture\}/g, () => {
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, '<div class="latex-diagram-placeholder"><span class="latex-diagram-icon">📐</span><span class="latex-diagram-text">Diagram</span></div>');
htmlCounter++;
return placeholder;
});
processed = processed.replace(/\\begin\{figure\}[\s\S]*?\\end\{figure\}/g, () => {
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, '<div class="latex-diagram-placeholder"><span class="latex-diagram-icon">🖼️</span><span class="latex-diagram-text">Figure</span></div>');
htmlCounter++;
return placeholder;
});
// Strip center environment (layout only, no content change)
processed = processed.replace(/\\begin\{center\}/g, '');
processed = processed.replace(/\\end\{center\}/g, '');
// Strip other layout environments
processed = processed.replace(/\\begin\{flushleft\}/g, '');
processed = processed.replace(/\\end\{flushleft\}/g, '');
processed = processed.replace(/\\begin\{flushright\}/g, '');
processed = processed.replace(/\\end\{flushright\}/g, '');
processed = processed.replace(/\\label\{[^}]*\}/g, '');
processed = processed.replace(/\\caption\{[^}]*\}/g, '');
// Protect escaped dollar signs (e.g., \$50 should become $50, not LaTeX)
processed = processed.replace(/\\\$/g, 'ESCAPEDDOLLARPLACEHOLDER');
// Convert LaTeX math environments to display math (both bare and wrapped in $...$)
const mathEnvs = ['align', 'align\\*', 'equation', 'equation\\*', 'gather', 'gather\\*', 'multline', 'multline\\*', 'eqnarray', 'eqnarray\\*', 'array', 'matrix', 'pmatrix', 'bmatrix', 'vmatrix', 'cases'];
for (const env of mathEnvs) {
// Handle $\begin{env}...\end{env}$ (with dollar signs, possibly multiline)
const wrappedRegex = new RegExp(`\\$\\\\begin\\{${env}\\}(\\{[^}]*\\})?([\\s\\S]*?)\\\\end\\{${env}\\}\\$`, 'g');
processed = processed.replace(wrappedRegex, (_, args, content) => {
const cleanEnv = env.replace('\\*', '*');
const mathContent = `\\begin{${cleanEnv}}${args || ''}${content}\\end{${cleanEnv}}`;
const placeholder = `${MATH_PLACEHOLDER_PREFIX}DISPLAY${mathCounter}END`;
mathExpressions.set(placeholder, { content: mathContent, displayMode: true });
mathCounter++;
return placeholder;
});
// Handle bare \begin{env}...\end{env} (without dollar signs)
const bareRegex = new RegExp(`\\\\begin\\{${env}\\}(\\{[^}]*\\})?([\\s\\S]*?)\\\\end\\{${env}\\}`, 'g');
processed = processed.replace(bareRegex, (_, args, content) => {
const cleanEnv = env.replace('\\*', '*');
const mathContent = `\\begin{${cleanEnv}}${args || ''}${content}\\end{${cleanEnv}}`;
const placeholder = `${MATH_PLACEHOLDER_PREFIX}DISPLAY${mathCounter}END`;
mathExpressions.set(placeholder, { content: mathContent, displayMode: true });
mathCounter++;
return placeholder;
});
}
// Convert LaTeX proof environments to styled blocks (use placeholders for HTML)
processed = processed.replace(
/\\begin\{proof\}([\s\S]*?)\\end\{proof\}/g,
(_, content) => {
const html = `<div class="latex-proof"><div class="latex-proof-header">Proof</div><div class="latex-proof-content">${content}</div></div>`;
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, html);
htmlCounter++;
return placeholder;
}
);
// Convert LaTeX theorem-like environments
const theoremEnvs = ['theorem', 'lemma', 'corollary', 'proposition', 'definition', 'remark', 'example'];
for (const env of theoremEnvs) {
const envRegex = new RegExp(`\\\\begin\\{${env}\\}([\\s\\S]*?)\\\\end\\{${env}\\}`, 'gi');
const envName = env.charAt(0).toUpperCase() + env.slice(1);
processed = processed.replace(envRegex, (_, content) => {
const html = `<div class="latex-theorem"><div class="latex-theorem-header">${envName}</div><div class="latex-theorem-content">${content}</div></div>`;
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, html);
htmlCounter++;
return placeholder;
});
}
// Convert LaTeX text formatting commands (use placeholders to protect from markdown)
processed = processed.replace(/\\emph\{([^}]*)\}/g, (_, content) => {
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, `<em>${content}</em>`);
htmlCounter++;
return placeholder;
});
processed = processed.replace(/\\textit\{([^}]*)\}/g, (_, content) => {
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, `<em>${content}</em>`);
htmlCounter++;
return placeholder;
});
processed = processed.replace(/\\textbf\{([^}]*)\}/g, (_, content) => {
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, `<strong>${content}</strong>`);
htmlCounter++;
return placeholder;
});
processed = processed.replace(/\\texttt\{([^}]*)\}/g, (_, content) => {
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, `<code class="inline-code">${content}</code>`);
htmlCounter++;
return placeholder;
});
processed = processed.replace(/\\underline\{([^}]*)\}/g, (_, content) => {
const placeholder = `${HTML_PLACEHOLDER_PREFIX}${htmlCounter}END`;
htmlSnippets.set(placeholder, `<u>${content}</u>`);
htmlCounter++;
return placeholder;
});
// Handle LaTeX line breaks and spacing
processed = processed.replace(/\\\\(?:\s*\n)?/g, '\n'); // \\ -> newline
processed = processed.replace(/\\newline/g, '\n');
processed = processed.replace(/\\par\b/g, '\n\n');
processed = processed.replace(/\\quad/g, ' ');
processed = processed.replace(/\\qquad/g, ' ');
processed = processed.replace(/~~/g, ' '); // non-breaking space
// Remove other common LaTeX commands that don't render
processed = processed.replace(/\\centering/g, '');
processed = processed.replace(/\\noindent/g, '');
processed = processed.replace(/\\hfill/g, '');
processed = processed.replace(/\\vspace\{[^}]*\}/g, '');
processed = processed.replace(/\\hspace\{[^}]*\}/g, ' ');
// Convert \(...\) to placeholder (display: false)
processed = processed.replace(/\\\(([\s\S]+?)\\\)/g, (_, content) => {
const placeholder = `${MATH_PLACEHOLDER_PREFIX}INLINE${mathCounter}END`;
mathExpressions.set(placeholder, { content, displayMode: false });
mathCounter++;
return placeholder;
});
// Convert \[...\] to placeholder (display: true)
processed = processed.replace(/\\\[([\s\S]*?)\\\]/g, (_, content) => {
const placeholder = `${MATH_PLACEHOLDER_PREFIX}DISPLAY${mathCounter}END`;
mathExpressions.set(placeholder, { content, displayMode: true });
mathCounter++;
return placeholder;
});
// Extract display math ($$...$$) BEFORE markdown processing
processed = processed.replace(/\$\$([\s\S]*?)\$\$/g, (_, content) => {
const placeholder = `${MATH_PLACEHOLDER_PREFIX}DISPLAY${mathCounter}END`;
mathExpressions.set(placeholder, { content: content.trim(), displayMode: true });
mathCounter++;
return placeholder;
});
// Extract inline math ($...$) BEFORE markdown processing
// Allow single-line only, skip currency patterns like $5 or $50
processed = processed.replace(/\$([^\$\n]+?)\$/g, (match, content) => {
if (/^\d/.test(content.trim())) {
return match; // Keep as-is for currency
}
const placeholder = `${MATH_PLACEHOLDER_PREFIX}INLINE${mathCounter}END`;
mathExpressions.set(placeholder, { content: content.trim(), displayMode: false });
mathCounter++;
return placeholder;
});
// Restore escaped dollar signs
processed = processed.replace(/ESCAPEDDOLLARPLACEHOLDER/g, '$');
// Clean up any remaining stray backslashes from unrecognized commands.
// Run this before restoring code blocks so code content keeps its backslashes.
processed = processed.replace(/\\(?=[a-zA-Z])/g, '');
// Restore code blocks
processed = processed.replace(/<<CODE_(\d+)>>/g, (_, index) => codeBlocks[parseInt(index)]);
processed = processed.replace(new RegExp(`${CODE_PLACEHOLDER_PREFIX}(\\d+)END`, 'g'), (_, index) => codeBlocks[parseInt(index)]);
return processed;
}
/**
* Render math expressions with KaTeX after HTML is generated
* Render math expressions with KaTeX and restore HTML placeholders
*/
function renderMath(html: string): string {
// Render display math ($$...$$)
html = html.replace(/\$\$([\s\S]*?)\$\$/g, (_, math) => {
try {
return katex.renderToString(math.trim(), {
displayMode: true,
throwOnError: false,
output: 'html'
});
} catch {
return `<span class="math-error">$$${math}$$</span>`;
}
});
// Replace all math placeholders with rendered KaTeX
for (const [placeholder, { content, displayMode }] of mathExpressions) {
const escapedPlaceholder = placeholder.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const regex = new RegExp(escapedPlaceholder, 'g');
// Render inline math ($...$) but avoid matching currency like $5
html = html.replace(/\$([^\$\n]+?)\$/g, (match, math) => {
// Skip if it looks like currency ($ followed by number)
if (/^\d/.test(math.trim())) {
return match;
}
try {
return katex.renderToString(math.trim(), {
displayMode: false,
throwOnError: false,
output: 'html'
});
} catch {
return `<span class="math-error">$${math}$</span>`;
}
});
html = html.replace(regex, () => {
try {
const rendered = katex.renderToString(content, {
displayMode,
throwOnError: false,
output: 'html'
});
if (displayMode) {
return `
<div class="math-display-wrapper">
<div class="math-display-header">
<span class="math-label">LaTeX</span>
<button type="button" class="copy-math-btn" data-math-source="${encodeURIComponent(content)}" title="Copy LaTeX source">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<rect width="14" height="14" x="8" y="8" rx="2" ry="2"/>
<path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/>
</svg>
</button>
</div>
<div class="math-display-content">
${rendered}
</div>
</div>
`;
} else {
return `<span class="math-inline">${rendered}</span>`;
}
} catch {
const display = displayMode ? `$$${content}$$` : `$${content}$`;
return `<span class="math-error"><span class="math-error-icon">⚠</span> ${display}</span>`;
}
});
}
// Restore HTML placeholders (for \textbf, \emph, etc.)
for (const [placeholder, htmlContent] of htmlSnippets) {
const escapedPlaceholder = placeholder.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const regex = new RegExp(escapedPlaceholder, 'g');
html = html.replace(regex, htmlContent);
}
return html;
}
@@ -154,16 +377,50 @@
}
}
async function handleMathCopyClick(event: Event) {
const target = event.currentTarget as HTMLButtonElement;
const encodedSource = target.getAttribute('data-math-source');
if (!encodedSource) return;
const source = decodeURIComponent(encodedSource);
try {
await navigator.clipboard.writeText(source);
// Show copied feedback
const originalHtml = target.innerHTML;
target.innerHTML = `
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M20 6L9 17l-5-5"/>
</svg>
`;
target.classList.add('copied');
setTimeout(() => {
target.innerHTML = originalHtml;
target.classList.remove('copied');
}, 2000);
} catch (error) {
console.error('Failed to copy math:', error);
}
}
function setupCopyButtons() {
if (!containerRef || !browser) return;
const buttons = containerRef.querySelectorAll<HTMLButtonElement>('.copy-code-btn');
for (const button of buttons) {
const codeButtons = containerRef.querySelectorAll<HTMLButtonElement>('.copy-code-btn');
for (const button of codeButtons) {
if (button.dataset.listenerBound !== 'true') {
button.dataset.listenerBound = 'true';
button.addEventListener('click', handleCopyClick);
}
}
const mathButtons = containerRef.querySelectorAll<HTMLButtonElement>('.copy-math-btn');
for (const button of mathButtons) {
if (button.dataset.listenerBound !== 'true') {
button.dataset.listenerBound = 'true';
button.addEventListener('click', handleMathCopyClick);
}
}
}
$effect(() => {
@@ -424,28 +681,290 @@
color: #60a5fa;
}
/* KaTeX math styling */
/* KaTeX math styling - Base */
.markdown-content :global(.katex) {
font-size: 1.1em;
color: oklch(0.9 0 0);
}
.markdown-content :global(.katex-display) {
/* Display math container wrapper */
.markdown-content :global(.math-display-wrapper) {
margin: 1rem 0;
border-radius: 0.5rem;
overflow: hidden;
border: 1px solid rgba(255, 215, 0, 0.15);
background: rgba(0, 0, 0, 0.3);
transition: border-color 0.2s ease, box-shadow 0.2s ease;
}
.markdown-content :global(.math-display-wrapper:hover) {
border-color: rgba(255, 215, 0, 0.25);
box-shadow: 0 0 12px rgba(255, 215, 0, 0.08);
}
/* Display math header - hidden by default, slides in on hover */
.markdown-content :global(.math-display-header) {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.375rem 0.75rem;
background: rgba(255, 215, 0, 0.03);
border-bottom: 1px solid rgba(255, 215, 0, 0.08);
opacity: 0;
max-height: 0;
padding-top: 0;
padding-bottom: 0;
overflow: hidden;
transition:
opacity 0.2s ease,
max-height 0.2s ease,
padding 0.2s ease;
}
.markdown-content :global(.math-display-wrapper:hover .math-display-header) {
opacity: 1;
max-height: 2.5rem;
padding: 0.375rem 0.75rem;
}
.markdown-content :global(.math-label) {
color: rgba(255, 215, 0, 0.7);
font-size: 0.65rem;
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.1em;
font-family: ui-monospace, SFMono-Regular, 'SF Mono', Monaco, Consolas, monospace;
}
.markdown-content :global(.copy-math-btn) {
display: flex;
align-items: center;
justify-content: center;
padding: 0.25rem;
background: transparent;
border: none;
color: var(--exo-light-gray, #9ca3af);
cursor: pointer;
transition: color 0.2s;
border-radius: 0.25rem;
opacity: 0;
transition:
color 0.2s,
opacity 0.15s ease;
}
.markdown-content :global(.math-display-wrapper:hover .copy-math-btn) {
opacity: 1;
}
.markdown-content :global(.copy-math-btn:hover) {
color: var(--exo-yellow, #ffd700);
}
.markdown-content :global(.copy-math-btn.copied) {
color: #22c55e;
}
/* Display math content area */
.markdown-content :global(.math-display-content) {
padding: 1rem 1.25rem;
overflow-x: auto;
overflow-y: hidden;
padding: 0.5rem 0;
}
.markdown-content :global(.katex-display > .katex) {
/* Custom scrollbar for math overflow */
.markdown-content :global(.math-display-content::-webkit-scrollbar) {
height: 6px;
}
.markdown-content :global(.math-display-content::-webkit-scrollbar-track) {
background: rgba(255, 255, 255, 0.05);
border-radius: 3px;
}
.markdown-content :global(.math-display-content::-webkit-scrollbar-thumb) {
background: rgba(255, 215, 0, 0.2);
border-radius: 3px;
}
.markdown-content :global(.math-display-content::-webkit-scrollbar-thumb:hover) {
background: rgba(255, 215, 0, 0.35);
}
.markdown-content :global(.math-display-content .katex-display) {
margin: 0;
padding: 0;
}
.markdown-content :global(.math-display-content .katex-display > .katex) {
text-align: center;
}
/* Inline math wrapper */
.markdown-content :global(.math-inline) {
display: inline;
padding: 0 0.125rem;
border-radius: 0.25rem;
transition: background-color 0.15s ease;
}
.markdown-content :global(.math-inline:hover) {
background: rgba(255, 215, 0, 0.05);
}
/* Dark theme KaTeX overrides */
.markdown-content :global(.katex .mord),
.markdown-content :global(.katex .minner),
.markdown-content :global(.katex .mop),
.markdown-content :global(.katex .mbin),
.markdown-content :global(.katex .mrel),
.markdown-content :global(.katex .mpunct) {
color: oklch(0.9 0 0);
}
/* Fraction lines and rules */
.markdown-content :global(.katex .frac-line),
.markdown-content :global(.katex .overline-line),
.markdown-content :global(.katex .underline-line),
.markdown-content :global(.katex .hline),
.markdown-content :global(.katex .rule) {
border-color: oklch(0.85 0 0) !important;
background: oklch(0.85 0 0);
}
/* Square roots and SVG elements */
.markdown-content :global(.katex .sqrt-line) {
border-color: oklch(0.85 0 0) !important;
}
.markdown-content :global(.katex svg) {
fill: oklch(0.85 0 0);
stroke: oklch(0.85 0 0);
}
.markdown-content :global(.katex svg path) {
stroke: oklch(0.85 0 0);
}
/* Delimiters (parentheses, brackets, braces) */
.markdown-content :global(.katex .delimsizing),
.markdown-content :global(.katex .delim-size1),
.markdown-content :global(.katex .delim-size2),
.markdown-content :global(.katex .delim-size3),
.markdown-content :global(.katex .delim-size4),
.markdown-content :global(.katex .mopen),
.markdown-content :global(.katex .mclose) {
color: oklch(0.75 0 0);
}
/* Math error styling */
.markdown-content :global(.math-error) {
display: inline-flex;
align-items: center;
gap: 0.375rem;
color: #f87171;
font-family: ui-monospace, SFMono-Regular, 'SF Mono', Monaco, Consolas, monospace;
font-size: 0.875em;
background: rgba(248, 113, 113, 0.1);
padding: 0.125rem 0.25rem;
padding: 0.25rem 0.5rem;
border-radius: 0.25rem;
border: 1px solid rgba(248, 113, 113, 0.2);
}
.markdown-content :global(.math-error-icon) {
font-size: 0.875em;
opacity: 0.9;
}
/* LaTeX proof environment */
.markdown-content :global(.latex-proof) {
margin: 1rem 0;
padding: 1rem 1.25rem;
background: rgba(255, 255, 255, 0.02);
border-left: 3px solid rgba(255, 215, 0, 0.4);
border-radius: 0 0.375rem 0.375rem 0;
}
.markdown-content :global(.latex-proof-header) {
font-weight: 600;
font-style: italic;
color: oklch(0.85 0 0);
margin-bottom: 0.5rem;
}
.markdown-content :global(.latex-proof-header::after) {
content: '.';
}
.markdown-content :global(.latex-proof-content) {
color: oklch(0.9 0 0);
}
.markdown-content :global(.latex-proof-content p:last-child) {
margin-bottom: 0;
}
/* QED symbol at end of proof */
.markdown-content :global(.latex-proof-content::after) {
content: '∎';
display: block;
text-align: right;
color: oklch(0.7 0 0);
margin-top: 0.5rem;
}
/* LaTeX theorem-like environments */
.markdown-content :global(.latex-theorem) {
margin: 1rem 0;
padding: 1rem 1.25rem;
background: rgba(255, 215, 0, 0.03);
border: 1px solid rgba(255, 215, 0, 0.15);
border-radius: 0.375rem;
}
.markdown-content :global(.latex-theorem-header) {
font-weight: 700;
color: var(--exo-yellow, #ffd700);
margin-bottom: 0.5rem;
}
.markdown-content :global(.latex-theorem-header::after) {
content: '.';
}
.markdown-content :global(.latex-theorem-content) {
color: oklch(0.9 0 0);
font-style: italic;
}
.markdown-content :global(.latex-theorem-content p:last-child) {
margin-bottom: 0;
}
/* LaTeX diagram/figure placeholder */
.markdown-content :global(.latex-diagram-placeholder) {
display: flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
margin: 1rem 0;
padding: 1.5rem 2rem;
background: rgba(255, 255, 255, 0.02);
border: 1px dashed rgba(255, 215, 0, 0.25);
border-radius: 0.5rem;
color: rgba(255, 215, 0, 0.6);
font-size: 0.875rem;
}
.markdown-content :global(.latex-diagram-icon) {
font-size: 1.25rem;
opacity: 0.8;
}
.markdown-content :global(.latex-diagram-text) {
font-family: ui-monospace, SFMono-Regular, 'SF Mono', Monaco, Consolas, monospace;
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
}
</style>

View File

@@ -197,7 +197,7 @@ function toggleNodeDetails(nodeId: string): void {
// Uses API preview data when available, falls back to local estimation
const placementPreview = $derived(() => {
const nodeArray = nodeList();
if (nodeArray.length === 0) return { nodes: [], canFit: false, totalAvailable: 0, error: null };
if (nodeArray.length === 0) return { nodes: [], canFit: false, totalAvailable: 0, topoWidth: 260, topoHeight: 90, error: null };
const numNodes = nodeArray.length;
const iconSize = numNodes === 1 ? 50 : 36;

View File

@@ -1,7 +1,7 @@
<script lang="ts">
import { onMount, onDestroy } from 'svelte';
import * as d3 from 'd3';
import { topologyData, isTopologyMinimized, debugMode } from '$lib/stores/app.svelte';
import { topologyData, isTopologyMinimized, debugMode, type NodeInfo } from '$lib/stores/app.svelte';
interface Props {
class?: string;
@@ -24,14 +24,14 @@ function getNodeLabel(nodeId: string): string {
function getInterfaceLabel(nodeId: string, ip?: string): { label: string; missing: boolean } {
if (!ip) return { label: '?', missing: true };
// Strip port if present (e.g., "192.168.1.1:8080" -> "192.168.1.1")
const cleanIp = ip.includes(':') && !ip.includes('[') ? ip.split(':')[0] : ip;
// Helper to check a node's interfaces
function checkNode(node: typeof data.nodes[string]): string | null {
function checkNode(node: NodeInfo | undefined): string | null {
if (!node) return null;
const matchFromInterfaces = node.network_interfaces?.find((iface) =>
(iface.addresses || []).some((addr) => addr === cleanIp || addr === ip)
);
@@ -39,17 +39,19 @@ function getInterfaceLabel(nodeId: string, ip?: string): { label: string; missin
return matchFromInterfaces.name;
}
const mapped = node.ip_to_interface?.[cleanIp] || node.ip_to_interface?.[ip];
if (mapped && mapped.trim().length > 0) {
return mapped;
if (node.ip_to_interface) {
const mapped = node.ip_to_interface[cleanIp] || (ip ? node.ip_to_interface[ip] : undefined);
if (mapped && mapped.trim().length > 0) {
return mapped;
}
}
return null;
}
// Try specified node first
const result = checkNode(data?.nodes?.[nodeId]);
if (result) return { label: result, missing: false };
// Fallback: search all nodes for this IP
for (const [, otherNode] of Object.entries(data?.nodes || {})) {
const otherResult = checkNode(otherNode);
@@ -255,21 +257,24 @@ function wrapLine(text: string, maxLen: number): string[] {
const arrowsGroup = svg.append('g').attr('class', 'arrows-group');
const debugLabelsGroup = svg.append('g').attr('class', 'debug-edge-labels');
const pairMap = new Map<string, { a: string; b: string; aToB: boolean; bToA: boolean; connections: Array<{ from: string; to: string; ip: string; ifaceLabel: string; missingIface: boolean }> }>();
let debugEdgeLabels: Array<{ connections: typeof pairMap extends Map<string, infer V> ? V['connections'] : never; isLeft: boolean; isTop: boolean; mx: number; my: number }> | null = null;
type ConnectionInfo = { from: string; to: string; ip: string; ifaceLabel: string; missingIface: boolean };
type PairEntry = { a: string; b: string; aToB: boolean; bToA: boolean; connections: ConnectionInfo[] };
type DebugEdgeLabelEntry = { connections: ConnectionInfo[]; isLeft: boolean; isTop: boolean; mx: number; my: number };
const pairMap = new Map<string, PairEntry>();
const debugEdgeLabels: DebugEdgeLabelEntry[] = [];
edges.forEach(edge => {
if (!edge.source || !edge.target || edge.source === edge.target) return;
if (!positionById[edge.source] || !positionById[edge.target]) return;
const a = edge.source < edge.target ? edge.source : edge.target;
const b = edge.source < edge.target ? edge.target : edge.source;
const key = `${a}|${b}`;
const entry = pairMap.get(key) || { a, b, aToB: false, bToA: false, connections: [] };
if (edge.source === a) entry.aToB = true;
else entry.bToA = true;
const ip = edge.sendBackIp || edge.sendBackMultiaddr?.ip_address || '?';
const ip = edge.sendBackIp || '?';
const ifaceInfo = getInterfaceLabel(edge.source, ip);
entry.connections.push({
from: edge.source,
@@ -338,9 +343,8 @@ function wrapLine(text: string, maxLen: number): string[] {
// Determine which side of viewport based on edge midpoint
const isLeft = mx < centerX;
const isTop = my < safeCenterY;
// Store for batch rendering after all edges processed
if (!debugEdgeLabels) debugEdgeLabels = [];
debugEdgeLabels.push({
connections: entry.connections,
isLeft,
@@ -381,32 +385,32 @@ function wrapLine(text: string, maxLen: number): string[] {
}
// Group by quadrant: topLeft, topRight, bottomLeft, bottomRight
const quadrants: Record<string, typeof debugEdgeLabels> = {
const quadrants: Record<string, DebugEdgeLabelEntry[]> = {
topLeft: [],
topRight: [],
bottomLeft: [],
bottomRight: []
};
debugEdgeLabels.forEach(edge => {
const key = (edge.isTop ? 'top' : 'bottom') + (edge.isLeft ? 'Left' : 'Right');
quadrants[key].push(edge);
});
// Render each quadrant
Object.entries(quadrants).forEach(([quadrant, edges]) => {
if (edges.length === 0) return;
Object.entries(quadrants).forEach(([quadrant, quadrantEdges]) => {
if (quadrantEdges.length === 0) return;
const isLeft = quadrant.includes('Left');
const isTop = quadrant.includes('top');
let baseX = isLeft ? padding : width - padding;
let baseY = isTop ? padding : height - padding;
const textAnchor = isLeft ? 'start' : 'end';
let currentY = baseY;
edges.forEach(edge => {
quadrantEdges.forEach(edge => {
edge.connections.forEach(conn => {
const arrow = getArrow(conn.from, conn.to);
const label = `${arrow} ${conn.ip} ${conn.ifaceLabel}`;
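The quadrant pass above is the core of the de-overlap strategy: each debug label is bucketed by which corner of the viewport its edge midpoint falls in, and each bucket is rendered as a stack anchored to that corner. A minimal sketch of the bucketing under the same key scheme (`'top'|'bottom'` + `'Left'|'Right'`); `Entry` is a simplified stand-in for `DebugEdgeLabelEntry`:

```typescript
// Simplified stand-in for DebugEdgeLabelEntry: only the quadrant flags matter here.
type Entry = { isLeft: boolean; isTop: boolean };

function groupByQuadrant<T extends Entry>(entries: T[]): Record<string, T[]> {
  const quadrants: Record<string, T[]> = {
    topLeft: [],
    topRight: [],
    bottomLeft: [],
    bottomRight: [],
  };
  for (const e of entries) {
    // Same key construction as the component above.
    quadrants[(e.isTop ? 'top' : 'bottom') + (e.isLeft ? 'Left' : 'Right')].push(e);
  }
  return quadrants;
}
```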

View File

@@ -71,52 +71,66 @@ export interface Instance {
};
}
interface RawNodeProfile {
// Granular node state types from the new state structure
interface RawNodeIdentity {
modelId?: string;
chipId?: string;
friendlyName?: string;
networkInterfaces?: Array<{
name?: string;
ipAddress?: string;
addresses?: Array<{ address?: string } | string>;
ipv4?: string;
ipv6?: string;
ipAddresses?: string[];
ips?: string[];
}>;
memory?: {
ramTotal?: { inBytes: number };
ramAvailable?: { inBytes: number };
swapTotal?: { inBytes: number };
swapAvailable?: { inBytes: number };
};
system?: {
gpuUsage?: number;
temp?: number;
sysPower?: number;
}
interface RawMemoryUsage {
ramTotal?: { inBytes: number };
ramAvailable?: { inBytes: number };
swapTotal?: { inBytes: number };
swapAvailable?: { inBytes: number };
}
interface RawSystemPerformanceProfile {
gpuUsage?: number;
temp?: number;
sysPower?: number;
pcpuUsage?: number;
ecpuUsage?: number;
}
interface RawNetworkInterfaceInfo {
name?: string;
ipAddress?: string;
addresses?: Array<{ address?: string } | string>;
ipv4?: string;
ipv6?: string;
ipAddresses?: string[];
ips?: string[];
}
interface RawNodeNetworkInfo {
interfaces?: RawNetworkInterfaceInfo[];
}
interface RawSocketConnection {
sinkMultiaddr?: {
address?: string;
ip_address?: string;
address_type?: string;
port?: number;
};
}
interface RawTopologyNode {
nodeId: string;
nodeProfile: RawNodeProfile;
interface RawRDMAConnection {
sourceRdmaIface?: string;
sinkRdmaIface?: string;
}
interface RawTopologyConnection {
localNodeId: string;
sendBackNodeId: string;
sendBackMultiaddr?:
| { multiaddr?: string; address?: string; ip_address?: string }
| string;
}
type RawConnectionEdge = RawSocketConnection | RawRDMAConnection;
// New nested mapping format: { source: { sink: [edge1, edge2, ...] } }
type RawConnectionsMap = Record<string, Record<string, RawConnectionEdge[]>>;
interface RawTopology {
nodes: RawTopologyNode[];
connections?: RawTopologyConnection[];
nodes: string[];
connections?: RawConnectionsMap;
}
type RawNodeProfiles = Record<string, RawNodeProfile>;
export interface DownloadProgress {
totalBytes: number;
downloadedBytes: number;
@@ -171,7 +185,11 @@ interface RawStateResponse {
>;
runners?: Record<string, unknown>;
downloads?: Record<string, unknown[]>;
nodeProfiles?: RawNodeProfiles;
// New granular node state fields
nodeIdentities?: Record<string, RawNodeIdentity>;
nodeMemory?: Record<string, RawMemoryUsage>;
nodeSystem?: Record<string, RawSystemPerformanceProfile>;
nodeNetwork?: Record<string, RawNodeNetworkInfo>;
}
export interface MessageAttachment {
@@ -206,56 +224,69 @@ export interface Conversation {
const STORAGE_KEY = "exo-conversations";
interface GranularNodeState {
nodeIdentities?: Record<string, RawNodeIdentity>;
nodeMemory?: Record<string, RawMemoryUsage>;
nodeSystem?: Record<string, RawSystemPerformanceProfile>;
nodeNetwork?: Record<string, RawNodeNetworkInfo>;
}
function transformNetworkInterface(iface: RawNetworkInterfaceInfo): {
name?: string;
addresses: string[];
} {
const addresses: string[] = [];
if (iface.ipAddress && typeof iface.ipAddress === "string") {
addresses.push(iface.ipAddress);
}
if (Array.isArray(iface.addresses)) {
for (const addr of iface.addresses) {
if (typeof addr === "string") addresses.push(addr);
else if (addr && typeof addr === "object" && addr.address)
addresses.push(addr.address);
}
}
if (Array.isArray(iface.ipAddresses)) {
addresses.push(
...iface.ipAddresses.filter((a): a is string => typeof a === "string"),
);
}
if (Array.isArray(iface.ips)) {
addresses.push(
...iface.ips.filter((a): a is string => typeof a === "string"),
);
}
if (iface.ipv4 && typeof iface.ipv4 === "string") addresses.push(iface.ipv4);
if (iface.ipv6 && typeof iface.ipv6 === "string") addresses.push(iface.ipv6);
return {
name: iface.name,
addresses: Array.from(new Set(addresses)),
};
}
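`transformNetworkInterface` exists because nodes have historically reported addresses under several different field names (`ipAddress`, `addresses` as strings or objects, `ipAddresses`, `ips`, `ipv4`/`ipv6`); it flattens all of them and dedupes via a `Set`. An illustrative call with an invented interface record:

```typescript
// Invented input exercising several of the accepted shapes.
const iface: RawNetworkInterfaceInfo = {
  name: "en0",
  ipAddress: "192.168.1.10",
  addresses: [{ address: "192.168.1.10" }, "fe80::1"],
  ipv4: "192.168.1.10",
  ips: ["10.0.0.2"],
};

// The duplicate "192.168.1.10" collapses through the Set:
// => { name: "en0", addresses: ["192.168.1.10", "fe80::1", "10.0.0.2"] }
const normalized = transformNetworkInterface(iface);
```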
function transformTopology(
raw: RawTopology,
profiles?: RawNodeProfiles,
granularState: GranularNodeState,
): TopologyData {
const nodes: Record<string, NodeInfo> = {};
const edges: TopologyEdge[] = [];
for (const node of raw.nodes || []) {
const mergedProfile = profiles?.[node.nodeId];
const profile = { ...(node.nodeProfile ?? {}), ...(mergedProfile ?? {}) };
const ramTotal = profile?.memory?.ramTotal?.inBytes ?? 0;
const ramAvailable = profile?.memory?.ramAvailable?.inBytes ?? 0;
for (const nodeId of raw.nodes || []) {
if (!nodeId) continue;
// Get data from granular state mappings
const identity = granularState.nodeIdentities?.[nodeId];
const memory = granularState.nodeMemory?.[nodeId];
const system = granularState.nodeSystem?.[nodeId];
const network = granularState.nodeNetwork?.[nodeId];
const ramTotal = memory?.ramTotal?.inBytes ?? 0;
const ramAvailable = memory?.ramAvailable?.inBytes ?? 0;
const ramUsage = Math.max(ramTotal - ramAvailable, 0);
const networkInterfaces = (profile?.networkInterfaces || []).map(
(iface) => {
const addresses: string[] = [];
if (iface.ipAddress && typeof iface.ipAddress === "string") {
addresses.push(iface.ipAddress);
}
if (Array.isArray(iface.addresses)) {
for (const addr of iface.addresses) {
if (typeof addr === "string") addresses.push(addr);
else if (addr && typeof addr === "object" && addr.address)
addresses.push(addr.address);
}
}
if (Array.isArray(iface.ipAddresses)) {
addresses.push(
...iface.ipAddresses.filter(
(a): a is string => typeof a === "string",
),
);
}
if (Array.isArray(iface.ips)) {
addresses.push(
...iface.ips.filter((a): a is string => typeof a === "string"),
);
}
if (iface.ipv4 && typeof iface.ipv4 === "string")
addresses.push(iface.ipv4);
if (iface.ipv6 && typeof iface.ipv6 === "string")
addresses.push(iface.ipv6);
return {
name: iface.name,
addresses: Array.from(new Set(addresses)),
};
},
);
const rawInterfaces = network?.interfaces || [];
const networkInterfaces = rawInterfaces.map(transformNetworkInterface);
const ipToInterface: Record<string, string> = {};
for (const iface of networkInterfaces) {
@@ -264,10 +295,10 @@ function transformTopology(
}
}
nodes[node.nodeId] = {
nodes[nodeId] = {
system_info: {
model_id: profile?.modelId ?? "Unknown",
chip: profile?.chipId,
model_id: identity?.modelId ?? "Unknown",
chip: identity?.chipId,
memory: ramTotal,
},
network_interfaces: networkInterfaces,
@@ -278,43 +309,42 @@ function transformTopology(
ram_total: ramTotal,
},
temp:
profile?.system?.temp !== undefined
? { gpu_temp_avg: profile.system.temp }
system?.temp !== undefined
? { gpu_temp_avg: system.temp }
: undefined,
gpu_usage:
profile?.system?.gpuUsage !== undefined
? [0, profile.system.gpuUsage]
: undefined,
sys_power: profile?.system?.sysPower,
system?.gpuUsage !== undefined ? [0, system.gpuUsage] : undefined,
sys_power: system?.sysPower,
},
last_macmon_update: Date.now() / 1000,
friendly_name: profile?.friendlyName,
friendly_name: identity?.friendlyName,
};
}
for (const conn of raw.connections || []) {
if (!conn.localNodeId || !conn.sendBackNodeId) continue;
if (conn.localNodeId === conn.sendBackNodeId) continue;
if (!nodes[conn.localNodeId] || !nodes[conn.sendBackNodeId]) continue;
// Handle connections - nested mapping format { source: { sink: [edges] } }
const connections = raw.connections;
if (connections && typeof connections === "object") {
for (const [source, sinks] of Object.entries(connections)) {
if (!sinks || typeof sinks !== "object") continue;
for (const [sink, edgeList] of Object.entries(sinks)) {
if (!Array.isArray(edgeList)) continue;
for (const edge of edgeList) {
let sendBackIp: string | undefined;
if (edge && typeof edge === "object" && "sinkMultiaddr" in edge) {
const multiaddr = edge.sinkMultiaddr;
if (multiaddr) {
sendBackIp =
multiaddr.ip_address ||
extractIpFromMultiaddr(multiaddr.address);
}
}
let sendBackIp: string | undefined;
if (conn.sendBackMultiaddr) {
const multi = conn.sendBackMultiaddr;
if (typeof multi === "string") {
sendBackIp = extractIpFromMultiaddr(multi);
} else {
sendBackIp =
multi.ip_address ||
extractIpFromMultiaddr(multi.multiaddr) ||
extractIpFromMultiaddr(multi.address);
if (nodes[source] && nodes[sink] && source !== sink) {
edges.push({ source, target: sink, sendBackIp });
}
}
}
}
edges.push({
source: conn.localNodeId,
target: conn.sendBackNodeId,
sendBackIp,
});
}
return { nodes, edges };
@@ -868,7 +898,12 @@ class AppStore {
const data: RawStateResponse = await response.json();
if (data.topology) {
this.topologyData = transformTopology(data.topology, data.nodeProfiles);
this.topologyData = transformTopology(data.topology, {
nodeIdentities: data.nodeIdentities,
nodeMemory: data.nodeMemory,
nodeSystem: data.nodeSystem,
nodeNetwork: data.nodeNetwork,
});
}
if (data.instances) {
this.instances = data.instances;
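For orientation, an invented `/state` payload in the granular shape consumed here; node IDs and addresses are made up, and `connections` uses the nested `{ source: { sink: [edges] } }` mapping:

```typescript
// Invented example of the granular state shape (all values illustrative).
const example: RawStateResponse = {
  topology: {
    nodes: ["node-a", "node-b"],
    connections: {
      "node-a": {
        "node-b": [{ sinkMultiaddr: { ip_address: "192.168.1.2", port: 52415 } }],
      },
    },
  },
  nodeIdentities: { "node-a": { modelId: "Mac16,11", friendlyName: "studio" } },
  nodeMemory: { "node-a": { ramTotal: { inBytes: 68_719_476_736 } } },
  nodeSystem: { "node-a": { gpuUsage: 0.12 } },
  nodeNetwork: { "node-a": { interfaces: [{ name: "en0", ipv4: "192.168.1.1" }] } },
};

// Same call pattern as the store code above:
const topo = transformTopology(example.topology!, {
  nodeIdentities: example.nodeIdentities,
  nodeMemory: example.nodeMemory,
  nodeSystem: example.nodeSystem,
  nodeNetwork: example.nodeNetwork,
});
```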

View File

@@ -400,10 +400,8 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
const errorText = await response.text();
console.error('Failed to launch instance:', errorText);
} else {
// Auto-select the launched model only if no model is currently selected
if (!selectedChatModel()) {
setSelectedChatModel(modelId);
}
// Always auto-select the newly launched model so the user chats with the model they just launched

setSelectedChatModel(modelId);
// Scroll to the bottom of instances container to show the new instance
// Use multiple attempts to ensure DOM has updated with the new instance
@@ -436,8 +434,8 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
const shardData = shardObj[shardKeys[0]] as Record<string, unknown>;
if (!shardData) return null;
// Model meta is nested: shard.model_meta.model_id
const modelMeta = shardData.model_meta ?? shardData.modelMeta;
// Model meta is nested: shard.model_card.model_id
const modelMeta = shardData.model_card ?? shardData.modelCard;
if (!modelMeta || typeof modelMeta !== 'object') return null;
const meta = modelMeta as Record<string, unknown>;
@@ -763,6 +761,10 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
async function deleteInstance(instanceId: string) {
if (!confirm(`Delete instance ${instanceId.slice(0, 8)}...?`)) return;
// Get the model ID of the instance being deleted before we delete it
const deletedInstanceModelId = getInstanceModelId(instanceData[instanceId]);
const wasSelected = selectedChatModel() === deletedInstanceModelId;
try {
const response = await fetch(`/instance/${instanceId}`, {
method: 'DELETE',
@@ -771,6 +773,24 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
if (!response.ok) {
console.error('Failed to delete instance:', response.status);
} else if (wasSelected) {
// If we deleted the currently selected model, switch to another available model
// Find another instance that isn't the one we just deleted
const remainingInstances = Object.entries(instanceData).filter(([id]) => id !== instanceId);
if (remainingInstances.length > 0) {
// Select the last instance (most recently added, since objects preserve insertion order)
const [, lastInstance] = remainingInstances[remainingInstances.length - 1];
const newModelId = getInstanceModelId(lastInstance);
if (newModelId && newModelId !== 'Unknown' && newModelId !== 'Unknown Model') {
setSelectedChatModel(newModelId);
} else {
// Clear selection if no valid model found
setSelectedChatModel('');
}
} else {
// No more instances, clear the selection
setSelectedChatModel('');
}
}
} catch (error) {
console.error('Error deleting instance:', error);
@@ -895,7 +915,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
const runnerEntries = Object.entries(runnerToShard).map(([runnerId, shardWrapped]) => {
const [tag, shard] = getTagged(shardWrapped);
const meta = (shard as { modelMeta?: { worldSize?: number; nLayers?: number; deviceRank?: number } } | undefined);
const deviceRank = (meta?.deviceRank as number | undefined) ?? 0;
const deviceRank = meta?.modelMeta?.deviceRank ?? 0;
return { runnerId, tag, deviceRank };
});
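The `deviceRank` fix above is subtle: the rank lives on the nested `modelMeta` object, so the old `meta?.deviceRank` read a property that doesn't exist on the wrapper and every runner fell back to rank 0. A tiny invented record showing the difference:

```typescript
// Invented shard record: deviceRank lives under modelMeta.
const shardExample: {
  modelMeta?: { worldSize?: number; nLayers?: number; deviceRank?: number };
} = {
  modelMeta: { worldSize: 2, nLayers: 32, deviceRank: 1 },
};

// Old: (shardExample as { deviceRank?: number }).deviceRank ?? 0  // -> 0, wrong level
// New: read through the nested object.
const rank = shardExample.modelMeta?.deviceRank ?? 0; // -> 1
```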

View File

@@ -98,7 +98,7 @@
const shardData = shardObj[shardKeys[0]] as Record<string, unknown>;
if (!shardData) return null;
const modelMeta = shardData.model_meta ?? shardData.modelMeta;
const modelMeta = shardData.model_card ?? shardData.modelCard;
if (!modelMeta || typeof modelMeta !== 'object') return null;
const meta = modelMeta as Record<string, unknown>;
@@ -190,7 +190,7 @@
const shardKeys = Object.keys(shardObj);
if (shardKeys.length !== 1) return null;
const shardData = shardObj[shardKeys[0]] as Record<string, unknown>;
const modelMeta = shardData?.model_meta ?? shardData?.modelMeta;
const modelMeta = shardData?.model_card ?? shardData?.modelCard;
if (!modelMeta || typeof modelMeta !== 'object') return null;
const meta = modelMeta as Record<string, unknown>;
return (meta.prettyName as string) ?? null;

View File

Binary file not shown.

(image diff: new version 187 KiB)

flake.lock generated
View File

@@ -1,5 +1,42 @@
{
"nodes": {
"crane": {
"locked": {
"lastModified": 1767744144,
"narHash": "sha256-9/9ntI0D+HbN4G0TrK3KmHbTvwgswz7p8IEJsWyef8Q=",
"owner": "ipetkov",
"repo": "crane",
"rev": "2fb033290bf6b23f226d4c8b32f7f7a16b043d7e",
"type": "github"
},
"original": {
"owner": "ipetkov",
"repo": "crane",
"type": "github"
}
},
"dream2nix": {
"inputs": {
"nixpkgs": [
"nixpkgs"
],
"purescript-overlay": "purescript-overlay",
"pyproject-nix": "pyproject-nix"
},
"locked": {
"lastModified": 1765953015,
"narHash": "sha256-5FBZbbWR1Csp3Y2icfRkxMJw/a/5FGg8hCXej2//bbI=",
"owner": "nix-community",
"repo": "dream2nix",
"rev": "69eb01fa0995e1e90add49d8ca5bcba213b0416f",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "dream2nix",
"type": "github"
}
},
"fenix": {
"inputs": {
"nixpkgs": [
@@ -8,11 +45,11 @@
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1761893049,
"narHash": "sha256-1TtFDPhC+ZsrOOtBnry1EZC+WipTTvsOVjIEVugqji8=",
"lastModified": 1768287139,
"narHash": "sha256-nsXFt0OzUi6K7dUzzJD5/v9e0Ic+fvclfIW936/43ZM=",
"owner": "nix-community",
"repo": "fenix",
"rev": "c2ac9a5c0d6d16630c3b225b874bd14528d1abe6",
"rev": "a4a3aa956931f90f35453cb519e4545e9ad7f773",
"type": "github"
},
"original": {
@@ -21,25 +58,59 @@
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems"
},
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"lastModified": 1696426674,
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
"owner": "edolstra",
"repo": "flake-compat",
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"owner": "edolstra",
"repo": "flake-compat",
"type": "github"
}
},
"flake-parts": {
"inputs": {
"nixpkgs-lib": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1768135262,
"narHash": "sha256-PVvu7OqHBGWN16zSi6tEmPwwHQ4rLPU9Plvs8/1TUBY=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "80daad04eddbbf5a4d883996a73f3f542fa437ac",
"type": "github"
},
"original": {
"owner": "hercules-ci",
"repo": "flake-parts",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1768127708,
"narHash": "sha256-1Sm77VfZh3mU0F5OqKABNLWxOuDeHIlcFjsXeeiPazs=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "ffbc9f8cbaacfb331b6017d5a5abb21a492c9a38",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs-swift": {
"locked": {
"lastModified": 1761672384,
"narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=",
@@ -50,27 +121,74 @@
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"type": "github"
}
},
"purescript-overlay": {
"inputs": {
"flake-compat": "flake-compat",
"nixpkgs": [
"dream2nix",
"nixpkgs"
],
"slimlock": "slimlock"
},
"locked": {
"lastModified": 1728546539,
"narHash": "sha256-Sws7w0tlnjD+Bjck1nv29NjC5DbL6nH5auL9Ex9Iz2A=",
"owner": "thomashoneyman",
"repo": "purescript-overlay",
"rev": "4ad4c15d07bd899d7346b331f377606631eb0ee4",
"type": "github"
},
"original": {
"owner": "thomashoneyman",
"repo": "purescript-overlay",
"type": "github"
}
},
"pyproject-nix": {
"inputs": {
"nixpkgs": [
"dream2nix",
"nixpkgs"
]
},
"locked": {
"lastModified": 1763017646,
"narHash": "sha256-Z+R2lveIp6Skn1VPH3taQIuMhABg1IizJd8oVdmdHsQ=",
"owner": "pyproject-nix",
"repo": "pyproject.nix",
"rev": "47bd6f296502842643078d66128f7b5e5370790c",
"type": "github"
},
"original": {
"owner": "pyproject-nix",
"repo": "pyproject.nix",
"type": "github"
}
},
"root": {
"inputs": {
"crane": "crane",
"dream2nix": "dream2nix",
"fenix": "fenix",
"flake-utils": "flake-utils",
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs",
"nixpkgs-swift": "nixpkgs-swift",
"treefmt-nix": "treefmt-nix"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1761849405,
"narHash": "sha256-igXdvC+WCUN+3gnfk+ptT7rMmxQuY6WbIg1rXMUN1DM=",
"lastModified": 1768224240,
"narHash": "sha256-Pp1dDrXKPBUJReZnnDElFyHYn67XTd48zRhToheLjtk=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "f7de8ae045a5fe80f1203c5a1c3015b05f7c3550",
"rev": "725349602e525df37f377701e001fe8aab807878",
"type": "github"
},
"original": {
@@ -80,18 +198,25 @@
"type": "github"
}
},
"systems": {
"slimlock": {
"inputs": {
"nixpkgs": [
"dream2nix",
"purescript-overlay",
"nixpkgs"
]
},
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"lastModified": 1688756706,
"narHash": "sha256-xzkkMv3neJJJ89zo3o2ojp7nFeaZc2G0fYwNXNJRFlo=",
"owner": "thomashoneyman",
"repo": "slimlock",
"rev": "cf72723f59e2340d24881fd7bf61cb113b4c407c",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"owner": "thomashoneyman",
"repo": "slimlock",
"type": "github"
}
},
@@ -102,11 +227,11 @@
]
},
"locked": {
"lastModified": 1762938485,
"narHash": "sha256-AlEObg0syDl+Spi4LsZIBrjw+snSVU4T8MOeuZJUJjM=",
"lastModified": 1768158989,
"narHash": "sha256-67vyT1+xClLldnumAzCTBvU0jLZ1YBcf4vANRWP3+Ak=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "5b4ee75aeefd1e2d5a1cc43cf6ba65eba75e83e4",
"rev": "e96d59dff5c0d7fddb9d113ba108f03c3ef99eca",
"type": "github"
},
"original": {

flake.nix
View File

@@ -3,132 +3,134 @@
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
# Provides Rust dev-env integration:
flake-parts = {
url = "github:hercules-ci/flake-parts";
inputs.nixpkgs-lib.follows = "nixpkgs";
};
crane.url = "github:ipetkov/crane";
fenix = {
url = "github:nix-community/fenix";
inputs.nixpkgs.follows = "nixpkgs";
};
# Provides formatting infrastructure:
treefmt-nix = {
url = "github:numtide/treefmt-nix";
inputs.nixpkgs.follows = "nixpkgs";
};
dream2nix = {
url = "github:nix-community/dream2nix";
inputs.nixpkgs.follows = "nixpkgs";
};
# Pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
nixpkgs-swift.url = "github:NixOS/nixpkgs/08dacfca559e1d7da38f3cf05f1f45ee9bfd213c";
};
# TODO: figure out caching story
# nixConfig = {
# # nix community cachix
# extra-trusted-public-keys = "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=";
# extra-substituters = "https://nix-community.cachix.org";
# };
nixConfig = {
extra-trusted-public-keys = "exo.cachix.org-1:okq7hl624TBeAR3kV+g39dUFSiaZgLRkLsFBCuJ2NZI=";
extra-substituters = "https://exo.cachix.org";
};
outputs =
inputs:
let
inputs.flake-parts.lib.mkFlake { inherit inputs; } {
systems = [
"x86_64-linux"
"aarch64-darwin"
"aarch64-linux"
];
fenixToolchain = system: inputs.fenix.packages.${system}.complete;
in
inputs.flake-utils.lib.eachSystem systems (
system:
let
pkgs = import inputs.nixpkgs {
inherit system;
overlays = [ inputs.fenix.overlays.default ];
};
treefmtEval = inputs.treefmt-nix.lib.evalModule pkgs {
projectRootFile = "flake.nix";
programs = {
nixpkgs-fmt.enable = true;
ruff-format = {
enable = true;
excludes = [ "rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi" ];
imports = [
inputs.treefmt-nix.flakeModule
./dashboard/parts.nix
./rust/parts.nix
];
perSystem =
{ config, self', inputs', pkgs, lib, system, ... }:
let
fenixToolchain = inputs'.fenix.packages.complete;
# Use pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
pkgsSwift = import inputs.nixpkgs-swift { inherit system; };
in
{
treefmt = {
projectRootFile = "flake.nix";
programs = {
nixpkgs-fmt.enable = true;
ruff-format = {
enable = true;
excludes = [ "rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi" ];
};
rustfmt = {
enable = true;
package = config.rust.toolchain;
};
prettier = {
enable = true;
includes = [ "*.ts" ];
};
swift-format = {
enable = true;
package = pkgsSwift.swiftPackages.swift-format;
};
};
rustfmt = {
enable = true;
package = (fenixToolchain system).rustfmt;
};
prettier = {
enable = true;
includes = [ "*.ts" ];
};
swift-format.enable = true;
};
};
in
{
formatter = treefmtEval.config.build.wrapper;
checks.formatting = treefmtEval.config.build.check inputs.self;
checks.lint = pkgs.runCommand "lint-check" { } ''
export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
${pkgs.ruff}/bin/ruff check ${inputs.self}/
touch $out
'';
devShells.default = pkgs.mkShell {
packages =
with pkgs;
[
# FORMATTING
treefmtEval.config.build.wrapper
# PYTHON
python313
uv
ruff
basedpyright
# RUST
((fenixToolchain system).withComponents [
"cargo"
"rustc"
"clippy"
"rustfmt"
"rust-src"
])
rustup # Just here to make RustRover happy
# NIX
nixpkgs-fmt
# SVELTE
nodejs
# MISC
just
jq
]
++ (pkgs.lib.optionals pkgs.stdenv.isLinux [
# IFCONFIG
unixtools.ifconfig
# Build dependencies for Linux
pkg-config
openssl
])
++ (pkgs.lib.optionals pkgs.stdenv.isDarwin [
# MACMON
macmon
]);
shellHook = ''
# PYTHON
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${pkgs.python313}/lib"
${pkgs.lib.optionalString pkgs.stdenv.isLinux ''
# Build environment for Linux
export PKG_CONFIG_PATH="${pkgs.openssl.dev}/lib/pkgconfig:$PKG_CONFIG_PATH"
export LD_LIBRARY_PATH="${pkgs.openssl.out}/lib:$LD_LIBRARY_PATH"
''}
echo
echo "🍎🍎 Run 'just <recipe>' to get started"
just --list
checks.lint = pkgs.runCommand "lint-check" { } ''
export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
${pkgs.ruff}/bin/ruff check ${inputs.self}/
touch $out
'';
devShells.default = with pkgs; pkgs.mkShell {
inputsFrom = [ self'.checks.cargo-build ];
packages =
[
# FORMATTING
config.treefmt.build.wrapper
# PYTHON
python313
uv
ruff
basedpyright
# RUST
config.rust.toolchain
maturin
# NIX
nixpkgs-fmt
# SVELTE
nodejs
# MISC
just
jq
]
++ lib.optionals stdenv.isLinux [
unixtools.ifconfig
]
++ lib.optionals stdenv.isDarwin [
macmon
];
OPENSSL_NO_VENDOR = "1";
shellHook = ''
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${python313}/lib"
${lib.optionalString stdenv.isLinux ''
export LD_LIBRARY_PATH="${openssl.out}/lib:$LD_LIBRARY_PATH"
''}
'';
};
};
}
);
};
}

View File

@@ -1,3 +1,5 @@
export NIX_CONFIG := "extra-experimental-features = nix-command flakes"
fmt:
nix fmt

View File

@@ -17,12 +17,14 @@ dependencies = [
"loguru>=0.7.3",
"exo_pyo3_bindings", # rust bindings
"anyio==4.11.0",
"mlx>=0.30.1; sys_platform == 'darwin'",
"mlx[cpu]>=0.30.1; sys_platform == 'linux'",
"mlx-lm>=0.28.3",
"mlx==0.30.3; sys_platform == 'darwin'",
"mlx[cpu]==0.30.3; sys_platform == 'linux'",
"mlx-lm @ git+https://github.com/AlexCheema/mlx-lm.git@fix-transformers-5.0.0rc2",
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
"httpx>=0.28.1",
"tomlkit>=0.14.0",
]
[project.scripts]
@@ -33,6 +35,7 @@ exo = "exo.main:main"
# dependencies only required for development
[dependency-groups]
dev = [
"basedpyright>=1.29.0",
"pyinstaller>=6.17.0",
"pytest>=8.4.0",
"pytest-asyncio>=1.0.0",
@@ -98,6 +101,7 @@ root = "src"
# supported platforms for this project
[tool.uv]
prerelease = "allow"
environments = [
"sys_platform == 'darwin'",
"sys_platform == 'linux'",
@@ -123,3 +127,6 @@ env = [
"EXO_TESTS=1"
]
addopts = "-m 'not slow'"
filterwarnings = [
"ignore:builtin type Swig:DeprecationWarning",
]

View File

@@ -1,40 +0,0 @@
[package]
name = "downloads"
version = { workspace = true }
edition = { workspace = true }
publish = false
[lib]
doctest = false
name = "downloads"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
# macro dependencies
derive_more = { workspace = true }
# async
tokio = { workspace = true, features = ["full"] }
futures = { workspace = true }
futures-util = { workspace = true }
# utility dependencies
util = { workspace = true }
thiserror = { workspace = true }
anyhow = { workspace = true }
itertools = { workspace = true }
# tracing/logging
log = { workspace = true }
# BitTorrent library
librqbit = { git = "https://github.com/JakeHillion/rqbit", rev = "c4e2ecf81d03bd8acd96a0803d06a70b34d5da19" }
# Embed torrent files
include_dir = "0.7"
# Serialization
serde = { version = "1.0", features = ["derive"] }

View File

@@ -1,162 +0,0 @@
//! Bencode encoding for BitTorrent tracker responses
//!
//! Implements the subset of bencoding needed for tracker announce responses.
use std::collections::BTreeMap;
/// Parameters from a tracker announce request
#[derive(Debug, Clone)]
pub struct AnnounceParams {
/// 20-byte info hash of the torrent
pub info_hash: [u8; 20],
/// 20-byte peer ID of the client
pub peer_id: [u8; 20],
/// Port the client is listening on
pub port: u16,
/// Total bytes uploaded
pub uploaded: u64,
/// Total bytes downloaded
pub downloaded: u64,
/// Bytes remaining to download
pub left: u64,
/// Whether to return compact peer list (6 bytes per peer)
pub compact: bool,
/// Optional event (started, stopped, completed)
pub event: Option<AnnounceEvent>,
}
/// Announce event types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnnounceEvent {
Started,
Stopped,
Completed,
}
/// A bencoded value
#[derive(Debug, Clone)]
pub enum BencodeValue {
Integer(i64),
Bytes(Vec<u8>),
List(Vec<BencodeValue>),
Dict(BTreeMap<Vec<u8>, BencodeValue>),
}
impl BencodeValue {
/// Create a string value from a &str
#[inline]
pub fn string(s: &str) -> Self {
Self::Bytes(s.as_bytes().to_vec())
}
/// Create an integer value
#[inline]
pub fn integer(i: i64) -> Self {
Self::Integer(i)
}
/// Create an empty list
#[inline]
pub fn list() -> Self {
Self::List(Vec::new())
}
/// Create an empty dict
#[inline]
pub fn dict() -> Self {
Self::Dict(BTreeMap::new())
}
/// Add an item to a list (builder pattern)
#[inline]
pub fn push(mut self, value: BencodeValue) -> Self {
if let Self::List(ref mut list) = self {
list.push(value);
}
self
}
/// Insert a key-value pair into a dict (builder pattern)
#[inline]
pub fn insert(mut self, key: &str, value: BencodeValue) -> Self {
if let Self::Dict(ref mut dict) = self {
dict.insert(key.as_bytes().to_vec(), value);
}
self
}
/// Encode to bencoded bytes
pub fn encode(&self) -> Vec<u8> {
let mut buf = Vec::new();
self.encode_into(&mut buf);
buf
}
/// Encode into an existing buffer
pub fn encode_into(&self, buf: &mut Vec<u8>) {
match self {
Self::Integer(i) => {
buf.push(b'i');
buf.extend_from_slice(i.to_string().as_bytes());
buf.push(b'e');
}
Self::Bytes(bytes) => {
buf.extend_from_slice(bytes.len().to_string().as_bytes());
buf.push(b':');
buf.extend_from_slice(bytes);
}
Self::List(list) => {
buf.push(b'l');
for item in list {
item.encode_into(buf);
}
buf.push(b'e');
}
Self::Dict(dict) => {
buf.push(b'd');
// BTreeMap keeps keys sorted
for (key, value) in dict {
buf.extend_from_slice(key.len().to_string().as_bytes());
buf.push(b':');
buf.extend_from_slice(key);
value.encode_into(buf);
}
buf.push(b'e');
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_encode_integer() {
assert_eq!(BencodeValue::integer(42).encode(), b"i42e");
assert_eq!(BencodeValue::integer(-1).encode(), b"i-1e");
assert_eq!(BencodeValue::integer(0).encode(), b"i0e");
}
#[test]
fn test_encode_string() {
assert_eq!(BencodeValue::string("spam").encode(), b"4:spam");
assert_eq!(BencodeValue::string("").encode(), b"0:");
}
#[test]
fn test_encode_list() {
let list = BencodeValue::list()
.push(BencodeValue::string("spam"))
.push(BencodeValue::integer(42));
assert_eq!(list.encode(), b"l4:spami42ee");
}
#[test]
fn test_encode_dict() {
let dict = BencodeValue::dict()
.insert("bar", BencodeValue::string("spam"))
.insert("foo", BencodeValue::integer(42));
assert_eq!(dict.encode(), b"d3:bar4:spam3:fooi42ee");
}
}

View File

@@ -1,108 +0,0 @@
//! Embedded torrent file access
//!
//! Provides access to .torrent files embedded in the binary at compile time.
//! Each model/revision can have multiple torrent variants (e.g., "small", "large").
use include_dir::{Dir, include_dir};
/// Embedded torrent files directory
static TORRENTS: Dir<'_> = include_dir!("$CARGO_MANIFEST_DIR/torrents");
/// Get all embedded torrent variants for a model_id and revision
///
/// # Arguments
/// * `model_id` - Model identifier (e.g., "mlx-community/Qwen3-30B-A3B-4bit")
/// * `revision` - Git commit hash
///
/// # Returns
/// Vec of (variant_name, torrent_data) tuples, e.g., [("small", data), ("large", data)]
/// Returns empty Vec if no torrents found for this model/revision.
#[inline]
pub fn get_embedded_torrents(model_id: &str, revision: &str) -> Vec<(String, Vec<u8>)> {
let dir_path = format!("{model_id}");
let Some(model_dir) = TORRENTS.get_dir(&dir_path) else {
return Vec::new();
};
let mut results = Vec::new();
let prefix = format!("{revision}.");
let suffix = ".torrent";
for file in model_dir.files() {
let Some(name) = file.path().file_name().and_then(|n| n.to_str()) else {
continue;
};
// Match files like "{revision}.small.torrent" or "{revision}.large.torrent"
if name.starts_with(&prefix) && name.ends_with(suffix) {
// Extract variant: "{revision}.{variant}.torrent" -> "{variant}"
let middle = &name[prefix.len()..name.len() - suffix.len()];
// Skip plain "{revision}.torrent" files (wrong format)
if middle.is_empty() {
continue;
}
results.push((middle.to_string(), file.contents().to_vec()));
}
}
// Sort by variant name for consistent ordering
results.sort_by(|a, b| a.0.cmp(&b.0));
results
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_embedded_torrents() {
// Test with the Qwen3 torrent we have
let result = get_embedded_torrents(
"mlx-community/Qwen3-30B-A3B-4bit",
"d388dead1515f5e085ef7a0431dd8fadf0886c57",
);
assert!(!result.is_empty(), "Expected to find embedded torrents");
// Should have both small and large variants
let variants: Vec<&str> = result.iter().map(|(v, _)| v.as_str()).collect();
assert!(
variants.contains(&"small"),
"Expected 'small' variant, got: {variants:?}"
);
assert!(
variants.contains(&"large"),
"Expected 'large' variant, got: {variants:?}"
);
// Verify data is not empty
for (variant, data) in &result {
assert!(!data.is_empty(), "Torrent data for '{variant}' should not be empty");
}
}
#[test]
fn test_missing_torrent() {
let result = get_embedded_torrents("nonexistent/model", "abc123");
assert!(result.is_empty(), "Expected empty Vec for missing torrent");
}
#[test]
fn test_variant_ordering() {
let result = get_embedded_torrents(
"mlx-community/Qwen3-30B-A3B-4bit",
"d388dead1515f5e085ef7a0431dd8fadf0886c57",
);
if result.len() >= 2 {
// Verify alphabetical ordering
let variants: Vec<&str> = result.iter().map(|(v, _)| v.as_str()).collect();
let mut sorted = variants.clone();
sorted.sort();
assert_eq!(variants, sorted, "Variants should be sorted alphabetically");
}
}
}

View File

@@ -1,22 +0,0 @@
//! BitTorrent-based download system for model shards using rqbit
//!
//! This crate provides:
//! - Torrent session management via rqbit
//! - Embedded torrent file access
//! - Private tracker announce handling
//! - Selective file download based on shard layer ranges
#![allow(clippy::missing_inline_in_public_items)]
pub mod bencode;
pub mod embedded;
pub mod progress;
pub mod session;
pub mod torrent_files;
pub mod tracker;
pub use bencode::AnnounceParams;
pub use embedded::get_embedded_torrents;
pub use session::{DownloadProgress, TorrentSession};
pub use torrent_files::{get_torrent_file_list, TorrentFileInfo};
pub use tracker::{handle_announce, PeerInfo, TopologyData};

View File

@@ -1,77 +0,0 @@
//! Download progress tracking
//!
//! Types for tracking and reporting download progress to Python
use std::collections::HashMap;
/// Progress update for a torrent download
#[derive(Debug, Clone)]
pub struct DownloadProgress {
/// Total bytes to download
pub total_bytes: u64,
/// Bytes downloaded so far
pub downloaded_bytes: u64,
/// Number of pieces completed
pub pieces_completed: usize,
/// Total number of pieces
pub total_pieces: usize,
/// Number of peers connected
pub peers_connected: usize,
/// Download speed in bytes/second
pub speed_bytes_per_sec: f64,
/// Estimated time remaining in seconds
pub eta_seconds: Option<f64>,
/// Per-file progress
pub files: HashMap<String, FileProgress>,
}
#[derive(Debug, Clone)]
pub struct FileProgress {
/// Total file size
pub total_bytes: u64,
/// Bytes downloaded for this file
pub downloaded_bytes: u64,
/// Whether the file is complete
pub complete: bool,
}
impl DownloadProgress {
#[inline]
pub fn new(total_bytes: u64, total_pieces: usize) -> Self {
Self {
total_bytes,
downloaded_bytes: 0,
pieces_completed: 0,
total_pieces,
peers_connected: 0,
speed_bytes_per_sec: 0.0,
eta_seconds: None,
files: HashMap::new(),
}
}
#[inline]
pub fn progress_fraction(&self) -> f64 {
if self.total_bytes == 0 {
0.0
} else {
#[allow(clippy::cast_precision_loss)]
let fraction = self.downloaded_bytes as f64 / self.total_bytes as f64;
fraction
}
}
#[inline]
pub fn is_complete(&self) -> bool {
self.pieces_completed >= self.total_pieces
}
}
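`eta_seconds` is declared here but the computation is left to the caller; the natural definition divides the remaining bytes by the current speed, with the same zero guards as `progress_fraction`. A quick TypeScript sketch (names mirror the Rust fields; this is illustrative, not part of the crate):

```typescript
// Sketch: derive fraction and ETA the way progress_fraction does, guarding zeros.
function progressStats(
  totalBytes: number,
  downloadedBytes: number,
  speedBytesPerSec: number,
): { fraction: number; etaSeconds: number | null } {
  const fraction = totalBytes === 0 ? 0 : downloadedBytes / totalBytes;
  const remaining = Math.max(totalBytes - downloadedBytes, 0);
  const etaSeconds = speedBytesPerSec > 0 ? remaining / speedBytesPerSec : null;
  return { fraction, etaSeconds };
}

// progressStats(100, 25, 5) -> { fraction: 0.25, etaSeconds: 15 }
```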

View File

@@ -1,166 +0,0 @@
//! Torrent session management using rqbit
//!
//! Provides a wrapper around rqbit's Session for managing torrent downloads
//! with persistent seeding and selective file downloads.
use anyhow::{Context, Result};
use librqbit::{AddTorrent, AddTorrentOptions, AddTorrentResponse, Api, ManagedTorrent, Session, SessionOptions, SessionPersistenceConfig};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
/// Download progress information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DownloadProgress {
pub downloaded_bytes: u64,
pub total_bytes: u64,
pub download_speed: f64,
pub upload_speed: f64,
pub peers_connected: usize,
pub is_finished: bool,
}
/// Torrent session handle for managing multiple torrents
pub struct TorrentSession {
session: Arc<Session>,
api: Arc<Api>,
session_dir: PathBuf,
torrents: Arc<RwLock<HashMap<String, Arc<ManagedTorrent>>>>,
}
impl TorrentSession {
/// Create a new torrent session
///
/// # Arguments
/// * `session_dir` - Directory to store session state and downloaded files
pub async fn new(session_dir: PathBuf) -> Result<Self> {
std::fs::create_dir_all(&session_dir).context("Failed to create session directory")?;
let opts = SessionOptions {
disable_dht: false,
disable_dht_persistence: false,
dht_config: None,
persistence: Some(SessionPersistenceConfig::Json { folder: None }),
fastresume: true,
..Default::default()
};
let session = Session::new_with_opts(session_dir.clone(), opts)
.await
.context("Failed to create rqbit session")?;
let api = Api::new(Arc::clone(&session), None);
Ok(Self {
session,
api: Arc::new(api),
session_dir,
torrents: Arc::new(RwLock::new(HashMap::new())),
})
}
/// Add a torrent from raw bytes
///
/// # Arguments
/// * `torrent_data` - Raw .torrent file contents
/// * `save_path` - Where to save the downloaded files
/// * `file_indices` - Optional list of file indices to download (None = all files)
///
/// # Returns
/// Info hash as hex string
pub async fn add_torrent(
&self,
torrent_data: Vec<u8>,
save_path: PathBuf,
file_indices: Option<Vec<usize>>,
) -> Result<String> {
let opts = AddTorrentOptions {
overwrite: false,
only_files_regex: None,
only_files: file_indices,
output_folder: Some(save_path.to_string_lossy().to_string()),
..Default::default()
};
let add_torrent = AddTorrent::from_bytes(torrent_data);
let response = self
.session
.add_torrent(add_torrent, Some(opts))
.await
.context("Failed to add torrent")?;
let handle = match response {
AddTorrentResponse::Added(_, handle) => handle,
AddTorrentResponse::AlreadyManaged(_, handle) => handle,
AddTorrentResponse::ListOnly(_) => anyhow::bail!("Torrent was list-only, not added"),
};
let info_hash = handle.info_hash().as_string();
self.torrents
.write()
.await
.insert(info_hash.clone(), handle);
Ok(info_hash)
}
/// Get download progress for a torrent
pub async fn get_progress(&self, info_hash: &str) -> Result<DownloadProgress> {
let torrents = self.torrents.read().await;
let handle = torrents.get(info_hash).context("Torrent not found")?;
let stats = handle.stats();
Ok(DownloadProgress {
downloaded_bytes: stats.progress_bytes,
total_bytes: stats.total_bytes,
download_speed: stats.live.as_ref().map_or(0.0, |l| l.download_speed.mbps * 1024.0 * 1024.0),
upload_speed: stats.live.as_ref().map_or(0.0, |l| l.upload_speed.mbps * 1024.0 * 1024.0),
peers_connected: stats.live.as_ref().map_or(0, |l| l.snapshot.peer_stats.live as usize),
is_finished: stats.finished,
})
}
/// Wait until torrent download is completed
pub async fn wait_until_completed(&self, info_hash: &str) -> Result<()> {
let torrents = self.torrents.read().await;
let handle = torrents.get(info_hash).context("Torrent not found")?;
handle
.wait_until_completed()
.await
.context("Failed to wait for completion")?;
Ok(())
}
/// Enable seeding for a completed torrent
///
/// Note: rqbit seeds by default after completion, so this is a no-op
/// kept for API compatibility.
pub async fn enable_seeding(&self, _info_hash: &str) -> Result<()> {
// rqbit automatically seeds after download completion
// This is kept for API compatibility
Ok(())
}
/// Remove a torrent from the session
pub async fn remove_torrent(&self, info_hash: &str) -> Result<()> {
let mut torrents = self.torrents.write().await;
if let Some(handle) = torrents.remove(info_hash) {
drop(handle);
}
Ok(())
}
/// Get list of all torrent info hashes in the session
pub async fn list_torrents(&self) -> Vec<String> {
self.torrents.read().await.keys().cloned().collect()
}
}

View File

@@ -1,100 +0,0 @@
//! Torrent file list parsing
//!
//! Provides functionality to extract file information from torrent metadata
//! without adding the torrent to a session.
use anyhow::{Context, Result};
use librqbit::torrent_from_bytes;
use serde::{Deserialize, Serialize};
/// Information about a file in a torrent
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TorrentFileInfo {
/// File index (0-based)
pub index: usize,
/// File path relative to torrent root
pub path: String,
/// File size in bytes
pub size: u64,
}
/// Get the list of files in a torrent from its raw bytes
///
/// # Arguments
/// * `torrent_data` - Raw .torrent file contents
///
/// # Returns
/// List of file information (index, path, size)
pub fn get_torrent_file_list(torrent_data: &[u8]) -> Result<Vec<TorrentFileInfo>> {
let torrent_meta = torrent_from_bytes(torrent_data).context("Failed to parse torrent")?;
// Access the data inside WithRawBytes wrapper
let info = &torrent_meta.info.data;
let mut files = Vec::new();
// Handle both single-file and multi-file torrents
if let Some(ref file_list) = info.files {
// Multi-file torrent
for (index, file) in file_list.iter().enumerate() {
let path = file
.path
.iter()
.map(|buf| String::from_utf8_lossy(buf.0).to_string())
.collect::<Vec<_>>()
.join("/");
files.push(TorrentFileInfo {
index,
path,
size: file.length,
});
}
} else {
// Single-file torrent
let name = match &info.name {
Some(n) => String::from_utf8_lossy(n.0).to_string(),
None => String::new(),
};
files.push(TorrentFileInfo {
index: 0,
path: name,
size: info.length.unwrap_or(0),
});
}
Ok(files)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::get_embedded_torrents;
#[test]
fn test_get_torrent_file_list() {
// Use an embedded torrent for testing
let torrents = get_embedded_torrents(
"mlx-community/Qwen3-30B-A3B-4bit",
"d388dead1515f5e085ef7a0431dd8fadf0886c57",
);
assert!(!torrents.is_empty(), "Expected to find embedded torrents");
for (variant, data) in torrents {
let files = get_torrent_file_list(&data).expect("Failed to parse torrent");
assert!(!files.is_empty(), "Expected files in {variant} variant");
// Verify file info makes sense
for file in &files {
assert!(!file.path.is_empty(), "File path should not be empty");
assert!(file.size > 0, "File size should be positive");
}
println!("Variant '{variant}' has {} files", files.len());
for file in files.iter().take(5) {
println!(" [{}] {} ({} bytes)", file.index, file.path, file.size);
}
}
}
}
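This file list is what enables the selective downloads mentioned in lib.rs above: parse the torrent once, decide which paths a shard actually needs, and pass the resulting indices as `file_indices` to `TorrentSession::add_torrent`. A hedged sketch of the selection step (TypeScript; `TorrentFileInfo` mirrors the Rust struct, and `wantFile` is a hypothetical predicate such as matching the safetensors files a layer range needs):

```typescript
// Sketch: choose which torrent files to fetch, then hand the indices to add_torrent().
type TorrentFileInfo = { index: number; path: string; size: number };

function selectFileIndices(
  files: TorrentFileInfo[],
  wantFile: (path: string) => boolean,
): number[] {
  return files.filter((f) => wantFile(f.path)).map((f) => f.index);
}

// e.g. metadata plus a single weight shard:
// selectFileIndices(files, (p) => !p.endsWith(".safetensors") || p.includes("00001"))
```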

View File

@@ -1,185 +0,0 @@
//! Fake tracker implementation for Exo topology-based peer discovery
//!
//! Instead of contacting real BitTorrent trackers, this module generates
//! tracker announce responses using Exo's cluster topology data.
use std::net::Ipv4Addr;
use anyhow::Result;
use crate::bencode::{AnnounceParams, BencodeValue};
/// Information about a peer in the Exo topology
#[derive(Debug, Clone)]
pub struct PeerInfo {
/// Unique node identifier in the Exo cluster
pub node_id: String,
/// IPv4 address of the peer
pub ip: Ipv4Addr,
/// BitTorrent listening port
pub port: u16,
/// Whether this peer has the complete torrent
pub has_complete: bool,
/// Priority for peer selection (higher = prefer)
pub priority: i32,
}
/// Topology data containing available peers
#[derive(Debug, Clone)]
pub struct TopologyData {
/// List of peers in the topology
pub peers: Vec<PeerInfo>,
}
/// Default announce interval in seconds
const DEFAULT_INTERVAL: i64 = 1800;
/// Handle a tracker announce request using Exo topology data
///
/// Returns a bencoded tracker response containing peers from the topology.
///
/// # Arguments
/// * `params` - Announce request parameters
/// * `topology` - Current Exo cluster topology
///
/// # Returns
/// Bencoded announce response as bytes
pub fn handle_announce(params: &AnnounceParams, topology: &TopologyData) -> Result<Vec<u8>> {
// Sort peers by priority (descending) for better peer selection
let mut peers: Vec<_> = topology.peers.iter().collect();
peers.sort_by(|a, b| b.priority.cmp(&a.priority));
let response = if params.compact {
// Compact format: 6 bytes per peer (4 IP + 2 port)
let mut peer_data = Vec::with_capacity(peers.len() * 6);
for peer in &peers {
peer_data.extend_from_slice(&peer.ip.octets());
peer_data.extend_from_slice(&peer.port.to_be_bytes());
}
BencodeValue::dict()
.insert("interval", BencodeValue::integer(DEFAULT_INTERVAL))
.insert("peers", BencodeValue::Bytes(peer_data))
} else {
// Non-compact format: list of dicts
let mut peer_list = BencodeValue::list();
for peer in &peers {
let peer_dict = BencodeValue::dict()
.insert("ip", BencodeValue::string(&peer.ip.to_string()))
.insert("port", BencodeValue::integer(i64::from(peer.port)))
.insert("peer id", BencodeValue::Bytes(vec![0u8; 20])); // Placeholder peer ID
peer_list = peer_list.push(peer_dict);
}
BencodeValue::dict()
.insert("interval", BencodeValue::integer(DEFAULT_INTERVAL))
.insert("peers", peer_list)
};
Ok(response.encode())
}
#[cfg(test)]
mod tests {
use super::*;
fn make_test_params(compact: bool) -> AnnounceParams {
AnnounceParams {
info_hash: [0u8; 20],
peer_id: [0u8; 20],
port: 6881,
uploaded: 0,
downloaded: 0,
left: 1000,
compact,
event: None,
}
}
fn make_test_topology() -> TopologyData {
TopologyData {
peers: vec![
PeerInfo {
node_id: "node1".to_string(),
ip: Ipv4Addr::new(192, 168, 1, 1),
port: 6881,
has_complete: true,
priority: 10,
},
PeerInfo {
node_id: "node2".to_string(),
ip: Ipv4Addr::new(192, 168, 1, 2),
port: 6882,
has_complete: false,
priority: 5,
},
],
}
}
#[test]
fn test_compact_response() {
let params = make_test_params(true);
let topology = make_test_topology();
let response = handle_announce(&params, &topology).unwrap();
// Should contain "interval" and "peers" keys
assert!(response.starts_with(b"d"));
assert!(response.ends_with(b"e"));
// Verify we have 12 bytes of peer data (2 peers * 6 bytes)
// The compact peers field should be "12:<12 bytes>"
let response_str = String::from_utf8_lossy(&response);
assert!(response_str.contains("8:interval"));
assert!(response_str.contains("5:peers"));
}
#[test]
fn test_non_compact_response() {
let params = make_test_params(false);
let topology = make_test_topology();
let response = handle_announce(&params, &topology).unwrap();
// Should contain peers as a list
let response_str = String::from_utf8_lossy(&response);
assert!(response_str.contains("8:interval"));
assert!(response_str.contains("5:peers"));
assert!(response_str.contains("2:ip"));
assert!(response_str.contains("4:port"));
}
#[test]
fn test_peer_priority_ordering() {
let params = make_test_params(true);
let topology = make_test_topology();
let response = handle_announce(&params, &topology).unwrap();
// In compact format, first peer should be node1 (priority 10)
// which is 192.168.1.1:6881
// Look for the peer data after "5:peers12:"
let peers_marker = b"5:peers12:";
let pos = response
.windows(peers_marker.len())
.position(|w| w == peers_marker)
.unwrap();
let peer_data = &response[pos + peers_marker.len()..pos + peers_marker.len() + 6];
// First peer should be 192.168.1.1 (node1 with higher priority)
assert_eq!(&peer_data[0..4], &[192, 168, 1, 1]);
}
#[test]
fn test_empty_topology() {
let params = make_test_params(true);
let topology = TopologyData { peers: vec![] };
let response = handle_announce(&params, &topology).unwrap();
// Should still be valid bencoded response with empty peers
assert!(response.starts_with(b"d"));
assert!(response.ends_with(b"e"));
}
}
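On the client side, the compact `peers` value built above decodes mechanically: the byte string is a sequence of 6-byte entries, 4 IP octets followed by a big-endian u16 port. A minimal decoding sketch (TypeScript; illustrative, not part of this crate):

```typescript
// Decode a compact BitTorrent peer list: each entry is 4 IP octets
// followed by a big-endian 16-bit port, as encoded by handle_announce.
function decodeCompactPeers(data: Uint8Array): Array<{ ip: string; port: number }> {
  const peers: Array<{ ip: string; port: number }> = [];
  for (let i = 0; i + 6 <= data.length; i += 6) {
    const ip = `${data[i]}.${data[i + 1]}.${data[i + 2]}.${data[i + 3]}`;
    const port = (data[i + 4] << 8) | data[i + 5];
    peers.push({ ip, port });
  }
  return peers;
}

// decodeCompactPeers(Uint8Array.of(192, 168, 1, 1, 0x1a, 0xe1))
// -> [{ ip: "192.168.1.1", port: 6881 }]
```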

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

File diff suppressed because one or more lines are too long

View File

@@ -1 +0,0 @@
d8:announce42:udp://tracker.opentrackr.org:1337/announce10:created by13:mktorrent 1.14:infod5:filesld6:lengthi1519e4:pathl14:.gitattributeseed6:lengthi884e4:pathl9:README.mdeed6:lengthi1249e4:pathl19:chat_template.jinjaeed6:lengthi1848e4:pathl11:config.jsoneed6:lengthi10652e4:pathl25:configuration_deepseek.pyeed6:lengthi52e4:pathl22:generation_config.jsoneed6:lengthi221164e4:pathl28:model.safetensors.index.jsoneed6:lengthi75769e4:pathl20:modeling_deepseek.pyeed6:lengthi760e4:pathl23:special_tokens_map.jsoneed6:lengthi11330e4:pathl20:tokenization_kimi.pyeed6:lengthi2738e4:pathl21:tokenizer_config.jsoneee4:name40:91fb4f9fd1de100104925196d62b8ee06fd2ad6012:piece lengthi262144e6:pieces40:[40 bytes of binary piece hashes omitted]e8:url-list63:https://huggingface.co/mlx-community/Kimi-K2-Instruct-4bit/raw/e

File diff suppressed because one or more lines are too long

@@ -1 +0,0 @@
[deleted bencoded .torrent: name 035a0cdd221ae0dca6b03120e20704a251a7bc9b, web seed https://huggingface.co/mlx-community/Kimi-K2-Thinking/raw/; file metadata and binary piece hashes omitted]


@@ -1 +0,0 @@
[deleted bencoded .torrent: name de2dfaf56839b7d0e834157d2401dee02726874d, web seed https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-4bit/raw/; file metadata and binary piece hashes omitted]


@@ -1 +0,0 @@
[deleted bencoded .torrent: name c5bfd839cd4cda0e5a39a97e00218d9c56e468af, web seed https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-8bit/raw/; file metadata and binary piece hashes omitted]


@@ -1,2 +0,0 @@
[deleted bencoded .torrent: name ca8dbf41071f579fbe3260f20bbe1ab896f79031, file list includes qwen3_xml_tool_parser.py, merges.txt, vocab.json; web seed truncated in source, binary piece hashes omitted]


@@ -1 +0,0 @@
[deleted bencoded .torrent: name f56bc6adfb74c794203dc8ca94e0bccfe2bcd6cc, single file model.safetensors (75789955 bytes), web seed https://huggingface.co/mlx-community/SmolLM-135M-4bit/resolve/; binary piece hashes omitted]


@@ -1 +0,0 @@
[deleted bencoded .torrent: name 81e5ac3ad0af6efb1298a8e8c7a10ed2990c137b, web seed https://huggingface.co/mlx-community/gpt-oss-120b-MXFP4-Q8/raw/; file metadata and binary piece hashes omitted]


@@ -1 +0,0 @@
[deleted bencoded .torrent: name f356f2747216d7e98fee755df25987459fc19089, web seed https://huggingface.co/mlx-community/gpt-oss-20b-MXFP4-Q4/raw/; file metadata and binary piece hashes omitted]


@@ -1,6 +0,0 @@
[deleted bencoded .torrent: name 39b35eaa97282c34db81f61a983b4b83344e10f1, file list includes tokenizer.json, merges.txt, vocab.json; web seed not recoverable from source, binary piece hashes omitted]


@@ -1 +0,0 @@
[deleted bencoded .torrent: name 8103891b028a8933068e47751bc2acc10bb59aa2, web seed https://huggingface.co/mlx-community/llama-3.3-70b-instruct-fp16/raw/; file metadata and binary piece hashes omitted]


@@ -23,7 +23,6 @@ workspace = true
[dependencies]
networking = { workspace = true }
downloads = { workspace = true }
# interop
pyo3 = { version = "0.27.1", features = [


@@ -1,334 +0,0 @@
//! Downloads module - BitTorrent downloads PyO3 bindings
use crate::ext::*;
use downloads::bencode::AnnounceParams;
use downloads::tracker::{PeerInfo, TopologyData, handle_announce as rust_handle_announce};
use downloads::{DownloadProgress, TorrentSession};
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyDict};
use std::net::Ipv4Addr;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::Mutex;
/// Handle a tracker announce request
///
/// Args:
/// params: Dictionary with announce parameters (info_hash, peer_id, port, etc.)
/// peers: List of peer dictionaries (node_id, ip, port, has_complete, priority)
///
/// Returns:
/// Bencoded announce response as bytes
#[pyfunction]
fn handle_tracker_announce(
py: Python<'_>,
params: &Bound<'_, PyDict>,
peers: &Bound<'_, pyo3::types::PyList>,
) -> PyResult<Py<PyBytes>> {
// Parse announce params
let info_hash = {
let info_hash_item = params
.get_item("info_hash")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing info_hash"))?;
let info_hash_bytes: &[u8] = info_hash_item.extract()?;
if info_hash_bytes.len() != 20 {
return Err(pyo3::exceptions::PyValueError::new_err(
"info_hash must be 20 bytes",
));
}
let mut info_hash = [0u8; 20];
info_hash.copy_from_slice(info_hash_bytes);
info_hash
};
let peer_id = {
let peer_id_item = params
.get_item("peer_id")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing peer_id"))?;
let peer_id_bytes: &[u8] = peer_id_item.extract()?;
if peer_id_bytes.len() != 20 {
return Err(pyo3::exceptions::PyValueError::new_err(
"peer_id must be 20 bytes",
));
}
let mut peer_id = [0u8; 20];
peer_id.copy_from_slice(peer_id_bytes);
peer_id
};
let port: u16 = params
.get_item("port")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing port"))?
.extract()?;
let uploaded: u64 = params
.get_item("uploaded")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing uploaded"))?
.extract()?;
let downloaded: u64 = params
.get_item("downloaded")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing downloaded"))?
.extract()?;
let left: u64 = params
.get_item("left")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing left"))?
.extract()?;
let compact: bool = params
.get_item("compact")?
.map(|v| v.extract().unwrap_or(true))
.unwrap_or(true);
let announce_params = AnnounceParams {
info_hash,
peer_id,
port,
uploaded,
downloaded,
left,
compact,
event: None, // TODO: parse event if needed
};
// Parse peer list
let peer_infos: Result<Vec<PeerInfo>, PyErr> = peers
.iter()
.map(|peer_item| {
let peer_dict: &Bound<'_, PyDict> = peer_item.downcast()?;
let node_id: String = peer_dict
.get_item("node_id")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing node_id"))?
.extract()?;
let ip_str: String = peer_dict
.get_item("ip")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing ip"))?
.extract()?;
let ip: Ipv4Addr = ip_str
.parse()
.map_err(|_| pyo3::exceptions::PyValueError::new_err("Invalid IP address"))?;
let port: u16 = peer_dict
.get_item("port")?
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Missing port"))?
.extract()?;
let has_complete: bool = peer_dict
.get_item("has_complete")?
.map(|v: Bound<'_, pyo3::PyAny>| v.extract().unwrap_or(false))
.unwrap_or(false);
let priority: i32 = peer_dict
.get_item("priority")?
.map(|v: Bound<'_, pyo3::PyAny>| v.extract().unwrap_or(0))
.unwrap_or(0);
Ok(PeerInfo {
node_id,
ip,
port,
has_complete,
priority,
})
})
.collect();
let peer_infos = peer_infos?;
let topology = TopologyData { peers: peer_infos };
// Call Rust tracker handler
let response_bytes = rust_handle_announce(&announce_params, &topology).pyerr()?;
// Return as Python bytes
Ok(PyBytes::new(py, &response_bytes).unbind())
}
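
For illustration, a sketch of calling this (now-removed) binding from Python. The module path `exo_pyo3_bindings` and all sample values are assumptions; the dict keys mirror the extraction logic above:

```python
# Hypothetical caller of handle_tracker_announce; not taken from the repo.
from exo_pyo3_bindings import handle_tracker_announce  # assumed module path

params = {
    "info_hash": b"\x00" * 20,               # must be exactly 20 bytes
    "peer_id": b"-EX0001-" + b"\x01" * 12,   # 20 bytes total
    "port": 6881,
    "uploaded": 0,
    "downloaded": 0,
    "left": 1024,
    "compact": True,                         # optional; defaults to True
}
peers = [
    {"node_id": "node1", "ip": "192.168.1.1", "port": 6881,
     "has_complete": True, "priority": 10},  # has_complete/priority optional
]
response: bytes = handle_tracker_announce(params, peers)
assert response.startswith(b"d") and response.endswith(b"e")  # bencoded dict
```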
/// Get all embedded torrent variants for a model
///
/// Args:
/// model_id: Model identifier (e.g., "mlx-community/Qwen3-30B-A3B-4bit")
/// revision: Git commit hash
///
/// Returns:
/// List of (variant_name, torrent_data) tuples, e.g., [("small", bytes), ("large", bytes)]
/// Returns empty list if no torrents found.
#[pyfunction]
fn get_embedded_torrents(
py: Python<'_>,
model_id: String,
revision: String,
) -> PyResult<Vec<(String, Py<PyBytes>)>> {
let torrents = downloads::get_embedded_torrents(&model_id, &revision);
Ok(torrents
.into_iter()
.map(|(variant, data)| (variant, PyBytes::new(py, &data).unbind()))
.collect())
}
/// Get file list from torrent data
///
/// Args:
/// torrent_data: Raw .torrent file contents
///
/// Returns:
/// List of (index, path, size_bytes) tuples for each file in the torrent
#[pyfunction]
fn get_torrent_file_list(torrent_data: Vec<u8>) -> PyResult<Vec<(usize, String, u64)>> {
let files = downloads::get_torrent_file_list(&torrent_data).pyerr()?;
Ok(files
.into_iter()
.map(|f| (f.index, f.path, f.size))
.collect())
}
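
A hedged illustration of chaining the two lookups above; the module path, model_id, and revision hash are assumptions for the sketch:

```python
# List every file in each embedded torrent variant for a model revision.
from exo_pyo3_bindings import get_embedded_torrents, get_torrent_file_list  # assumed path

variants = get_embedded_torrents(
    "mlx-community/Qwen3-30B-A3B-4bit",              # model_id from the docstring example
    "0123456789abcdef0123456789abcdef01234567",      # hypothetical revision hash
)
for variant_name, torrent_bytes in variants:          # e.g. ("small", ...), ("large", ...)
    for index, path, size_bytes in get_torrent_file_list(torrent_bytes):
        print(f"{variant_name}: [{index}] {path} ({size_bytes} bytes)")
```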
/// Python wrapper for TorrentSession
#[pyclass]
struct TorrentSessionHandle {
session: Arc<Mutex<TorrentSession>>,
}
#[pymethods]
impl TorrentSessionHandle {
/// Create a new torrent session
///
/// Args:
/// session_dir: Directory to store session state and downloads
#[new]
fn new(session_dir: String) -> PyResult<Self> {
let session_path = PathBuf::from(session_dir);
let session = tokio::runtime::Runtime::new()
.pyerr()?
.block_on(async { TorrentSession::new(session_path).await })
.pyerr()?;
Ok(Self {
session: Arc::new(Mutex::new(session)),
})
}
/// Add a torrent from bytes
///
/// Args:
/// torrent_data: Raw .torrent file contents
/// save_path: Where to save downloaded files
/// file_indices: Optional list of file indices to download
///
/// Returns:
/// Info hash as hex string
fn add_torrent(
&self,
_py: Python<'_>,
torrent_data: Vec<u8>,
save_path: String,
file_indices: Option<Vec<usize>>,
) -> PyResult<String> {
let session = Arc::clone(&self.session);
let save_path = PathBuf::from(save_path);
tokio::runtime::Runtime::new()
.pyerr()?
.block_on(async {
session
.lock()
.await
.add_torrent(torrent_data, save_path, file_indices)
.await
})
.pyerr()
}
/// Get download progress for a torrent
///
/// Args:
/// info_hash: Torrent info hash
///
/// Returns:
/// Dictionary with progress information
fn get_progress(&self, py: Python<'_>, info_hash: String) -> PyResult<Py<PyDict>> {
let session = Arc::clone(&self.session);
let progress: DownloadProgress = tokio::runtime::Runtime::new()
.pyerr()?
.block_on(async { session.lock().await.get_progress(&info_hash).await })
.pyerr()?;
let dict = PyDict::new(py);
dict.set_item("downloaded_bytes", progress.downloaded_bytes)?;
dict.set_item("total_bytes", progress.total_bytes)?;
dict.set_item("download_speed", progress.download_speed)?;
dict.set_item("upload_speed", progress.upload_speed)?;
dict.set_item("peers_connected", progress.peers_connected)?;
dict.set_item("is_finished", progress.is_finished)?;
Ok(dict.unbind())
}
/// Wait until torrent download is completed
///
/// Args:
/// info_hash: Torrent info hash
fn wait_until_completed(&self, _py: Python<'_>, info_hash: String) -> PyResult<()> {
let session = Arc::clone(&self.session);
tokio::runtime::Runtime::new()
.pyerr()?
.block_on(async { session.lock().await.wait_until_completed(&info_hash).await })
.pyerr()
}
/// Enable seeding for a torrent
///
/// Args:
/// info_hash: Torrent info hash
fn enable_seeding(&self, _py: Python<'_>, info_hash: String) -> PyResult<()> {
let session = Arc::clone(&self.session);
tokio::runtime::Runtime::new()
.pyerr()?
.block_on(async { session.lock().await.enable_seeding(&info_hash).await })
.pyerr()
}
/// Remove a torrent from the session
///
/// Args:
/// info_hash: Torrent info hash
fn remove_torrent(&self, _py: Python<'_>, info_hash: String) -> PyResult<()> {
let session = Arc::clone(&self.session);
tokio::runtime::Runtime::new()
.pyerr()?
.block_on(async { session.lock().await.remove_torrent(&info_hash).await })
.pyerr()
}
/// List all torrents in the session
///
/// Returns:
/// List of info hashes
fn list_torrents(&self, _py: Python<'_>) -> PyResult<Vec<String>> {
let session = Arc::clone(&self.session);
tokio::runtime::Runtime::new()
.pyerr()?
.block_on(async { Ok(session.lock().await.list_torrents().await) })
}
}
/// Downloads submodule
pub(crate) fn downloads_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(handle_tracker_announce, m)?)?;
m.add_function(wrap_pyfunction!(get_embedded_torrents, m)?)?;
m.add_function(wrap_pyfunction!(get_torrent_file_list, m)?)?;
m.add_class::<TorrentSessionHandle>()?;
Ok(())
}
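
The deleted bindings above map a full torrent lifecycle onto Python. A minimal sketch of how a caller might have driven them, assuming the module path `exo_pyo3_bindings` and an embedded torrent being available:

```python
# End-to-end sketch: create a session, add a torrent, poll progress, seed.
import time
from exo_pyo3_bindings import TorrentSessionHandle, get_embedded_torrents  # assumed path

session = TorrentSessionHandle("/tmp/exo-torrents")
_variant, torrent_bytes = get_embedded_torrents("some/model", "deadbeef" * 5)[0]

# file_indices=None downloads every file in the torrent.
info_hash = session.add_torrent(torrent_bytes, "/tmp/exo-models", None)
while True:
    progress = session.get_progress(info_hash)
    if progress["is_finished"]:
        break
    print(f'{progress["downloaded_bytes"]}/{progress["total_bytes"]} bytes '
          f'({progress["peers_connected"]} peers)')
    time.sleep(1.0)
# Alternatively, block instead of polling: session.wait_until_completed(info_hash)
session.enable_seeding(info_hash)
```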


@@ -17,12 +17,10 @@
extern crate core;
mod allow_threading;
pub(crate) mod downloads;
mod examples;
pub(crate) mod networking;
pub(crate) mod pylibp2p;
use crate::downloads::downloads_submodule;
use crate::networking::networking_submodule;
use crate::pylibp2p::ident::ident_submodule;
use crate::pylibp2p::multiaddr::multiaddr_submodule;
@@ -209,7 +207,6 @@ fn main_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
ident_submodule(m)?;
multiaddr_submodule(m)?;
networking_submodule(m)?;
downloads_submodule(m)?;
// top-level constructs
// TODO: ...

rust/parts.nix Normal file

@@ -0,0 +1,145 @@
{ inputs, ... }:
{
perSystem =
{ config, self', inputs', pkgs, lib, ... }:
let
# Fenix nightly toolchain with all components
fenixPkgs = inputs'.fenix.packages;
rustToolchain = fenixPkgs.complete.withComponents [
"cargo"
"rustc"
"clippy"
"rustfmt"
"rust-src"
"rust-analyzer"
];
# Crane with fenix toolchain
craneLib = (inputs.crane.mkLib pkgs).overrideToolchain rustToolchain;
# Source filtering - only include rust/ directory and root Cargo files
# This ensures changes to Python/docs/etc don't trigger Rust rebuilds
src = lib.cleanSourceWith {
src = inputs.self;
filter =
path: type:
let
baseName = builtins.baseNameOf path;
parentDir = builtins.dirOf path;
inRustDir =
(lib.hasInfix "/rust/" path)
|| (lib.hasSuffix "/rust" parentDir)
|| (baseName == "rust" && type == "directory");
isRootCargoFile =
(baseName == "Cargo.toml" || baseName == "Cargo.lock")
&& (builtins.dirOf path == toString inputs.self);
in
isRootCargoFile
|| (inRustDir && (craneLib.filterCargoSources path type || lib.hasSuffix ".toml" path || lib.hasSuffix ".md" path));
};
# Common arguments for all Rust builds
commonArgs = {
inherit src;
pname = "exo-rust";
version = "0.0.1";
strictDeps = true;
nativeBuildInputs = [
pkgs.pkg-config
pkgs.python313 # Required for pyo3-build-config
];
buildInputs = [
pkgs.openssl
pkgs.python313 # Required for pyo3 tests
];
OPENSSL_NO_VENDOR = "1";
# Required for pyo3 tests to find libpython
LD_LIBRARY_PATH = lib.makeLibraryPath [ pkgs.python313 ];
};
# Build dependencies once for caching
cargoArtifacts = craneLib.buildDepsOnly (
commonArgs
// {
cargoExtraArgs = "--workspace";
}
);
in
{
# Export toolchain for use in treefmt and devShell
options.rust = {
toolchain = lib.mkOption {
type = lib.types.package;
default = rustToolchain;
description = "The Rust toolchain to use";
};
};
config = {
packages = {
# Python bindings wheel via maturin
exo_pyo3_bindings = craneLib.buildPackage (
commonArgs
// {
inherit cargoArtifacts;
pname = "exo_pyo3_bindings";
nativeBuildInputs = commonArgs.nativeBuildInputs ++ [
pkgs.maturin
];
buildPhaseCargoCommand = ''
maturin build \
--release \
--manylinux off \
--manifest-path rust/exo_pyo3_bindings/Cargo.toml \
--features "pyo3/extension-module,pyo3/experimental-async" \
--interpreter ${pkgs.python313}/bin/python \
--out dist
'';
# Don't use crane's default install behavior
doNotPostBuildInstallCargoBinaries = true;
installPhaseCommand = ''
mkdir -p $out
cp dist/*.whl $out/
'';
}
);
};
checks = {
# Full workspace build (all crates)
cargo-build = craneLib.buildPackage (
commonArgs
// {
inherit cargoArtifacts;
cargoExtraArgs = "--workspace";
}
);
# Run tests with nextest
cargo-nextest = craneLib.cargoNextest (
commonArgs
// {
inherit cargoArtifacts;
cargoExtraArgs = "--workspace";
}
);
# Build documentation
cargo-doc = craneLib.cargoDoc (
commonArgs
// {
inherit cargoArtifacts;
cargoExtraArgs = "--workspace";
}
);
};
};
};
}


@@ -1,47 +0,0 @@
[package]
name = "system_custodian"
version = { workspace = true }
edition = { workspace = true }
publish = false
[lib]
doctest = false
name = "system_custodian"
path = "src/lib.rs"
[[bin]]
path = "src/bin/main.rs"
name = "system_custodian"
doc = false
[lints]
workspace = true
[dependencies]
# datastructures
either = { workspace = true }
# macro dependencies
extend = { workspace = true }
delegate = { workspace = true }
impl-trait-for-tuples = { workspace = true }
derive_more = { workspace = true }
# async
tokio = { workspace = true, features = ["full"] }
futures = { workspace = true }
futures-timer = { workspace = true }
# utility dependencies
util = { workspace = true }
thiserror = { workspace = true }
#internment = { workspace = true }
#recursion = { workspace = true }
#generativity = { workspace = true }
#itertools = { workspace = true }
tracing-subscriber = { version = "0.3.19", features = ["default", "env-filter"] }
keccak-const = { workspace = true }
# tracing/logging
log = { workspace = true }


@@ -1,4 +0,0 @@
//! TODO: documentation
//!
fn main() {}


@@ -1,69 +0,0 @@
//! This crate defines the logic of, and ways to interact with, Exo's **_System Custodian_** daemon.
//!
//! The **_System Custodian_** daemon is meant to be a long-lived process that precedes the
//! launch of the Exo application and is responsible for ensuring the system (configuration,
//! settings, etc.) is in an appropriate state to facilitate running the Exo application.
//! The **_System Custodian_** daemon shall expose a [D-Bus](https://www.freedesktop.org/wiki/Software/dbus/)
//! service which the Exo application uses to _control & query_ it.
//!
//! # Lifecycle
//! When the Exo application starts, it will _wake_ the **_System Custodian_** daemon for the
//! duration of its lifetime, and after it has terminated the daemon will go back to sleep. When
//! the daemon wakes up, it will configure the system into a state suitable for the Exo Application;
//! When the daemon goes to sleep, it will revert those changes as much as it can in case they were
//! destructive to the user's pre-existing configurations.
//!
//! # Responsibilities
//! TODO: these responsibilities are currently macOS-specific; broaden them to other platforms later
//! The **_System Custodian_** daemon is responsible for using the System Configuration framework to
//! 1. duplicate the current network set
//! 2. modify existing services to turn on IPv6 if not there
//! 3. remove any bridge services & add any missing services that AREN'T bridge
//! TODO: In the future:
//! 1. run a dummy AWDL service to [allow for macOS peer-to-peer wireless networking](https://yggdrasil-network.github.io/2019/08/19/awdl.html)
//! 2. toggle some GPU/memory configurations to speed up GPU (ask Alex what those configurations are)
//! 3. if we ever decide to provide our **own network interfaces** that abstract over some userland
//! logic, this would be the place to spin that up.
//!
//! Then it will watch the SCDynamicStore for:
//! 1. all __actual__ network interfaces -> collect information on them e.g. their BSD name, MAC
//! address, MTU, IPv6 addresses, etc. -> and set up watchers/notifiers to inform the DBus
//! interface of any changes
//! 2. watch for any __undesirable__ changes to configuration and revert them
//!
//! It should somehow (probably through system sockets and/or BSD interface) trigger IPv6 NDP on
//! each of the interfaces & also listen to/query for any changes on the OS routing cache??
//! Basically emulate the `ping6 ff02::1%enX` and `ndp -an` commands BUT BETTER!!!
//! 1. all that info should coalesce back into the overall collected state -> should be queryable
//! over D-Bus
//! TODO:
//! 1. we might potentially add to this step a handshake of some kind...? To ensure that we can
//! ACTUALLY communicate with that machine over that link over e.g. TCP, UDP, etc. Will the
//! handshake require to know Node ID? Will the handshake require heartbeats? Who knows...
//! 2. if we ever decide to write proprietary L2/L3 protocols for quicker communication,
//! e.g. [AF_NDRV](https://www.zerotier.com/blog/how-zerotier-eliminated-kernel-extensions-on-macos/)
//! for raw ethernet frame communication, or even a [custom thunderbolt PCIe driver](https://developer.apple.com/documentation/pcidriverkit/creating-custom-pcie-drivers-for-thunderbolt-devices),
//! then this would be the place to carry out discovery and proper handshakes with devices
//! on the other end of the link.
//!
// enable Rust-unstable features for convenience
#![feature(trait_alias)]
#![feature(stmt_expr_attributes)]
#![feature(type_alias_impl_trait)]
#![feature(specialization)]
#![feature(unboxed_closures)]
#![feature(const_trait_impl)]
#![feature(fn_traits)]
pub(crate) mod private {
// sealed traits support
pub trait Sealed {}
impl<T: ?Sized> Sealed for T {}
}
/// Namespace for all the type/trait aliases used by this crate.
pub(crate) mod alias {}
/// Namespace for crate-wide extension traits/methods
pub(crate) mod ext {}


@@ -1,58 +0,0 @@
#!/usr/bin/env nix-shell
#!nix-shell -i bash -p mktorrent -p python3Packages.huggingface-hub -p git -p git-lfs
set -euo pipefail
set -x
MODEL="$1"
# Step 1: Clone/fetch the repo and get the hash of origin/main
mkdir -p "$MODEL"
if test -d "$MODEL/git"; then
# Assert that the origin is correct before fetching
test "$(git -C "$MODEL/git" remote get-url origin)" = "https://huggingface.co/$MODEL"
git -C "$MODEL/git" fetch
else
git clone "https://huggingface.co/$MODEL" "$MODEL/git"
fi
HASH=$(git -C "$MODEL/git" rev-parse origin/main)
LARGE_FILES=$(git -C "$MODEL/git" lfs ls-files --all --name-only)
SMALL_DIR="$MODEL/$HASH-small"
LARGE_DIR="$MODEL/$HASH-large"
mkdir -p "$SMALL_DIR" "$LARGE_DIR"
# Step 2: Prepare files. Two torrents: one for large files and one for metadata.
git -C "$MODEL/git" archive "$HASH" | tar -x -C "$SMALL_DIR"
echo "$LARGE_FILES" | xargs -I{} rm "$SMALL_DIR/{}"
echo "$LARGE_FILES" | xargs hf download "$MODEL" --revision "$HASH" --local-dir "$LARGE_DIR" --cache-dir "$(realpath .cache)" --include
if test -d "$LARGE_DIR/.cache"; then
echo ".cache created against our wishes, deleting it..."
rm -r "$LARGE_DIR/.cache"
fi
# Step 3: Create both torrents
mkdir -p "torrents/$MODEL/"
SMALL_TORRENT_PATH="torrents/$MODEL/${HASH}.small.torrent"
LARGE_TORRENT_PATH="torrents/$MODEL/${HASH}.large.torrent"
mktorrent "$SMALL_DIR/" --output="$SMALL_TORRENT_PATH" \
-n "$HASH" \
--web-seed="https://huggingface.co/$MODEL/raw/" \
--no-date \
--announce="udp://tracker.opentrackr.org:1337/announce"
# --private
mktorrent "$LARGE_DIR/" --output="$LARGE_TORRENT_PATH" \
-n "$HASH" \
--web-seed="https://huggingface.co/$MODEL/resolve/" \
--piece-length=24 \
--no-date \
--announce="udp://tracker.opentrackr.org:1337/announce"
# --private
echo "Successfully created torrent files in:"
echo "$SMALL_TORRENT_PATH"
echo "$LARGE_TORRENT_PATH"

Some files were not shown because too many files have changed in this diff.