optimize dashboard

2026-01-10 15:08:15 -05:00 · 2025-12-29 22:00:59 +05:00
71 changed files with 5713 additions and 2974 deletions
--- a/.github/benchmark-dashboard/README.md
+++ b/.github/benchmark-dashboard/README.md
@@ -0,0 +1,159 @@
+# EXO Benchmark Dashboard
+
+A fully self-contained, browser-based dashboard for tracking EXO benchmark performance over time.
+
+## Features
+
+- 📊 **Success Rate Tracking**: Monitor cluster reliability across commits
+- ⚡ **Response Time Analysis**: Track average request completion times  
+- 🎯 **Throughput Metrics**: Tokens per second visualization
+- 📈 **Request Distribution**: Success/failure breakdown over time
+- 🔄 **Auto-Refresh**: Updates every 60 seconds
+- 📺 **TV-Ready**: Large, clear visualizations perfect for display
+- 🔐 **Secure**: Credentials stored in browser localStorage only
+- 🌐 **No Backend**: Directly accesses S3 from the browser
+
+## Quick Start
+
+### Option 1: Direct File Access (Simplest)
+
+Just open the HTML file directly in your browser:
+
+```bash
+open .github/benchmark-dashboard/index.html
+```
+
+Then click "Configure AWS Credentials" and enter your keys.
+
+### Option 2: URL Parameters (For Quick Setup)
+
+```bash
+# Open with credentials in the URL (they'll be moved to localStorage)
+open ".github/benchmark-dashboard/index.html?accessKey=YOUR_KEY&secretKey=YOUR_SECRET&region=us-east-1"
+```
+
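+The credentials will be saved to localStorage and removed from the URL immediately. Note that a command typed this way can still leave the secret key in your shell history, so prefer the in-page "Configure AWS Credentials" dialog on shared machines.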
+
+### Option 3: Simple HTTP Server
+
+```bash
+# From repo root
+python3 -m http.server 8080
+
+# Then open: http://localhost:8080/.github/benchmark-dashboard/
+```
+
+## AWS Credentials
+
+The dashboard needs read-only access to the `exo-benchmark-results` S3 bucket.
+
+### Required IAM Permissions
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "s3:GetObject",
+        "s3:ListBucket"
+      ],
+      "Resource": [
+        "arn:aws:s3:::exo-benchmark-results",
+        "arn:aws:s3:::exo-benchmark-results/*"
+      ]
+    }
+  ]
+}
+```
+
+### Security Notes
+
+- ✅ Credentials stored in browser `localStorage` only
+- ✅ Never sent to any server (except AWS)
+- ✅ All S3 access happens client-side
+- ✅ Use read-only IAM credentials
+- ⚠️ Don't commit credentials to git
+- ⚠️ Use a dedicated read-only IAM user
+
+## TV/Kiosk Mode
+
+For permanent display on a TV:
+
+### macOS
+```bash
+open -a "Google Chrome" --args --kiosk ".github/benchmark-dashboard/index.html"
+```
+
+### Linux
+```bash
+chromium-browser --kiosk --app="file://$(pwd)/.github/benchmark-dashboard/index.html"
+```
+
+### Auto-start on Boot
+
+Create a simple startup script:
+
+```bash
+#!/bin/bash
+# /usr/local/bin/start-benchmark-dashboard.sh
+
+cd /path/to/exo
+python3 -m http.server 8080 &
+sleep 2
+chromium-browser --kiosk http://localhost:8080/.github/benchmark-dashboard/
+```
+
+## Data Displayed
+
+### Summary Cards
+- **Latest Success Rate**: Most recent benchmark success percentage with trend
+- **Avg Response Time**: Latest average response time in ms with trend
+- **Total Benchmarks**: Count of all benchmarks run
+- **Active Configurations**: Number of unique benchmark configs
+
+### Charts
+1. **Success Rate Over Time**: Line chart showing reliability trends
+2. **Average Response Time**: Performance over time (lower is better)
+3. **Throughput**: Tokens/second metric (higher is better)
+4. **Request Distribution**: Stacked bar chart of successes/failures
+
+## How It Works
+
+1. **Loads AWS SDK**: Uses AWS SDK for JavaScript (browser version)
+2. **Lists S3 Objects**: Fetches all files from `s3://exo-benchmark-results/bench/`
+3. **Downloads Results**: Fetches each JSON result file
+4. **Parses & Visualizes**: Uses Chart.js to create interactive charts
+5. **Auto-Refreshes**: Polls S3 every 60 seconds for new results
+
+## Customization
+
+To modify the dashboard:
+
+1. Edit `index.html` 
+2. Adjust `REFRESH_INTERVAL` for different polling frequency
+3. Modify chart colors/styles in the Chart.js configuration
+4. Add new metrics by extending the results parsing
+
+## Troubleshooting
+
+**"AWS credentials not configured"**
+- Click "Configure AWS Credentials" and enter your keys
+
+**"Error loading benchmark data"**
+- Check AWS credentials are correct
+- Verify S3 bucket name is `exo-benchmark-results`
+- Ensure IAM user has read permissions
+- Check browser console for detailed errors
+
+**"No benchmark results found"**
+- Wait for benchmark workflows to run
+- Verify results are being uploaded to S3
+- Check S3 bucket has files in `bench/` prefix
+
+**Charts not updating**
+- Check browser console for errors
+- Verify network connectivity to S3
+- Try refreshing the page manually
+
--- a/.github/benchmark-dashboard/index.html
+++ b/.github/benchmark-dashboard/index.html
--- a/.github/configs/README.md
+++ b/.github/configs/README.md
@@ -0,0 +1,186 @@
+# EXO Benchmark Configurations
+
+This directory contains configuration files for the EXO staged benchmark system.
+
+## Overview
+
+The staged benchmark system allows you to run complex, multi-stage load tests against EXO clusters. Each stage can have different characteristics:
+
+- **Prompt Length**: Number of tokens in the input prompt
+- **Generation Length**: Maximum tokens to generate in the response
+- **Time Between Requests**: Delay (in seconds) between firing consecutive requests
+- **Iterations**: Number of requests to send in this stage
+
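+By default, requests are **fire-and-forget** - they don't wait for the previous request to complete. This allows you to test overlapping request handling and measure success rates under load. Set `no_overlap: true` in the config (as `bench_simple.yaml` does) to run requests sequentially instead.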
+
+## Configuration Files
+
+### `bench_simple.yaml`
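+A minimal configuration in the spirit of the original `bench.py` script. As currently checked in, it runs:
+- A single active stage (`pp256_g64`) with 5 iterations
+- A 256-token prompt
+- Up to 64 generated tokens per request, with requests issued sequentially (`no_overlap: true`)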
+
+This is useful for quick smoke tests.
+
+### `bench_config.yaml`
+A comprehensive multi-stage benchmark with:
+1. **Warmup** (10 requests): Light load with short prompts
+2. **Medium Load** (20 requests): Moderate load with medium prompts
+3. **Stress Test** (30 requests): Heavy overlapping requests with long prompts
+4. **Cooldown** (5 requests): Light load to wind down
+
+This tests the cluster's behavior under varying load patterns.
+
+## Configuration Schema
+
+```yaml
+# Hardware configuration - maps runner labels to instance counts
+hardware_plan:
+  M3ULTRA_GPU80_512GB: 4
+
+# Environment variables to set on each node (optional)
+environment:
+  OVERRIDE_MEMORY_MB: 512
+
+# Timeout for instance and runner readiness (seconds)
+timeout_seconds: 600
+
+# Model instances to run concurrently
+model_ids:
+  - "mlx-community/Llama-3.2-1B-Instruct-4bit"
+
+# Benchmark stages
+stages:
+  - name: "stage_name"              # Human-readable name for this stage
+    prompt_length: 100               # Target prompt length in tokens
+    generation_length: 200           # Max tokens to generate
+    time_between_requests: 2.0       # Seconds between firing requests
+    iterations: 10                   # Number of requests in this stage
+```
+
+## Running Benchmarks
+
+### Via GitHub Actions
+
+**Automatic (on push):**
+- The **`bench`** workflow is triggered on every push, but its jobs only run when the head commit message contains `/bench`
+- Uses `bench_simple.yaml` as the default configuration
+- All settings (hardware plan, timeout, environment variables, models, stages) are defined in the config file
+
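+**Manual (on-demand)** — note: this requires a `workflow_dispatch` trigger with a config-file input; `bench.yml` as checked in declares only `on: [push]` and hard-codes `.github/configs/bench_simple.yaml`: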
+1. Go to **Actions** → **bench** workflow
+2. Click **Run workflow**
+3. Configure:
+   - **Config File**: Path to your YAML config (default: `.github/configs/bench_simple.yaml`)
+     - `.github/configs/bench_simple.yaml` for quick tests
+     - `.github/configs/bench_config.yaml` for complex multi-stage tests
+   
+All other settings (hardware plan, timeout, environment variables, models, stages) are read from the specified config file.
+
+### Via Command Line
+
+```bash
+# Start EXO on localhost:8000
+uv run exo --api-port 8000
+
+# Run simple benchmark (1 stage, 1 iteration)
+python3 .github/scripts/bench.py \
+  --api-port 8000 \
+  --config .github/configs/bench_simple.yaml \
+  --expected-nodes 1 \
+  --is-primary true \
+  --timeout-seconds 600
+
+# Run complex staged benchmark (4 stages, multiple iterations)
+python3 .github/scripts/bench.py \
+  --api-port 8000 \
+  --config .github/configs/bench_config.yaml \
+  --expected-nodes 1 \
+  --is-primary true \
+  --timeout-seconds 600
+```
+
+## Output Metrics
+
+For each stage, the benchmark reports:
+
+- **Total Requests**: Number of requests fired
+- **Successful Requests**: Requests that completed successfully
+- **Failed Requests**: Requests that encountered errors
+- **Success Rate**: Percentage of successful requests
+- **Total Tokens**: Sum of all tokens generated across successful requests
+- **Avg Tokens/Request**: Average tokens per successful request
+- **Avg Time/Request**: Average completion time per successful request
+
+A JSON summary is also printed for easy parsing and storage.
+
+## Creating Custom Benchmarks
+
+To create a custom benchmark:
+
+1. Copy an existing config file (e.g., `bench_config.yaml`)
+2. Modify the stages to match your test scenario
+3. Save it in this directory with a descriptive name
+4. Run it using the workflow or command line
+
+### Example: Sustained Load Test
+
+```yaml
+hardware_plan:
+  M3ULTRA_GPU80_512GB: 2
+
+environment:
+  OVERRIDE_MEMORY_MB: 1024
+
+timeout_seconds: 600
+
+model_ids:
+  - "mlx-community/Llama-3.2-1B-Instruct-4bit"
+
+stages:
+  - name: "sustained_load"
+    prompt_length: 200
+    generation_length: 150
+    time_between_requests: 0.5     # Very fast - 2 requests/second
+    iterations: 100                 # Run for ~50 seconds
+```
+
+### Example: Varying Prompt Sizes
+
+```yaml
+hardware_plan:
+  M4PRO_GPU16_24GB: 3
+
+timeout_seconds: 900
+
+model_ids:
+  - "mlx-community/Llama-3.2-1B-Instruct-4bit"
+
+stages:
+  - name: "tiny_prompts"
+    prompt_length: 10
+    generation_length: 100
+    time_between_requests: 1.0
+    iterations: 10
+    
+  - name: "medium_prompts"
+    prompt_length: 200
+    generation_length: 100
+    time_between_requests: 1.0
+    iterations: 10
+    
+  - name: "large_prompts"
+    prompt_length: 1000
+    generation_length: 100
+    time_between_requests: 1.0
+    iterations: 10
+```
+
+## Tips
+
+- **Overlapping Requests**: Set `time_between_requests` < expected completion time to test concurrent request handling
+- **Sequential Requests**: Set `time_between_requests` > expected completion time to ensure requests don't overlap
+- **Realistic Load**: Model real usage patterns by varying prompt/generation lengths across stages
+- **Success Rate**: A 100% success rate indicates the cluster handled the load well; lower rates suggest capacity limits
+
--- a/.github/configs/bench_config.yaml
+++ b/.github/configs/bench_config.yaml
@@ -0,0 +1,49 @@
+# EXO Staged Benchmark Configuration
+# This configuration defines a multi-stage load test for EXO clusters
+
+# Hardware configuration - maps runner labels to instance counts
+hardware_plan:
+  M3ULTRA_GPU80_512GB: 4
+
+# Environment variables to set on each node (optional)
+environment:
+  OVERRIDE_MEMORY_MB: 512
+
+# Timeout for instance and runner readiness (seconds)
+timeout_seconds: 600
+
+# Multiple instances run concurrently on the cluster
+model_ids:
+  - "mlx-community/Qwen3-0.6B-4bit"
+  - "mlx-community/Qwen3-0.6B-4bit"
+
+# Stages run sequentially, each with its own characteristics
+stages:
+  # Stage 1: Light load with short prompts
+  - name: "warmup"
+    prompt_length: 50          # Number of tokens in prompt
+    generation_length: 100     # Max tokens to generate
+    time_between_requests: 5.0 # Seconds between firing requests
+    iterations: 10             # Number of requests to send in this stage
+    
+  # Stage 2: Medium load with medium prompts
+  - name: "medium_load"
+    prompt_length: 200
+    generation_length: 150
+    time_between_requests: 3.0
+    iterations: 20
+    
+  # Stage 3: Heavy load with long prompts - requests will overlap
+  - name: "stress_test"
+    prompt_length: 500
+    generation_length: 200
+    time_between_requests: 1.0  # Fast firing - will definitely overlap
+    iterations: 30
+    
+  # Stage 4: Cool down with simple prompts
+  - name: "cooldown"
+    prompt_length: 50
+    generation_length: 50
+    time_between_requests: 10.0
+    iterations: 5
+
--- a/.github/configs/bench_simple.yaml
+++ b/.github/configs/bench_simple.yaml
@@ -0,0 +1,125 @@
+# Simple single-shot benchmark
+# Runs the models listed under model_ids (currently one) on 2 nodes
+
+# Hardware configuration - maps runner labels to instance counts
+hardware_plan:
+  puffin4: 1
+  puffin8: 1
+
+# Environment variables to set on each node
+environment:
+  PLACEHOLDER: "placeholder"
+  # OVERRIDE_MEMORY_MB: 50000
+  MLX_METAL_FAST_SYNCH: 1
+
+# Timeout for instance and runner readiness (seconds)
+timeout_seconds: 1800
+
+# Model instances to run concurrently
+model_ids:
+  # - "mlx-community/DeepSeek-V3.1-8bit"
+  # - "mlx-community/Kimi-K2-Instruct-4bit"
+  - "mlx-community/Kimi-K2-Thinking"
+  # - "mlx-community/Qwen3-235B-A22B-4bit"
+  # - "mlx-community/Llama-3.3-70B-Instruct-4bit"
+  # - "mlx-community/Llama-3.3-70B-Instruct-8bit"
+  # - "mlx-community/Llama-3.2-1B-Instruct-4bit"
+
+# Sharding strategy: "Pipeline" or "Tensor"
+sharding: "Tensor"
+
+# Instance type: "MlxRing" or "MlxIbv"
+instance_meta: "MlxIbv"
+
+# If true, run requests sequentially (no overlap); if false, fire-and-forget (default: false)
+no_overlap: true
+
+# Benchmark stages
+# pp: 64, 256, 1024, 2048, 4096, 8192, 16384
+# g: 64, 512
+stages:
+  # - name: "simple"
+  #   prompt_length: 512
+  #   generation_length: 10
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  # - name: "pp64_g64"
+  #   prompt_length: 64
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  # - name: "pp64_g64"
+  #   prompt_length: 64
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  # - name: "pp64_g512"
+  #   prompt_length: 64
+  #   generation_length: 512
+  #   time_between_requests: 2.0
+  #   iterations: 10
+  # - name: "pp256_g64"
+  #   prompt_length: 256
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  - name: "pp256_g64"
+    prompt_length: 256
+    generation_length: 64
+    time_between_requests: 2.0
+    iterations: 5
+  # - name: "pp256_g512"
+  #   prompt_length: 256
+  #   generation_length: 512
+  #   time_between_requests: 2.0
+  #   iterations: 10
+  # - name: "pp1024_g64"
+  #   prompt_length: 1024
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  # - name: "pp1024_g512"
+  #   prompt_length: 1024
+  #   generation_length: 512
+  #   time_between_requests: 2.0
+  #   iterations: 10
+  # - name: "pp2048_g64"
+  #   prompt_length: 2048
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  # - name: "pp2048_g512"
+  #   prompt_length: 2048
+  #   generation_length: 512
+  #   time_between_requests: 2.0
+  #   iterations: 10
+  # - name: "pp4096_g64"
+  #   prompt_length: 4096
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 4
+  # - name: "pp4096_g512"
+  #   prompt_length: 4096
+  #   generation_length: 512
+  #   time_between_requests: 2.0
+  #   iterations: 10
+  # - name: "pp8192_g64"
+  #   prompt_length: 8192
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  # - name: "pp8192_g512"
+  #   prompt_length: 8192
+  #   generation_length: 512
+  #   time_between_requests: 2.0
+  #   iterations: 5
+  # - name: "pp16384_g64"
+  #   prompt_length: 16384
+  #   generation_length: 64
+  #   time_between_requests: 2.0
+  #   iterations: 10
+  # - name: "pp16384_g512"
+  #   prompt_length: 16384
+  #   generation_length: 512
+  #   time_between_requests: 2.0
+  #   iterations: 10
--- a/.github/scripts/bench.py
+++ b/.github/scripts/bench.py
--- a/.github/scripts/build_matrix.py
+++ b/.github/scripts/build_matrix.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Build the GitHub Actions job matrix for the bench workflow.
+
+Reads the YAML benchmark config named by the ``CONFIG_FILE`` environment
+variable, expands its ``hardware_plan`` (runner label -> node count) into one
+matrix entry per node, and appends ``matrix``, ``config_file``,
+``timeout_seconds`` and ``environment`` to the file named by ``GITHUB_OUTPUT``.
+
+Raises:
+    KeyError: if ``CONFIG_FILE`` or ``GITHUB_OUTPUT`` is not set.
+    ValueError: if the config is not a mapping, has no usable
+        ``hardware_plan``, or a plan count is not a non-negative integer.
+"""
+import json
+import os
+from typing import NotRequired, TypedDict, cast
+
+import yaml
+
+
+class MatrixEntry(TypedDict):
+    # One node: its runner label and its index among nodes with that label.
+    label: str
+    index: int
+
+
+class MatrixInclude(TypedDict):
+    # One row of the Actions matrix ``include`` list.
+    label: str
+    index: int
+    is_primary: bool  # True only for the first entry overall
+    expected_nodes: int  # total number of entries in the matrix
+
+
+class Config(TypedDict):
+    # Subset of the benchmark YAML that this script reads.
+    hardware_plan: dict[str, int]
+    timeout_seconds: NotRequired[int]
+    # YAML scalars are passed through as-is, so values may be ints as well.
+    environment: NotRequired[dict[str, str | int]]
+
+
+# Read the config file
+config_file: str = os.environ["CONFIG_FILE"]
+with open(config_file, "r", encoding="utf-8") as f:
+    loaded = yaml.safe_load(f)
+
+# An empty YAML document loads as None; reject anything that is not a mapping
+# here instead of failing later with an AttributeError.
+if not isinstance(loaded, dict):
+    raise ValueError(f"{config_file} must contain a YAML mapping at the top level")
+config: Config = cast(Config, loaded)
+
+# Extract hardware plan from config. ``.get`` lets a missing key reach the
+# ValueError below rather than surfacing as a bare KeyError.
+raw_plan = config.get("hardware_plan")
+if not isinstance(raw_plan, dict) or not raw_plan:
+    raise ValueError(f"No hardware_plan found in {config_file}")
+plan: dict[str, int] = raw_plan
+
+# Build matrix entries
+entries: list[MatrixEntry] = []
+for label, count in plan.items():
+    # bool is a subclass of int, so exclude it explicitly.
+    if isinstance(count, bool) or not isinstance(count, int) or count < 0:
+        raise ValueError(
+            f"hardware_plan[{label!r}] in {config_file} must be a "
+            f"non-negative integer, got {count!r}"
+        )
+    for idx in range(count):
+        entries.append({"label": label, "index": idx})
+
+total_nodes: int = len(entries)
+if total_nodes == 0:
+    # An empty ``include`` list cannot be used as a job matrix.
+    raise ValueError(f"hardware_plan in {config_file} describes zero nodes")
+
+matrix: dict[str, list[MatrixInclude]] = {
+    "include": [
+        {
+            "label": e["label"],
+            "index": e["index"],
+            "is_primary": (i == 0),
+            "expected_nodes": total_nodes,
+        }
+        for i, e in enumerate(entries)
+    ]
+}
+
+# Extract other config values. A key that is present but empty in YAML (for
+# example ``environment:`` with every entry commented out) loads as None, so
+# fall back to the defaults in that case as well.
+raw_timeout = config.get("timeout_seconds")
+timeout_seconds: int = 600 if raw_timeout is None else raw_timeout
+environment: dict[str, str | int] = config.get("environment") or {}
+
+# Output to GitHub Actions
+with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
+    f.write(f"matrix={json.dumps(matrix)}\n")
+    f.write(f"config_file={config_file}\n")
+    f.write(f"timeout_seconds={timeout_seconds}\n")
+    f.write(f"environment={json.dumps(environment)}\n")
+
+print(f"Matrix: {json.dumps(matrix)}")
+print(f"Config file: {config_file}")
+print(f"Timeout: {timeout_seconds}")
+print(f"Environment: {json.dumps(environment)}")
--- a/.github/workflows/BENCH_USAGE.md
+++ b/.github/workflows/BENCH_USAGE.md
@@ -0,0 +1,156 @@
+# Benchmark Workflow Usage
+
+## Overview
+
+The `bench_matrix.yml` workflow enables distributed benchmarking of models across multiple self-hosted macOS runners with different hardware configurations.
+
+## Workflow Inputs
+
+| Input | Description | Default | Required |
+|-------|-------------|---------|----------|
+| `model_id` | Model ID to benchmark | `mlx-community/Llama-3.2-1B-Instruct-4bit` | Yes |
+| `hardware_plan` | JSON mapping of runner labels to counts | `{"M4PRO_GPU16_24GB": 1}` | Yes |
+| `prompt` | Benchmark prompt text | `What is the capital of France?` | No |
+| `timeout_seconds` | Timeout for instance/runner readiness | `600` | No |
+
+## Hardware Plan Format
+
+The `hardware_plan` input is a JSON object mapping runner labels to the number of machines:
+
+```json
+{
+  "M4PRO_GPU16_24GB": 2,
+  "M3ULTRA_GPU80_512GB": 1
+}
+```
+
+This example would:
+- Start 2 runners with the `M4PRO_GPU16_24GB` label
+- Start 1 runner with the `M3ULTRA_GPU80_512GB` label
+- Total of 3 runners coordinating on a single distributed inference instance
+
+## How It Works
+
+1. **Planning Job** (`plan`)
+   - Runs on `ubuntu-latest`
+   - Parses the `hardware_plan` JSON
+   - Generates a dynamic matrix with one entry per runner
+   - Only the first runner (index 0) is marked as `is_primary`
+
+2. **Benchmark Worker Jobs** (`bench_worker`)
+   - Each job runs on a self-hosted macOS runner with the specified label
+   - All runners start EXO in parallel
+   - The primary runner creates the model instance
+   - All runners wait for their assigned runner to be ready (Loaded/Running status)
+   - The primary runner executes the benchmark and prints results
+   - The primary runner deletes the instance
+
+## Example Usage
+
+### Single Machine Benchmark
+
+```yaml
+model_id: mlx-community/Llama-3.2-1B-Instruct-4bit
+hardware_plan: '{"M4PRO_GPU16_24GB": 1}'
+prompt: What is the capital of France?
+timeout_seconds: 600
+```
+
+### Multi-Machine Distributed Benchmark
+
+```yaml
+model_id: mlx-community/Llama-3.2-3B-Instruct-4bit
+hardware_plan: '{"M4PRO_GPU16_24GB": 2, "M3ULTRA_GPU80_512GB": 1}'
+prompt: Explain quantum computing in simple terms.
+timeout_seconds: 900
+```
+
+## Benchmark Output
+
+The primary runner outputs a JSON object with benchmark results:
+
+```json
+{
+  "model_id": "mlx-community/Llama-3.2-1B-Instruct-4bit",
+  "instance_id": "abc-123-def",
+  "tokens": 42,
+  "elapsed_s": 2.451,
+  "tps": 17.136
+}
+```
+
+Where:
+- `tokens`: Number of chunks/tokens generated
+- `elapsed_s`: Total elapsed time in seconds
+- `tps`: Tokens per second (tokens / elapsed_s)
+
+## Runner Requirements
+
+Each self-hosted runner must:
+- Be labeled with appropriate hardware tags (e.g., `M4PRO_GPU16_24GB`)
+- Have the `self-hosted` and `macOS` labels
+- Have Nix installed with flakes enabled
+- Have network connectivity to other runners in the same job
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ GitHub Actions Workflow (bench_matrix.yml)                  │
+├─────────────────────────────────────────────────────────────┤
+│                                                              │
+│  ┌────────────────┐                                         │
+│  │  Plan Job      │                                         │
+│  │  (ubuntu)      │──┬─► Matrix: [{label, index, primary}] │
+│  └────────────────┘  │                                      │
+│                      │                                      │
+│  ┌───────────────────▼──────────────────────────────────┐  │
+│  │  Bench Worker Jobs (Matrix)                         │  │
+│  ├──────────────────────────────────────────────────────┤  │
+│  │                                                       │  │
+│  │  Runner 0 (Primary)     Runner 1         Runner 2    │  │
+│  │  ┌─────────────┐       ┌─────────────┐ ┌──────────┐ │  │
+│  │  │ Start EXO   │       │ Start EXO   │ │ Start EXO│ │  │
+│  │  │ Create Inst │       │ Wait...     │ │ Wait...  │ │  │
+│  │  │ Wait Ready  │       │ Wait Ready  │ │ Wait...  │ │  │
+│  │  │ Run Bench   │       │ (idle)      │ │ (idle)   │ │  │
+│  │  │ Print TPS   │       │             │ │          │ │  │
+│  │  │ Delete Inst │       │             │ │          │ │  │
+│  │  └─────────────┘       └─────────────┘ └──────────┘ │  │
+│  └───────────────────────────────────────────────────────┘  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## Implementation Details
+
+### `.github/scripts/bench.py`
+
+A standalone Python script that:
+- Creates instance (primary only)
+- Polls `/state` endpoint until instance and all runners are ready
+- Executes chat completion with timing (primary only)
+- Parses SSE stream and counts tokens
+- Computes TPS metrics
+- Cleans up instance (primary only)
+
+### Key Functions
+
+- `wait_for_instance()`: Polls until instance with model_id appears
+- `wait_for_runners_ready()`: Polls until expected number of runners reach Loaded/Running status
+- `run_benchmark()`: Executes chat completion, measures time, counts tokens
+
+## Troubleshooting
+
+### Instance never becomes ready
+- Check EXO logs in the workflow output
+- Verify model_id is valid and accessible
+- Increase `timeout_seconds`
+
+### Runner mismatch
+- Ensure hardware_plan counts match available labeled runners
+- Check runner labels match exactly (case-sensitive)
+
+### Network issues
+- Verify runners can communicate on the network
+- Check firewall rules between runner hosts
+
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -0,0 +1,305 @@
+name: bench
+
+on: [push]
+
+jobs:
+  plan:
+    if: contains(github.event.head_commit.message, '/bench')
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.build.outputs.matrix }}
+      config_file: ${{ steps.build.outputs.config_file }}
+      timeout_seconds: ${{ steps.build.outputs.timeout_seconds }}
+      environment: ${{ steps.build.outputs.environment }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Build matrix from config file
+        id: build
+        shell: bash
+        run: |
+          set -euo pipefail
+          CONFIG_FILE='.github/configs/bench_simple.yaml'
+          export CONFIG_FILE
+          echo "Config file: $CONFIG_FILE"
+          python3 .github/scripts/build_matrix.py
+
+  bench_worker:
+    needs: plan
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.plan.outputs.matrix) }}
+    name: "bench on ${{ matrix.label }} [${{ matrix.index }}]"
+    runs-on: [self-hosted, macOS, "${{ matrix.label }}"]
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          lfs: false
+
+      - name: Configure git user
+        run: |
+          git config --local user.email "github-actions@users.noreply.github.com"
+          git config --local user.name  "github-actions bot"
+        shell: bash
+
+      # TODO: this is mega hacky and I'd like a simpler solution.
+      - name: Setup Nix Environment
+        run: |
+          echo "Checking for nix installation..."
+          
+          # Check if nix is already available
+          if command -v nix >/dev/null 2>&1; then
+            echo "Nix already in PATH"
+          # Try sourcing profile scripts to set up environment properly
+          elif [ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]; then
+            echo "Sourcing multi-user nix-daemon profile script"
+            source /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
+          elif [ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]; then
+            echo "Sourcing single-user nix profile script"
+            source "$HOME/.nix-profile/etc/profile.d/nix.sh"
+          elif [ -f /nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh ]; then
+            echo "Sourcing per-user nix profile script"
+            source /nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh
+          elif [ -f /etc/profile.d/nix.sh ]; then
+            echo "Sourcing system-wide nix profile script"
+            source /etc/profile.d/nix.sh
+          # Fallback: manually add nix to PATH if binary exists
+          elif [ -f /nix/var/nix/profiles/default/bin/nix ]; then
+            echo "Found nix binary, manually adding to PATH"
+            export PATH="/nix/var/nix/profiles/default/bin:$PATH"
+          elif [ -f "$HOME/.nix-profile/bin/nix" ]; then
+            echo "Found nix binary in user profile, manually adding to PATH"
+            export PATH="$HOME/.nix-profile/bin:$PATH"
+          else
+            echo "Nix not found. Debugging info:"
+            echo "USER: $USER"
+            echo "HOME: $HOME"
+            echo "Current PATH: $PATH"
+            echo ""
+            echo "Checking common Nix locations:"
+            echo "  /nix/var/nix/profiles/default/bin/nix:"
+            ls -la /nix/var/nix/profiles/default/bin/nix 2>/dev/null || echo "    Not found"
+            echo "  /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh:"
+            ls -la /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh 2>/dev/null || echo "    Not found"
+            echo "  ~/.nix-profile/etc/profile.d/nix.sh:"
+            ls -la "$HOME/.nix-profile/etc/profile.d/nix.sh" 2>/dev/null || echo "    Not found"
+            echo "  /nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh:"
+            ls -la "/nix/var/nix/profiles/per-user/$USER/profile/etc/profile.d/nix.sh" 2>/dev/null || echo "    Not found"
+            echo ""
+            echo "/nix directory structure:"
+            ls -la /nix 2>/dev/null || echo "    /nix directory not found"
+            echo ""
+            echo "/nix/var:"
+            ls -la /nix/var 2>/dev/null || echo "    /nix/var not found"
+            echo ""
+            echo "/nix/store:"
+            ls -la /nix/store 2>/dev/null | head -20 || echo "    /nix/store not found"
+            echo ""
+            echo "GitHub Actions runner is running as user '$USER'."
+            echo "If Nix is installed for a different user, either:"
+            echo "  1. Install Nix for user '$USER' (multi-user install recommended)"
+            echo "  2. Configure the runner service to run as the user with Nix installed"
+            echo "  3. Ensure Nix is installed system-wide with proper daemon setup"
+            exit 1
+          fi
+          
+          # Verify nix is available and persist to GITHUB_ENV
+          if command -v nix >/dev/null 2>&1; then
+            echo "✓ Nix is available"
+            nix --version
+            echo "PATH=$PATH" >> $GITHUB_ENV
+            if [ -n "$NIX_PATH" ]; then
+              echo "NIX_PATH=$NIX_PATH" >> $GITHUB_ENV
+            fi
+          else
+            echo "ERROR: Failed to set up Nix"
+            echo "PATH after setup attempt: $PATH"
+            exit 1
+          fi
+        shell: bash
+
+      - name: Setup EXO_HOME and API_PORT
+        run: |
+          EXO_HOME=$(mktemp -d -t exo-e2e-XXXXXXXX)
+          API_PORT=$((49152 + RANDOM % (65535 - 49152 + 1)))
+          EXO_MODELS_DIR="$HOME/.exo/models"
+          EXO_LIBP2P_NAMESPACE="bench-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
+          echo "EXO_HOME=$EXO_HOME" >> "$GITHUB_ENV"
+          echo "API_PORT=$API_PORT" >> "$GITHUB_ENV"
+          echo "EXO_MODELS_DIR=$EXO_MODELS_DIR" >> "$GITHUB_ENV"
+          echo "EXO_LIBP2P_NAMESPACE=$EXO_LIBP2P_NAMESPACE" >> "$GITHUB_ENV"
+          echo "Created EXO_HOME: $EXO_HOME"
+          echo "Generated API_PORT: $API_PORT"
+          echo "Using models from: $EXO_MODELS_DIR"
+          echo "Using libp2p namespace: $EXO_LIBP2P_NAMESPACE"
+        shell: bash
+
+      - name: Configure local MLX if available
+        run: |
+          echo "=== DEBUG: Checking for local MLX configuration ==="
+          MODIFIED=false
+          
+          echo "Checking for /Users/Shared/mlx directory..."
+          if [ -d "/Users/Shared/mlx" ]; then
+            echo "✓ Found /Users/Shared/mlx"
+            ls -la /Users/Shared/mlx | head -5
+            echo "Enabling local mlx path in pyproject.toml"
+            sed -i.bak 's|^# mlx = { path = "/Users/Shared/mlx", editable=true }$|mlx = { path = "/Users/Shared/mlx", editable=true }|' pyproject.toml
+            MODIFIED=true
+          else
+            echo "✗ /Users/Shared/mlx not found, will use PyPI version"
+          fi
+          
+          echo "Checking for /Users/Shared/mlx-lm directory..."
+          if [ -d "/Users/Shared/mlx-lm" ]; then
+            echo "✓ Found /Users/Shared/mlx-lm"
+            ls -la /Users/Shared/mlx-lm | head -5
+            echo "Enabling local mlx-lm path in pyproject.toml"
+            sed -i.bak 's|^# mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }$|mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }|' pyproject.toml
+            MODIFIED=true
+          else
+            echo "✗ /Users/Shared/mlx-lm not found, will use PyPI version"
+          fi
+          
+          if [ "$MODIFIED" = true ]; then
+            echo "=== Modified pyproject.toml [tool.uv.sources] section: ==="
+            sed -n '/\[tool\.uv\.sources\]/,/^\[/{/^\[tool\.uv\.sources\]/p; /^\[/!p;}' pyproject.toml
+            echo "=== Regenerating uv.lock with local MLX paths... ==="
+            nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command uv lock --upgrade-package mlx --upgrade-package mlx-lm
+            echo "✓ Lock file regenerated"
+          else
+            echo "⚠ No local MLX directories found, using PyPI packages"
+          fi
+          echo "=== DEBUG: Local MLX configuration complete ==="
+        shell: bash
+
+      - name: Sync dependencies
+        shell: bash
+        run: |
+          # If /Users/Shared/test exists on this runner, run `uv sync --reinstall`
+          # inside it first; afterwards always run `just sync` in the nix dev shell.
+          SHARED_TEST_DIR="/Users/Shared/test"
+          if [[ -d "$SHARED_TEST_DIR" ]]; then
+            pushd "$SHARED_TEST_DIR"
+            uv sync --reinstall
+            popd
+          fi
+          echo "Running just sync to ensure clean dependencies..."
+          nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command just sync
+
+      - name: Start EXO and run bench script
+        shell: bash
+        env:
+          IS_PRIMARY: ${{ matrix.is_primary }}
+          EXPECTED_NODES: ${{ matrix.expected_nodes }}
+          HARDWARE_LABEL: ${{ matrix.label }}
+          CONFIG_FILE: ${{ needs.plan.outputs.config_file }}
+          TIMEOUT_SECONDS: ${{ needs.plan.outputs.timeout_seconds }}
+          ENVIRONMENT_JSON: ${{ needs.plan.outputs.environment }}
+        run: |
+          # Launches EXO in the background, waits for its HTTP API to answer, then
+          # runs .github/scripts/bench.py against it. The EXIT trap prints the tail
+          # of the EXO log and kills the background process however the step ends.
+          set -euo pipefail
+
+          # Parse environment variables from config.
+          # Each value is shell-quoted because $ENV_VARS is spliced into a
+          # `bash -c` command string below: an unquoted value containing spaces or
+          # shell metacharacters would be split or interpreted by the inner shell.
+          # Values made only of safe characters are emitted unchanged.
+          ENV_VARS=""
+          if [ -n "$ENVIRONMENT_JSON" ] && [ "$ENVIRONMENT_JSON" != "{}" ]; then
+            ENV_VARS=$(echo "$ENVIRONMENT_JSON" | python3 -c "import sys, json, shlex; env = json.load(sys.stdin); print(' '.join([f'{k}={shlex.quote(str(v))}' for k, v in env.items()]))")
+          fi
+
+          echo "Starting EXO with API_PORT=${API_PORT} EXO_HOME=${EXO_HOME} EXO_LIBP2P_NAMESPACE=${EXO_LIBP2P_NAMESPACE}"
+          echo "Environment variables from config: $ENV_VARS"
+          LOG_FILE=/tmp/exo.log
+          : > "$LOG_FILE"
+
+          # Only the primary node passes the -m flag to exo.
+          MASTER_FLAG=""
+          if [ "$IS_PRIMARY" = "true" ]; then
+            MASTER_FLAG="-m"
+          fi
+
+          nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c \
+            "EXO_HOME=$EXO_HOME EXO_MODELS_DIR=$EXO_MODELS_DIR EXO_LIBP2P_NAMESPACE=$EXO_LIBP2P_NAMESPACE $ENV_VARS PYTHONUNBUFFERED=1 PYTHONDEBUG=1 PYTHONPATH=. uv run exo $MASTER_FLAG --api-port $API_PORT" \
+            >> "$LOG_FILE" 2>&1 &
+
+          EXO_PID=$!
+          echo "Started EXO in background with PID: $EXO_PID"
+          echo "Log file: $LOG_FILE"
+
+          # Always show the end of the EXO log and stop the background process.
+          cleanup() {
+            echo '=== EXO log (tail) ==='
+            tail -n 300 "$LOG_FILE" || true
+            if ps -p "$EXO_PID" >/dev/null 2>&1; then
+              echo "Killing EXO (PID $EXO_PID)"
+              kill "$EXO_PID" || true
+            fi
+          }
+          trap cleanup EXIT
+
+          # Poll the API once per second for up to 60 attempts. Bail out early if
+          # the EXO process has died, and remember whether the API ever answered
+          # so a timeout is not mistaken for success.
+          API_READY=false
+          for i in $(seq 1 60); do
+            if curl -s "http://localhost:${API_PORT}/state" >/dev/null 2>&1; then
+              echo "EXO API ready"
+              API_READY=true
+              break
+            fi
+            if ! ps -p "$EXO_PID" >/dev/null 2>&1; then
+              echo "EXO terminated early"; sed -n '1,200p' "$LOG_FILE" || true; exit 1
+            fi
+            sleep 1
+          done
+
+          # Without this check the loop would fall through on timeout and the
+          # benchmark would run against an API that never came up.
+          if [ "$API_READY" != "true" ]; then
+            echo "EXO API did not become ready within 60 seconds"
+            exit 1
+          fi
+
+          RESULTS_FILE="/tmp/bench_results_${GITHUB_RUN_ID}_${GITHUB_RUN_ATTEMPT}_$(date +%s).json"
+          echo "Results will be saved to: $RESULTS_FILE"
+          # Exported through GITHUB_ENV so later steps can read RESULTS_FILE.
+          echo "RESULTS_FILE=$RESULTS_FILE" >> "$GITHUB_ENV"
+
+          echo "Running bench script with config: $CONFIG_FILE, timeout: $TIMEOUT_SECONDS"
+          nix --extra-experimental-features nix-command --extra-experimental-features flakes develop --command bash -c \
+            "PYTHONUNBUFFERED=1 uv run --no-project --with pyyaml --with pydantic python .github/scripts/bench.py \
+              --api-port $API_PORT \
+              --config $CONFIG_FILE \
+              --expected-nodes ${EXPECTED_NODES} \
+              --is-primary ${IS_PRIMARY} \
+              --timeout-seconds ${TIMEOUT_SECONDS} \
+              --output $RESULTS_FILE \
+              --git-commit ${GITHUB_SHA} \
+              --hardware-labels ${HARDWARE_LABEL}"
+
+      - name: Install AWS CLI
+        if: always() && env.RESULTS_FILE && matrix.is_primary
+        shell: bash
+        run: |
+          # Install the AWS CLI through Homebrew only when `aws` is not already on PATH.
+          if command -v aws >/dev/null 2>&1; then
+            echo "AWS CLI already installed"
+          else
+            echo "AWS CLI not found, installing..."
+            brew install awscli
+          fi
+
+      - name: Upload results to S3
+        if: always() && env.RESULTS_FILE && matrix.is_primary
+        shell: bash
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.S3_BENCHMARKS_AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_BENCHMARKS_AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-1
+        run: |
+          # Copies the benchmark results JSON to the exo-benchmark-results bucket
+          # under a date-partitioned key; does nothing when no file was produced.
+          echo "Checking for results file: $RESULTS_FILE"
+          echo "Is primary: ${{ matrix.is_primary }}"
+
+          # Guard clause: without a results file there is nothing to upload.
+          if [ ! -f "$RESULTS_FILE" ]; then
+            echo "Results file not found at: $RESULTS_FILE"
+            echo "Skipping upload"
+            exit 0
+          fi
+
+          # Key layout: bench/YYYY/MM/DD/HHMMSS_<first 8 chars of sha>_<run id>.json (UTC).
+          UPLOAD_TIME=$(date -u +%Y/%m/%d/%H%M%S)
+          SHORT_SHA="${GITHUB_SHA:0:8}"
+          OBJECT_KEY="bench/${UPLOAD_TIME}_${SHORT_SHA}_${GITHUB_RUN_ID}.json"
+          echo "Uploading results to s3://exo-benchmark-results/$OBJECT_KEY"
+
+          aws s3 cp "$RESULTS_FILE" "s3://exo-benchmark-results/$OBJECT_KEY" \
+            --content-type application/json \
+            --metadata "commit=${GITHUB_SHA},run_id=${GITHUB_RUN_ID},branch=${GITHUB_REF_NAME}"
+
+          echo "Results uploaded successfully"
+          echo "View at: https://exo-benchmark-results.s3.amazonaws.com/$OBJECT_KEY"
+
+      - name: Cleanup EXO_HOME
+        if: always()
+        shell: bash
+        run: |
+          # Runs regardless of earlier step outcomes and removes the EXO_HOME directory.
+          printf 'Cleaning up EXO_HOME: %s\n' "$EXO_HOME"
+          rm -rf "$EXO_HOME"
--- a/.github/workflows/build-app.yml
+++ b/.github/workflows/build-app.yml
@@ -18,7 +18,6 @@ jobs:
      SPARKLE_ED25519_PRIVATE: ${{ secrets.SPARKLE_ED25519_PRIVATE }}
      SPARKLE_S3_BUCKET: ${{ secrets.SPARKLE_S3_BUCKET }}
      SPARKLE_S3_PREFIX: ${{ secrets.SPARKLE_S3_PREFIX }}
-      EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT: ${{ secrets.EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT }}
      AWS_REGION: ${{ secrets.AWS_REGION }}
      EXO_BUILD_NUMBER: ${{ github.run_number }}
      EXO_LIBP2P_NAMESPACE: ${{ github.ref_name }}
@@ -48,32 +47,6 @@ jobs:
          fi
          echo "RELEASE_VERSION=$VERSION" >> $GITHUB_ENV

-      - name: Compute build version from semver
-        run: |
-          VERSION="$RELEASE_VERSION"
-          # Extract major.minor.patch (strip prerelease suffix)
-          BASE_VERSION="${VERSION%%-*}"
-          MAJOR=$(echo "$BASE_VERSION" | cut -d. -f1)
-          MINOR=$(echo "$BASE_VERSION" | cut -d. -f2)
-          PATCH=$(echo "$BASE_VERSION" | cut -d. -f3)
-
-          # Extract prerelease number (e.g., "alpha.2" -> 2, or 999 for releases)
-          if [[ "$VERSION" == *-* ]]; then
-            PRERELEASE_PART="${VERSION#*-}"
-            PRERELEASE_NUM="${PRERELEASE_PART##*.}"
-            # Default to 0 if not a number
-            if ! [[ "$PRERELEASE_NUM" =~ ^[0-9]+$ ]]; then
-              PRERELEASE_NUM=0
-            fi
-          else
-            PRERELEASE_NUM=999
-          fi
-
-          # Compute: PRERELEASE + (1000 * PATCH) + (1_000_000 * MINOR) + (1_000_000_000 * MAJOR)
-          BUILD_VERSION=$((PRERELEASE_NUM + 1000 * PATCH + 1000000 * MINOR + 1000000000 * MAJOR))
-          echo "EXO_BUILD_VERSION=$BUILD_VERSION" >> $GITHUB_ENV
-          echo "Computed build version: $BUILD_VERSION from $VERSION"
-
      - name: Ensure tag commit is on main
        if: github.ref_type == 'tag'
        run: |
@@ -189,12 +162,11 @@ jobs:
            -configuration Release \
            -derivedDataPath build \
            MARKETING_VERSION="$RELEASE_VERSION" \
-            CURRENT_PROJECT_VERSION="$EXO_BUILD_VERSION" \
+            CURRENT_PROJECT_VERSION="$EXO_BUILD_NUMBER" \
            EXO_BUILD_TAG="$RELEASE_VERSION" \
            EXO_BUILD_COMMIT="$GITHUB_SHA" \
            SPARKLE_FEED_URL="$SPARKLE_FEED_URL" \
            SPARKLE_ED25519_PUBLIC="$SPARKLE_ED25519_PUBLIC" \
-            EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT="$EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT" \
            CODE_SIGNING_IDENTITY="$SIGNING_IDENTITY" \
            CODE_SIGN_INJECT_BASE_ENTITLEMENTS=YES
          mkdir -p ../../output
@@ -322,5 +294,5 @@ jobs:
          aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}${DMG_NAME}"
          if [[ "$IS_ALPHA" != "true" ]]; then
            aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-latest.dmg"
-            aws s3 cp appcast.xml "s3://${SPARKLE_S3_BUCKET}/${PREFIX}appcast.xml" --content-type application/xml --cache-control no-cache
          fi
+          aws s3 cp appcast.xml "s3://${SPARKLE_S3_BUCKET}/${PREFIX}appcast.xml" --content-type application/xml --cache-control no-cache
--- a/.prettierrc
+++ b/.prettierrc
@@ -1,3 +0,0 @@
-{
-  "useTabs": true
-}
--- a/.swift-format
+++ b/.swift-format
@@ -1,6 +0,0 @@
-{
-  "version": 1,
-  "indentation": {
-    "spaces": 4
-  }
-}
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 exo: Run your own AI cluster at home with everyday devices. Maintained by [exo labs](https://x.com/exolabs).

 <p align="center">
-  <a href="https://discord.gg/TJ4P57arEm" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/Discord-Join%20Server-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
+  <a href="https://discord.gg/72NsF6ux" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/Discord-Join%20Server-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
  <a href="https://x.com/exolabs" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/twitter/follow/exolabs?style=social" alt="X"></a>
  <a href="https://www.apache.org/licenses/LICENSE-2.0.html" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/License-Apache2.0-blue.svg" alt="License: Apache-2.0"></a>
 </p>
--- a/app/EXO/EXO/ContentView.swift
+++ b/app/EXO/EXO/ContentView.swift
@@ -12,7 +12,6 @@ struct ContentView: View {
    @EnvironmentObject private var controller: ExoProcessController
    @EnvironmentObject private var stateService: ClusterStateService
    @EnvironmentObject private var networkStatusService: NetworkStatusService
-    @EnvironmentObject private var localNetworkChecker: LocalNetworkChecker
    @EnvironmentObject private var updater: SparkleUpdater
    @State private var focusedNode: NodeViewModel?
    @State private var deletingInstanceIDs: Set<String> = []
@@ -21,15 +20,10 @@ struct ContentView: View {
    @State private var showDebugInfo = false
    @State private var bugReportInFlight = false
    @State private var bugReportMessage: String?
-    @State private var showAdvancedOptions = false
-    @State private var pendingNamespace: String = ""

    var body: some View {
        VStack(alignment: .leading, spacing: 12) {
            statusSection
-            if shouldShowLocalNetworkWarning {
-                localNetworkWarningBanner
-            }
            if shouldShowClusterDetails {
                Divider()
                overviewSection
@@ -44,7 +38,6 @@ struct ContentView: View {
        }
        .animation(.easeInOut(duration: 0.3), value: shouldShowClusterDetails)
        .animation(.easeInOut(duration: 0.3), value: shouldShowInstances)
-        .animation(.easeInOut(duration: 0.3), value: shouldShowLocalNetworkWarning)
        .padding()
        .frame(width: 340)
        .onAppear {
@@ -54,62 +47,9 @@ struct ContentView: View {
        }
    }

-    private var shouldShowLocalNetworkWarning: Bool {
-        if case .notWorking = localNetworkChecker.status {
-            return controller.status != .stopped
-        }
-        return false
-    }
-
-    private var localNetworkWarningBanner: some View {
-        VStack(alignment: .leading, spacing: 6) {
-            HStack(spacing: 6) {
-                Image(systemName: "exclamationmark.triangle.fill")
-                    .foregroundColor(.orange)
-                Text("Local Network Access Issue")
-                    .font(.caption)
-                    .fontWeight(.semibold)
-            }
-            Text(
-                "Device discovery won't work. To fix:\n1. Quit EXO\n2. Open System Settings → Privacy & Security → Local Network\n3. Toggle EXO off, then back on\n4. Relaunch EXO"
-            )
-            .font(.caption2)
-            .foregroundColor(.secondary)
-            .fixedSize(horizontal: false, vertical: true)
-            Button {
-                openLocalNetworkSettings()
-            } label: {
-                Text("Open Settings")
-                    .font(.caption2)
-            }
-            .buttonStyle(.bordered)
-            .controlSize(.small)
-        }
-        .padding(8)
-        .background(
-            RoundedRectangle(cornerRadius: 8)
-                .fill(Color.orange.opacity(0.1))
-        )
-        .overlay(
-            RoundedRectangle(cornerRadius: 8)
-                .stroke(Color.orange.opacity(0.3), lineWidth: 1)
-        )
-    }
-
-    private func openLocalNetworkSettings() {
-        // Open Privacy & Security settings - Local Network section
-        if let url = URL(
-            string: "x-apple.systempreferences:com.apple.preference.security?Privacy_LocalNetwork")
-        {
-            NSWorkspace.shared.open(url)
-        }
-    }
-
    private var topologySection: some View {
        Group {
-            if let topology = stateService.latestSnapshot?.topologyViewModel(
-                localNodeId: stateService.localNodeId), !topology.nodes.isEmpty
-            {
+            if let topology = stateService.latestSnapshot?.topologyViewModel(localNodeId: stateService.localNodeId), !topology.nodes.isEmpty {
                TopologyMiniView(topology: topology)
            }
        }
@@ -143,10 +83,8 @@ struct ContentView: View {
                VStack(alignment: .leading, spacing: 4) {
                    HStack {
                        VStack(alignment: .leading) {
-                            Text(
-                                "\(overview.usedRam, specifier: "%.0f") / \(overview.totalRam, specifier: "%.0f") GB"
-                            )
-                            .font(.headline)
+                            Text("\(overview.usedRam, specifier: "%.0f") / \(overview.totalRam, specifier: "%.0f") GB")
+                                .font(.headline)
                            Text("Memory")
                                .font(.caption)
                                .foregroundColor(.secondary)
@@ -259,8 +197,6 @@ struct ContentView: View {
                updater.checkForUpdates()
            }
            .padding(.bottom, 8)
-            advancedOptionsSection
-                .padding(.bottom, 8)
            debugSection
                .padding(.bottom, 8)
            controlButton(title: "Quit", tint: .secondary) {
@@ -270,9 +206,7 @@ struct ContentView: View {
        }
    }

-    private func controlButton(title: String, tint: Color = .primary, action: @escaping () -> Void)
-        -> some View
-    {
+    private func controlButton(title: String, tint: Color = .primary, action: @escaping () -> Void) -> some View {
        HoverButton(title: title, tint: tint, trailingSystemImage: nil, action: action)
    }

@@ -303,12 +237,9 @@ struct ContentView: View {
        Button {
            isExpanded.wrappedValue.toggle()
        } label: {
-            Label(
-                isExpanded.wrappedValue ? "Hide" : "Show All",
-                systemImage: isExpanded.wrappedValue ? "chevron.up" : "chevron.down"
-            )
-            .labelStyle(.titleAndIcon)
-            .contentTransition(.symbolEffect(.replace))
+            Label(isExpanded.wrappedValue ? "Hide" : "Show All", systemImage: isExpanded.wrappedValue ? "chevron.up" : "chevron.down")
+                .labelStyle(.titleAndIcon)
+                .contentTransition(.symbolEffect(.replace))
        }
        .buttonStyle(.plain)
        .font(.caption2)
@@ -396,47 +327,6 @@ struct ContentView: View {
        }
    }

-    private var advancedOptionsSection: some View {
-        VStack(alignment: .leading, spacing: 6) {
-            HStack {
-                Text("Advanced Options")
-                    .font(.caption)
-                    .foregroundColor(.secondary)
-                Spacer()
-                collapseButton(isExpanded: $showAdvancedOptions)
-            }
-            .animation(nil, value: showAdvancedOptions)
-            if showAdvancedOptions {
-                VStack(alignment: .leading, spacing: 8) {
-                    VStack(alignment: .leading, spacing: 4) {
-                        Text("Cluster Namespace")
-                            .font(.caption2)
-                            .foregroundColor(.secondary)
-                        HStack {
-                            TextField("optional", text: $pendingNamespace)
-                                .textFieldStyle(.roundedBorder)
-                                .font(.caption2)
-                                .onAppear {
-                                    pendingNamespace = controller.customNamespace
-                                }
-                            Button("Save & Restart") {
-                                controller.customNamespace = pendingNamespace
-                                if controller.status == .running || controller.status == .starting {
-                                    controller.restart()
-                                }
-                            }
-                            .font(.caption2)
-                            .disabled(pendingNamespace == controller.customNamespace)
-                        }
-
-                    }
-                }
-                .transition(.opacity)
-            }
-        }
-        .animation(.easeInOut(duration: 0.25), value: showAdvancedOptions)
-    }
-
    private var debugSection: some View {
        VStack(alignment: .leading, spacing: 6) {
            HStack {
@@ -459,7 +349,6 @@ struct ContentView: View {
                        .font(.caption2)
                        .foregroundColor(thunderboltStatusColor)
                    interfaceIpList
-                    rdmaStatusView
                    sendBugReportButton
                        .padding(.top, 6)
                }
@@ -469,52 +358,6 @@ struct ContentView: View {
        .animation(.easeInOut(duration: 0.25), value: showDebugInfo)
    }

-    private var rdmaStatusView: some View {
-        let rdma = networkStatusService.status.rdmaStatus
-        return VStack(alignment: .leading, spacing: 1) {
-            Text("RDMA: \(rdmaStatusText(rdma))")
-                .font(.caption2)
-                .foregroundColor(rdmaStatusColor(rdma))
-            if !rdma.devices.isEmpty {
-                Text("  Devices: \(rdma.devices.joined(separator: ", "))")
-                    .font(.caption2)
-                    .foregroundColor(.secondary)
-            }
-            if !rdma.activePorts.isEmpty {
-                Text("  Active Ports:")
-                    .font(.caption2)
-                    .foregroundColor(.secondary)
-                ForEach(rdma.activePorts, id: \.device) { port in
-                    Text("    \(port.device) port \(port.port): \(port.state)")
-                        .font(.caption2)
-                        .foregroundColor(.green)
-                }
-            }
-        }
-    }
-
-    private func rdmaStatusText(_ rdma: RDMAStatus) -> String {
-        switch rdma.rdmaCtlEnabled {
-        case .some(true):
-            return "Enabled"
-        case .some(false):
-            return "Disabled"
-        case nil:
-            return rdma.devices.isEmpty ? "Not Available" : "Available"
-        }
-    }
-
-    private func rdmaStatusColor(_ rdma: RDMAStatus) -> Color {
-        switch rdma.rdmaCtlEnabled {
-        case .some(true):
-            return .green
-        case .some(false):
-            return .orange
-        case nil:
-            return rdma.devices.isEmpty ? .secondary : .green
-        }
-    }
-
    private var sendBugReportButton: some View {
        VStack(alignment: .leading, spacing: 4) {
            Button {
@@ -648,3 +491,4 @@ private struct HoverButton: View {
        .onHover { isHovering = $0 }
    }
 }
+
--- a/app/EXO/EXO/EXOApp.swift
+++ b/app/EXO/EXO/EXOApp.swift
@@ -8,9 +8,9 @@
 import AppKit
 import CoreImage
 import CoreImage.CIFilterBuiltins
-import ServiceManagement
 import Sparkle
 import SwiftUI
+import ServiceManagement
 import UserNotifications
 import os.log

@@ -19,7 +19,6 @@ struct EXOApp: App {
    @StateObject private var controller: ExoProcessController
    @StateObject private var stateService: ClusterStateService
    @StateObject private var networkStatusService: NetworkStatusService
-    @StateObject private var localNetworkChecker: LocalNetworkChecker
    @StateObject private var updater: SparkleUpdater
    private let terminationObserver: TerminationObserver
    private let ciContext = CIContext(options: nil)
@@ -38,13 +37,9 @@ struct EXOApp: App {
        _stateService = StateObject(wrappedValue: service)
        let networkStatus = NetworkStatusService()
        _networkStatusService = StateObject(wrappedValue: networkStatus)
-        let localNetwork = LocalNetworkChecker()
-        _localNetworkChecker = StateObject(wrappedValue: localNetwork)
        _updater = StateObject(wrappedValue: updater)
        enableLaunchAtLoginIfNeeded()
        NetworkSetupHelper.ensureLaunchDaemonInstalled()
-        // Check local network access BEFORE launching exo
-        localNetwork.check()
        controller.scheduleLaunch(after: 15)
        service.startPolling()
        networkStatus.startPolling()
@@ -56,7 +51,6 @@ struct EXOApp: App {
                .environmentObject(controller)
                .environmentObject(stateService)
                .environmentObject(networkStatusService)
-                .environmentObject(localNetworkChecker)
                .environmentObject(updater)
        } label: {
            menuBarIcon
@@ -113,7 +107,7 @@ struct EXOApp: App {
        filter.contrast = 0.9

        guard let output = filter.outputImage,
-            let rendered = ciContext.createCGImage(output, from: output.extent)
+              let rendered = ciContext.createCGImage(output, from: output.extent)
        else {
            return nil
        }
@@ -126,8 +120,7 @@ struct EXOApp: App {
        do {
            try SMAppService.mainApp.register()
        } catch {
-            Logger().error(
-                "Failed to register EXO for launch at login: \(error.localizedDescription)")
+            Logger().error("Failed to register EXO for launch at login: \(error.localizedDescription)")
        }
    }
 }
@@ -152,7 +145,7 @@ final class SparkleUpdater: NSObject, ObservableObject {
        center.requestAuthorization(options: [.alert, .sound]) { _, _ in }
        controller.updater.automaticallyChecksForUpdates = true
        controller.updater.automaticallyDownloadsUpdates = false
-        controller.updater.updateCheckInterval = 900  // 15 minutes
+        controller.updater.updateCheckInterval = 900 // 15 minutes
        DispatchQueue.main.asyncAfter(deadline: .now() + 5) { [weak controller] in
            controller?.updater.checkForUpdatesInBackground()
        }
@@ -219,8 +212,7 @@ private final class ExoNotificationDelegate: NSObject, UNUserNotificationCenterD
    func userNotificationCenter(
        _ center: UNUserNotificationCenter,
        willPresent notification: UNNotification,
-        withCompletionHandler completionHandler: @escaping (UNNotificationPresentationOptions) ->
-            Void
+        withCompletionHandler completionHandler: @escaping (UNNotificationPresentationOptions) -> Void
    ) {
        completionHandler([.banner, .list, .sound])
    }
--- a/app/EXO/EXO/ExoProcessController.swift
+++ b/app/EXO/EXO/ExoProcessController.swift
@@ -2,8 +2,6 @@ import AppKit
 import Combine
 import Foundation

-private let customNamespaceKey = "EXOCustomNamespace"
-
@MainActor
 final class ExoProcessController: ObservableObject {
    enum Status: Equatable {
@@ -29,14 +27,6 @@ final class ExoProcessController: ObservableObject {
    @Published private(set) var status: Status = .stopped
    @Published private(set) var lastError: String?
    @Published private(set) var launchCountdownSeconds: Int?
-    @Published var customNamespace: String = {
-        return UserDefaults.standard.string(forKey: customNamespaceKey) ?? ""
-    }()
-    {
-        didSet {
-            UserDefaults.standard.set(customNamespace, forKey: customNamespaceKey)
-        }
-    }

    private var process: Process?
    private var runtimeDirectoryURL: URL?
@@ -190,7 +180,7 @@ final class ExoProcessController: ObservableObject {
    private func makeEnvironment(for runtimeURL: URL) -> [String: String] {
        var environment = ProcessInfo.processInfo.environment
        environment["EXO_RUNTIME_DIR"] = runtimeURL.path
-        environment["EXO_LIBP2P_NAMESPACE"] = computeNamespace()
+        environment["EXO_LIBP2P_NAMESPACE"] = buildTag()

        var paths: [String] = []
        if let existing = environment["PATH"], !existing.isEmpty {
@@ -222,19 +212,11 @@ final class ExoProcessController: ObservableObject {
        if let tag = Bundle.main.infoDictionary?["EXOBuildTag"] as? String, !tag.isEmpty {
            return tag
        }
-        if let short = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String,
-            !short.isEmpty
-        {
+        if let short = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String, !short.isEmpty {
            return short
        }
        return "dev"
    }
-
-    private func computeNamespace() -> String {
-        let base = buildTag()
-        let custom = customNamespace.trimmingCharacters(in: .whitespaces)
-        return custom.isEmpty ? base : custom
-    }
 }

 struct RuntimeError: LocalizedError {
--- a/app/EXO/EXO/Info.plist
+++ b/app/EXO/EXO/Info.plist
@@ -8,15 +8,5 @@
 	<string>$(EXO_BUILD_TAG)</string>
 	<key>EXOBuildCommit</key>
 	<string>$(EXO_BUILD_COMMIT)</string>
-	<key>EXOBugReportPresignedUrlEndpoint</key>
-	<string>$(EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT)</string>
-	<key>NSLocalNetworkUsageDescription</key>
-	<string>EXO needs local network access to discover and connect to other devices in your cluster for distributed AI inference.</string>
-	<key>NSBonjourServices</key>
-	<array>
-		<string>_p2p._tcp</string>
-		<string>_p2p._udp</string>
-		<string>_libp2p._udp</string>
-	</array>
 </dict>
 </plist>
--- a/app/EXO/EXO/Models/ClusterState.swift
+++ b/app/EXO/EXO/Models/ClusterState.swift
@@ -16,13 +16,10 @@ struct ClusterState: Decodable {
        self.instances = rawInstances.mapValues(\.instance)
        self.runners = try container.decode([String: RunnerStatusSummary].self, forKey: .runners)
        self.nodeProfiles = try container.decode([String: NodeProfile].self, forKey: .nodeProfiles)
-        let rawTasks =
-            try container.decodeIfPresent([String: TaggedTask].self, forKey: .tasks) ?? [:]
+        let rawTasks = try container.decodeIfPresent([String: TaggedTask].self, forKey: .tasks) ?? [:]
        self.tasks = rawTasks.compactMapValues(\.task)
        self.topology = try container.decodeIfPresent(Topology.self, forKey: .topology)
-        let rawDownloads =
-            try container.decodeIfPresent([String: [TaggedNodeDownload]].self, forKey: .downloads)
-            ?? [:]
+        let rawDownloads = try container.decodeIfPresent([String: [TaggedNodeDownload]].self, forKey: .downloads) ?? [:]
        self.downloads = rawDownloads.mapValues { $0.compactMap(\.status) }
    }

@@ -44,8 +41,7 @@ private struct TaggedInstance: Decodable {
        let payloads = try container.decode([String: ClusterInstancePayload].self)
        guard let entry = payloads.first else {
            throw DecodingError.dataCorrupted(
-                DecodingError.Context(
-                    codingPath: decoder.codingPath, debugDescription: "Empty instance payload")
+                DecodingError.Context(codingPath: decoder.codingPath, debugDescription: "Empty instance payload")
            )
        }
        self.instance = ClusterInstance(
@@ -81,8 +77,7 @@ struct RunnerStatusSummary: Decodable {
        let payloads = try container.decode([String: RunnerStatusDetail].self)
        guard let entry = payloads.first else {
            throw DecodingError.dataCorrupted(
-                DecodingError.Context(
-                    codingPath: decoder.codingPath, debugDescription: "Empty runner status payload")
+                DecodingError.Context(codingPath: decoder.codingPath, debugDescription: "Empty runner status payload")
            )
        }
        self.status = entry.key
@@ -262,9 +257,7 @@ struct ChatCompletionTaskParameters: Decodable, Equatable {

    func promptPreview() -> String? {
        guard let messages else { return nil }
-        if let userMessage = messages.last(where: {
-            $0.role?.lowercased() == "user" && ($0.content?.isEmpty == false)
-        }) {
+        if let userMessage = messages.last(where: { $0.role?.lowercased() == "user" && ($0.content?.isEmpty == false) }) {
            return userMessage.content
        }
        return messages.last?.content
@@ -372,3 +365,5 @@ extension ClusterState {

    func availableModels() -> [ModelOption] { [] }
 }
+
+
--- a/app/EXO/EXO/Services/BugReportService.swift
+++ b/app/EXO/EXO/Services/BugReportService.swift
@@ -1,3 +1,4 @@
+import CryptoKit
 import Foundation

 struct BugReportOutcome: Equatable {
@@ -6,17 +7,17 @@ struct BugReportOutcome: Equatable {
 }

 enum BugReportError: LocalizedError {
+    case missingCredentials
    case invalidEndpoint
-    case presignedUrlFailed(String)
    case uploadFailed(String)
    case collectFailed(String)

    var errorDescription: String? {
        switch self {
+        case .missingCredentials:
+            return "Bug report upload credentials are not set."
        case .invalidEndpoint:
            return "Bug report endpoint is invalid."
-        case .presignedUrlFailed(let message):
-            return "Failed to get presigned URLs: \(message)"
        case .uploadFailed(let message):
            return "Bug report upload failed: \(message)"
        case .collectFailed(let message):
@@ -26,13 +27,11 @@ enum BugReportError: LocalizedError {
 }

 struct BugReportService {
-    private struct PresignedUrlsRequest: Codable {
-        let keys: [String]
-    }
-
-    private struct PresignedUrlsResponse: Codable {
-        let urls: [String: String]
-        let expiresIn: Int?
+    struct AWSConfig {
+        let accessKey: String
+        let secretKey: String
+        let region: String
+        let bucket: String
    }

    func sendReport(
@@ -40,9 +39,9 @@ struct BugReportService {
        now: Date = Date(),
        isManual: Bool = false
    ) async throws -> BugReportOutcome {
-        let timestamp = Self.runTimestampString(now)
-        let dayPrefix = Self.dayPrefixString(now)
-        let prefix = "reports/\(dayPrefix)/\(timestamp)/"
+        let credentials = try loadCredentials()
+        let timestamp = ISO8601DateFormatter().string(from: now)
+        let prefix = "reports/\(timestamp)/"

        let logData = readLog()
        let ifconfigText = try await captureIfconfig()
@@ -67,82 +66,28 @@ struct BugReportService {
            ("\(prefix)exo.log", logData),
            ("\(prefix)state.json", stateData),
            ("\(prefix)events.json", eventsData),
-            ("\(prefix)report.json", reportJSON),
+            ("\(prefix)report.json", reportJSON)
        ]

-        let uploadItems: [(key: String, body: Data)] = uploads.compactMap { item in
-            guard let body = item.data else { return nil }
-            return (key: item.path, body: body)
+        let uploader = try S3Uploader(config: credentials)
+        for item in uploads {
+            guard let data = item.data else { continue }
+            try await uploader.upload(
+                objectPath: item.path,
+                body: data
+            )
        }

-        guard !uploadItems.isEmpty else {
-            return BugReportOutcome(success: false, message: "No data to upload")
-        }
-
-        let presignedUrls = try await fetchPresignedUploadUrls(keys: uploadItems.map(\.key))
-        for item in uploadItems {
-            guard let urlString = presignedUrls[item.key], let url = URL(string: urlString) else {
-                throw BugReportError.uploadFailed("Missing presigned URL for \(item.key)")
-            }
-            try await uploadToPresignedUrl(url: url, body: item.body)
-        }
-
-        return BugReportOutcome(
-            success: true, message: "Bug Report sent. Thank you for helping to improve EXO 1.0.")
+        return BugReportOutcome(success: true, message: "Bug Report sent. Thank you for helping to improve EXO 1.0.")
    }

-    private static func dayPrefixString(_ date: Date) -> String {
-        var calendar = Calendar(identifier: .gregorian)
-        calendar.timeZone = TimeZone(secondsFromGMT: 0) ?? .current
-        let components = calendar.dateComponents([.year, .month, .day], from: date)
-        let year = components.year ?? 0
-        let month = components.month ?? 0
-        let day = components.day ?? 0
-        return String(format: "%04d/%02d/%02d", year, month, day)
-    }
-
-    private static func runTimestampString(_ date: Date) -> String {
-        let formatter = DateFormatter()
-        formatter.locale = Locale(identifier: "en_US_POSIX")
-        formatter.timeZone = TimeZone(secondsFromGMT: 0) ?? .current
-        formatter.dateFormat = "yyyy-MM-dd'T'HHmmss.SSS'Z'"
-        return formatter.string(from: date)
-    }
-
-    private func fetchPresignedUploadUrls(keys: [String], bundle: Bundle = .main) async throws
-        -> [String: String]
-    {
-        guard
-            let endpointString = bundle.infoDictionary?["EXOBugReportPresignedUrlEndpoint"]
-                as? String
-        else {
-            throw BugReportError.invalidEndpoint
-        }
-        let trimmedEndpointString = endpointString.trimmingCharacters(in: .whitespacesAndNewlines)
-        guard !trimmedEndpointString.isEmpty, let endpoint = URL(string: trimmedEndpointString)
-        else {
-            throw BugReportError.invalidEndpoint
-        }
-
-        var request = URLRequest(url: endpoint)
-        request.httpMethod = "POST"
-        request.timeoutInterval = 10
-        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
-
-        let encoder = JSONEncoder()
-        request.httpBody = try encoder.encode(PresignedUrlsRequest(keys: keys))
-
-        let (data, response) = try await URLSession.shared.data(for: request)
-        guard let http = response as? HTTPURLResponse else {
-            throw BugReportError.presignedUrlFailed("Non-HTTP response")
-        }
-        guard (200..<300).contains(http.statusCode) else {
-            throw BugReportError.presignedUrlFailed("HTTP status \(http.statusCode)")
-        }
-
-        let decoder = JSONDecoder()
-        let decoded = try decoder.decode(PresignedUrlsResponse.self, from: data)
-        return decoded.urls
+    /// Loads the AWS key pair from Info.plist; throws `.missingCredentials` when unset. Never hard-code secrets here.
+    private func loadCredentials() throws -> AWSConfig {
+        let info = Bundle.main.infoDictionary ?? [:]
+        func value(_ key: String) -> String? { (info[key] as? String).flatMap { $0.isEmpty ? nil : $0 } }
+        guard let accessKey = value("EXOBugReportAWSAccessKey"), let secretKey = value("EXOBugReportAWSSecretKey")
+        else { throw BugReportError.missingCredentials }
+        return AWSConfig(accessKey: accessKey, secretKey: secretKey, region: "us-east-1", bucket: "exo-bug-reports")
+    }

    private func readLog() -> Data? {
@@ -155,8 +100,7 @@ struct BugReportService {
    private func captureIfconfig() async throws -> String {
        let result = runCommand(["/sbin/ifconfig"])
        guard result.exitCode == 0 else {
-            throw BugReportError.collectFailed(
-                result.error.isEmpty ? "ifconfig failed" : result.error)
+            throw BugReportError.collectFailed(result.error.isEmpty ? "ifconfig failed" : result.error)
        }
        return result.output
    }
@@ -164,23 +108,12 @@ struct BugReportService {
    private func readDebugInfo() -> DebugInfo {
        DebugInfo(
            thunderboltBridgeDisabled: readThunderboltBridgeDisabled(),
-            interfaces: readInterfaces(),
-            rdma: readRDMADebugInfo()
-        )
-    }
-
-    private func readRDMADebugInfo() -> DebugInfo.RDMADebugInfo {
-        DebugInfo.RDMADebugInfo(
-            rdmaCtlStatus: safeRunCommand(["/usr/bin/rdma_ctl", "status"]),
-            ibvDevices: safeRunCommand(["/usr/bin/ibv_devices"]),
-            ibvDevinfo: safeRunCommand(["/usr/bin/ibv_devinfo"])
+            interfaces: readInterfaces()
        )
    }

    private func readThunderboltBridgeDisabled() -> Bool? {
-        let result = runCommand([
-            "/usr/sbin/networksetup", "-getnetworkserviceenabled", "Thunderbolt Bridge",
-        ])
+        let result = runCommand(["/usr/sbin/networksetup", "-getnetworkserviceenabled", "Thunderbolt Bridge"])
        guard result.exitCode == 0 else { return nil }
        let output = result.output.lowercased()
        if output.contains("enabled") {
@@ -223,8 +156,7 @@ struct BugReportService {
        request.timeoutInterval = 5
        do {
            let (data, response) = try await URLSession.shared.data(for: request)
-            guard let http = response as? HTTPURLResponse, (200..<300).contains(http.statusCode)
-            else {
+            guard let http = response as? HTTPURLResponse, (200..<300).contains(http.statusCode) else {
                return nil
            }
            return data
@@ -233,36 +165,6 @@ struct BugReportService {
        }
    }

-    private func uploadToPresignedUrl(url: URL, body: Data) async throws {
-        let maxAttempts = 2
-        var lastError: Error?
-
-        for attempt in 1...maxAttempts {
-            do {
-                var request = URLRequest(url: url)
-                request.httpMethod = "PUT"
-                request.httpBody = body
-                request.timeoutInterval = 30
-
-                let (_, response) = try await URLSession.shared.data(for: request)
-                guard let http = response as? HTTPURLResponse else {
-                    throw BugReportError.uploadFailed("Non-HTTP response")
-                }
-                guard (200..<300).contains(http.statusCode) else {
-                    throw BugReportError.uploadFailed("HTTP status \(http.statusCode)")
-                }
-                return
-            } catch {
-                lastError = error
-                if attempt < maxAttempts {
-                    try await Task.sleep(nanoseconds: 400_000_000)
-                }
-            }
-        }
-
-        throw BugReportError.uploadFailed(lastError?.localizedDescription ?? "Unknown error")
-    }
-
    private func makeReportJson(
        timestamp: String,
        hostName: String,
@@ -280,7 +182,7 @@ struct BugReportService {
            "system": system,
            "exo_version": exo.version as Any,
            "exo_commit": exo.commit as Any,
-            "report_type": isManual ? "manual" : "automated",
+            "report_type": isManual ? "manual" : "automated"
        ]
        return try? JSONSerialization.data(withJSONObject: payload, options: [.prettyPrinted])
    }
@@ -311,13 +213,10 @@ struct BugReportService {
        let user = safeRunCommand(["/usr/bin/whoami"])
        let consoleUser = safeRunCommand(["/usr/bin/stat", "-f%Su", "/dev/console"])
        let uptime = safeRunCommand(["/usr/bin/uptime"])
-        let diskRoot = safeRunCommand([
-            "/bin/sh", "-c", "/bin/df -h / | awk 'NR==2 {print $1, $2, $3, $4, $5}'",
-        ])
+        let diskRoot = safeRunCommand(["/bin/sh", "-c", "/bin/df -h / | awk 'NR==2 {print $1, $2, $3, $4, $5}'"])

        let interfacesList = safeRunCommand(["/usr/sbin/ipconfig", "getiflist"])
-        let interfacesAndIPs =
-            interfacesList?
+        let interfacesAndIPs = interfacesList?
            .split(whereSeparator: { $0 == " " || $0 == "\n" })
            .compactMap { iface -> [String: Any]? in
                let name = String(iface)
@@ -328,8 +227,7 @@ struct BugReportService {
            } ?? []

        let wifiSSID: String?
-        let airportPath =
-            "/System/Library/PrivateFrameworks/Apple80211.framework/Versions/Current/Resources/airport"
+        let airportPath = "/System/Library/PrivateFrameworks/Apple80211.framework/Versions/Current/Resources/airport"
        if FileManager.default.isExecutableFile(atPath: airportPath) {
            wifiSSID = safeRunCommand([airportPath, "-I"]).flatMap(parseWifiSSID)
        } else {
@@ -357,7 +255,7 @@ struct BugReportService {
            "disk_root": diskRoot as Any,
            "interfaces_and_ips": interfacesAndIPs,
            "ipconfig_getiflist": interfacesList as Any,
-            "wifi_ssid": wifiSSID as Any,
+            "wifi_ssid": wifiSSID as Any
        ]
    }

@@ -415,8 +313,7 @@ struct BugReportService {
        for line in airportOutput.split(separator: "\n") {
            let trimmed = line.trimmingCharacters(in: .whitespaces)
            if trimmed.hasPrefix("SSID:") {
-                return trimmed.replacingOccurrences(of: "SSID:", with: "").trimmingCharacters(
-                    in: .whitespaces)
+                return trimmed.replacingOccurrences(of: "SSID:", with: "").trimmingCharacters(in: .whitespaces)
            }
        }
        return nil
@@ -453,7 +350,6 @@ struct BugReportService {
 private struct DebugInfo {
    let thunderboltBridgeDisabled: Bool?
    let interfaces: [InterfaceStatus]
-    let rdma: RDMADebugInfo

    struct InterfaceStatus {
        let name: String
@@ -462,21 +358,7 @@ private struct DebugInfo {
        func toDictionary() -> [String: Any] {
            [
                "name": name,
-                "ip": ip as Any,
-            ]
-        }
-    }
-
-    struct RDMADebugInfo {
-        let rdmaCtlStatus: String?
-        let ibvDevices: String?
-        let ibvDevinfo: String?
-
-        func toDictionary() -> [String: Any] {
-            [
-                "rdma_ctl_status": rdmaCtlStatus as Any,
-                "ibv_devices": ibvDevices as Any,
-                "ibv_devinfo": ibvDevinfo as Any,
+                "ip": ip as Any
            ]
        }
    }
@@ -484,8 +366,7 @@ private struct DebugInfo {
    func toDictionary() -> [String: Any] {
        [
            "thunderbolt_bridge_disabled": thunderboltBridgeDisabled as Any,
-            "interfaces": interfaces.map { $0.toDictionary() },
-            "rdma": rdma.toDictionary(),
+            "interfaces": interfaces.map { $0.toDictionary() }
        ]
    }
 }
@@ -495,3 +376,163 @@ private struct CommandResult {
    let output: String
    let error: String
 }
+
+private struct S3Uploader {
+    let config: BugReportService.AWSConfig
+
+    init(config: BugReportService.AWSConfig) throws {
+        self.config = config
+    }
+
+    func upload(objectPath: String, body: Data) async throws {
+        let host = "\(config.bucket).s3.amazonaws.com"
+        guard let url = URL(string: "https://\(host)/\(objectPath)") else {
+            throw BugReportError.invalidEndpoint
+        }
+
+        let now = Date()
+        let amzDate = awsTimestamp(now)
+        let dateStamp = dateStamp(now)
+        let payloadHash = sha256Hex(body)
+
+        let headers = [
+            "host": host,
+            "x-amz-content-sha256": payloadHash,
+            "x-amz-date": amzDate
+        ]
+
+        let canonicalRequest = buildCanonicalRequest(
+            method: "PUT",
+            url: url,
+            headers: headers,
+            payloadHash: payloadHash
+        )
+
+        let stringToSign = buildStringToSign(
+            amzDate: amzDate,
+            dateStamp: dateStamp,
+            canonicalRequestHash: sha256Hex(canonicalRequest.data(using: .utf8) ?? Data())
+        )
+
+        let signingKey = deriveKey(secret: config.secretKey, dateStamp: dateStamp, region: config.region, service: "s3")
+        let signature = hmacHex(key: signingKey, data: Data(stringToSign.utf8))
+
+        let signedHeaders = "host;x-amz-content-sha256;x-amz-date"
+        let authorization = """
+AWS4-HMAC-SHA256 Credential=\(config.accessKey)/\(dateStamp)/\(config.region)/s3/aws4_request, SignedHeaders=\(signedHeaders), Signature=\(signature)
+"""
+
+        var request = URLRequest(url: url)
+        request.httpMethod = "PUT"
+        request.httpBody = body
+        request.setValue(headers["x-amz-content-sha256"], forHTTPHeaderField: "x-amz-content-sha256")
+        request.setValue(headers["x-amz-date"], forHTTPHeaderField: "x-amz-date")
+        request.setValue(host, forHTTPHeaderField: "Host")
+        request.setValue(authorization, forHTTPHeaderField: "Authorization")
+
+        let (data, response) = try await URLSession.shared.data(for: request)
+        guard let http = response as? HTTPURLResponse, (200..<300).contains(http.statusCode) else {
+            let statusText = (response as? HTTPURLResponse)?.statusCode ?? -1
+            _ = data // ignore response body for UX
+            throw BugReportError.uploadFailed("HTTP status \(statusText)")
+        }
+    }
+
+    private func buildCanonicalRequest(
+        method: String,
+        url: URL,
+        headers: [String: String],
+        payloadHash: String
+    ) -> String {
+        let canonicalURI = encodePath(url.path)
+        let canonicalQuery = url.query ?? ""
+        let sortedHeaders = headers.sorted { $0.key < $1.key }
+        let canonicalHeaders = sortedHeaders
+            .map { "\($0.key.lowercased()):\($0.value)\n" }
+            .joined()
+        let signedHeaders = sortedHeaders.map { $0.key.lowercased() }.joined(separator: ";")
+
+        return [
+            method,
+            canonicalURI,
+            canonicalQuery,
+            canonicalHeaders,
+            signedHeaders,
+            payloadHash
+        ].joined(separator: "\n")
+    }
+
+    private func encodePath(_ path: String) -> String {
+        return path
+            .split(separator: "/")
+            .map { segment in
+                segment.addingPercentEncoding(withAllowedCharacters: Self.rfc3986) ?? String(segment)
+            }
+            .joined(separator: "/")
+            .prependSlashIfNeeded()
+    }
+
+    private func buildStringToSign(
+        amzDate: String,
+        dateStamp: String,
+        canonicalRequestHash: String
+    ) -> String {
+        """
+AWS4-HMAC-SHA256
+\(amzDate)
+\(dateStamp)/\(config.region)/s3/aws4_request
+\(canonicalRequestHash)
+"""
+    }
+
+    private func deriveKey(secret: String, dateStamp: String, region: String, service: String) -> Data {
+        let kDate = hmac(key: Data(("AWS4" + secret).utf8), data: Data(dateStamp.utf8))
+        let kRegion = hmac(key: kDate, data: Data(region.utf8))
+        let kService = hmac(key: kRegion, data: Data(service.utf8))
+        return hmac(key: kService, data: Data("aws4_request".utf8))
+    }
+
+    private func hmac(key: Data, data: Data) -> Data {
+        let keySym = SymmetricKey(data: key)
+        let mac = HMAC<SHA256>.authenticationCode(for: data, using: keySym)
+        return Data(mac)
+    }
+
+    private func hmacHex(key: Data, data: Data) -> String {
+        hmac(key: key, data: data).map { String(format: "%02x", $0) }.joined()
+    }
+
+    private func sha256Hex(_ data: Data) -> String {
+        let digest = SHA256.hash(data: data)
+        return digest.compactMap { String(format: "%02x", $0) }.joined()
+    }
+
+    /// SigV4 `x-amz-date` value for `date`: `yyyyMMdd'T'HHmmss'Z'` rendered in UTC.
+    private func awsTimestamp(_ date: Date) -> String { Self.utcString(date, format: "yyyyMMdd'T'HHmmss'Z'") }
+
+    /// SigV4 credential-scope date for `date`: `yyyyMMdd` rendered in UTC.
+    private func dateStamp(_ date: Date) -> String { Self.utcString(date, format: "yyyyMMdd") }
+    /// Fixed-format UTC rendering; en_US_POSIX keeps the user's calendar and 12/24-hour settings out of the output.
+    private static func utcString(_ date: Date, format: String) -> String {
+        let formatter = DateFormatter()
+        formatter.locale = Locale(identifier: "en_US_POSIX")
+        formatter.dateFormat = format
+        formatter.timeZone = TimeZone(abbreviation: "UTC")
+        return formatter.string(from: date)
+    }
+
+    private static let rfc3986: CharacterSet = {
+        var set = CharacterSet.alphanumerics
+        set.insert(charactersIn: "-._~")
+        return set
+    }()
+}
+
+private extension String {
+    /// Returns the receiver unchanged when it already begins with "/"; otherwise returns it with "/" prepended.
+    func prependSlashIfNeeded() -> String {
+        // Only a missing leading slash triggers the concatenation.
+        let hasLeadingSlash = hasPrefix("/")
+        return hasLeadingSlash ? self : "/" + self
+    }
+}
--- a/app/EXO/EXO/Services/ClusterStateService.swift
+++ b/app/EXO/EXO/Services/ClusterStateService.swift
@@ -57,9 +57,7 @@ final class ClusterStateService: ObservableObject {
            var request = URLRequest(url: url)
            request.cachePolicy = .reloadIgnoringLocalCacheData
            let (data, response) = try await session.data(for: request)
-            guard let httpResponse = response as? HTTPURLResponse,
-                (200..<300).contains(httpResponse.statusCode)
-            else {
+            guard let httpResponse = response as? HTTPURLResponse, (200..<300).contains(httpResponse.statusCode) else {
                return
            }
            if let nodeId = try? decoder.decode(String.self, from: data) {
@@ -115,9 +113,7 @@ final class ClusterStateService: ObservableObject {
        }
    }

-    func launchInstance(modelId: String, sharding: String, instanceMeta: String, minNodes: Int)
-        async
-    {
+    func launchInstance(modelId: String, sharding: String, instanceMeta: String, minNodes: Int) async {
        do {
            var request = URLRequest(url: baseURL.appendingPathComponent("instance"))
            request.httpMethod = "POST"
@@ -126,7 +122,7 @@ final class ClusterStateService: ObservableObject {
                "model_id": modelId,
                "sharding": sharding,
                "instance_meta": instanceMeta,
-                "min_nodes": minNodes,
+                "min_nodes": minNodes
            ]
            request.httpBody = try JSONSerialization.data(withJSONObject: payload, options: [])
            let (_, response) = try await session.data(for: request)
@@ -147,9 +143,7 @@ final class ClusterStateService: ObservableObject {
        do {
            let url = baseURL.appendingPathComponent("models")
            let (data, response) = try await session.data(from: url)
-            guard let httpResponse = response as? HTTPURLResponse,
-                (200..<300).contains(httpResponse.statusCode)
-            else {
+            guard let httpResponse = response as? HTTPURLResponse, (200..<300).contains(httpResponse.statusCode) else {
                throw URLError(.badServerResponse)
            }
            let list = try decoder.decode(ModelListResponse.self, from: data)
--- a/app/EXO/EXO/Services/LocalNetworkChecker.swift
+++ b/app/EXO/EXO/Services/LocalNetworkChecker.swift
@@ -1,150 +0,0 @@
-import Foundation
-import Network
-import os.log
-
-/// Checks if the app's local network permission is actually functional.
-///
-/// macOS local network permission can appear enabled in System Preferences but not
-/// actually work after a restart. This service detects this by creating a UDP
-/// connection to the mDNS multicast address (224.0.0.251:5353).
-@MainActor
-final class LocalNetworkChecker: ObservableObject {
-    enum Status: Equatable {
-        case unknown
-        case checking
-        case working
-        case notWorking(reason: String)
-
-        var isHealthy: Bool {
-            if case .working = self { return true }
-            return false
-        }
-
-        var displayText: String {
-            switch self {
-            case .unknown:
-                return "Unknown"
-            case .checking:
-                return "Checking..."
-            case .working:
-                return "Working"
-            case .notWorking(let reason):
-                return reason
-            }
-        }
-    }
-
-    private static let logger = Logger(subsystem: "io.exo.EXO", category: "LocalNetworkChecker")
-
-    @Published private(set) var status: Status = .unknown
-    @Published private(set) var lastConnectionState: String = "none"
-
-    private var connection: NWConnection?
-    private var checkTask: Task<Void, Never>?
-
-    /// Checks if local network access is working.
-    func check() {
-        checkTask?.cancel()
-        status = .checking
-        lastConnectionState = "connecting"
-
-        checkTask = Task { [weak self] in
-            guard let self else { return }
-            let result = await self.performCheck()
-            self.status = result
-            Self.logger.info("Local network check complete: \(result.displayText)")
-        }
-    }
-
-    private func performCheck() async -> Status {
-        Self.logger.info("Checking local network access via UDP multicast")
-
-        connection?.cancel()
-        connection = nil
-
-        // mDNS multicast address - same as libp2p uses for peer discovery
-        let host = NWEndpoint.Host("224.0.0.251")
-        let port = NWEndpoint.Port(integerLiteral: 5353)
-
-        let params = NWParameters.udp
-        params.allowLocalEndpointReuse = true
-
-        let conn = NWConnection(host: host, port: port, using: params)
-        connection = conn
-
-        return await withCheckedContinuation { continuation in
-            var hasResumed = false
-            let lock = NSLock()
-
-            let resumeOnce: (Status) -> Void = { status in
-                lock.lock()
-                defer { lock.unlock() }
-                guard !hasResumed else { return }
-                hasResumed = true
-                continuation.resume(returning: status)
-            }
-
-            conn.stateUpdateHandler = { [weak self] state in
-                let stateStr: String
-                switch state {
-                case .setup: stateStr = "setup"
-                case .preparing: stateStr = "preparing"
-                case .ready: stateStr = "ready"
-                case .waiting(let e): stateStr = "waiting(\(e))"
-                case .failed(let e): stateStr = "failed(\(e))"
-                case .cancelled: stateStr = "cancelled"
-                @unknown default: stateStr = "unknown"
-                }
-
-                Task { @MainActor in
-                    self?.lastConnectionState = stateStr
-                }
-
-                switch state {
-                case .ready:
-                    resumeOnce(.working)
-                case .waiting(let error):
-                    let errorStr = "\(error)"
-                    if errorStr.contains("54") || errorStr.contains("ECONNRESET") {
-                        resumeOnce(.notWorking(reason: "Connection blocked"))
-                    }
-                case .failed(let error):
-                    let errorStr = "\(error)"
-                    if errorStr.contains("65") || errorStr.contains("EHOSTUNREACH")
-                        || errorStr.contains("permission") || errorStr.contains("denied")
-                    {
-                        resumeOnce(.notWorking(reason: "Permission denied"))
-                    } else {
-                        resumeOnce(.notWorking(reason: "Failed: \(error.localizedDescription)"))
-                    }
-                case .cancelled, .setup, .preparing:
-                    break
-                @unknown default:
-                    break
-                }
-            }
-
-            conn.start(queue: .main)
-
-            Task {
-                try? await Task.sleep(nanoseconds: 3_000_000_000)
-                let state = conn.state
-                switch state {
-                case .ready:
-                    resumeOnce(.working)
-                case .waiting, .preparing, .setup:
-                    resumeOnce(.notWorking(reason: "Timeout (may be blocked)"))
-                default:
-                    resumeOnce(.notWorking(reason: "Timeout"))
-                }
-            }
-        }
-    }
-
-    func stop() {
-        checkTask?.cancel()
-        checkTask = nil
-        connection?.cancel()
-        connection = nil
-    }
-}
--- a/app/EXO/EXO/Services/NetworkSetupHelper.swift
+++ b/app/EXO/EXO/Services/NetworkSetupHelper.swift
@@ -5,62 +5,61 @@ import os.log
 enum NetworkSetupHelper {
    private static let logger = Logger(subsystem: "io.exo.EXO", category: "NetworkSetup")
    private static let daemonLabel = "io.exo.networksetup"
-    private static let scriptDestination =
-        "/Library/Application Support/EXO/disable_bridge_enable_dhcp.sh"
+    private static let scriptDestination = "/Library/Application Support/EXO/disable_bridge_enable_dhcp.sh"
    private static let plistDestination = "/Library/LaunchDaemons/io.exo.networksetup.plist"
    private static let requiredStartInterval: Int = 1791

    private static let setupScript = """
-        #!/usr/bin/env bash
+#!/usr/bin/env bash

-        set -euo pipefail
+set -euo pipefail

-        PREFS="/Library/Preferences/SystemConfiguration/preferences.plist"
+PREFS="/Library/Preferences/SystemConfiguration/preferences.plist"

-        # Remove bridge0 interface
-        ifconfig bridge0 &>/dev/null && {
-          ifconfig bridge0 | grep -q 'member' && {
-            ifconfig bridge0 | awk '/member/ {print $2}' | xargs -n1 ifconfig bridge0 deletem 2>/dev/null || true
-          }
-          ifconfig bridge0 destroy 2>/dev/null || true
-        }
+# Remove bridge0 interface
+ifconfig bridge0 &>/dev/null && {
+  ifconfig bridge0 | grep -q 'member' && {
+    ifconfig bridge0 | awk '/member/ {print $2}' | xargs -n1 ifconfig bridge0 deletem 2>/dev/null || true
+  }
+  ifconfig bridge0 destroy 2>/dev/null || true
+}

-        # Remove Thunderbolt Bridge from VirtualNetworkInterfaces in preferences.plist
-        /usr/libexec/PlistBuddy -c "Delete :VirtualNetworkInterfaces:Bridge:bridge0" "$PREFS" 2>/dev/null || true
+# Remove Thunderbolt Bridge from VirtualNetworkInterfaces in preferences.plist
+/usr/libexec/PlistBuddy -c "Delete :VirtualNetworkInterfaces:Bridge:bridge0" "$PREFS" 2>/dev/null || true

-        networksetup -listlocations | grep -q exo || {
-          networksetup -createlocation exo
-        }
+networksetup -listlocations | grep -q exo || {
+  networksetup -createlocation exo
+}

-        networksetup -switchtolocation exo
-        networksetup -listallhardwareports \\
-          | awk -F': ' '/Hardware Port: / {print $2}' \\
-          | while IFS=":" read -r name; do
-              case "$name" in
-                "Ethernet Adapter"*)
-                        ;;
-                "Thunderbolt Bridge")
-                        ;;
-                "Thunderbolt "*)
-                  networksetup -listallnetworkservices \\
-                    | grep -q "EXO $name" \\
-                      || networksetup -createnetworkservice "EXO $name" "$name" 2>/dev/null \\
-                      || continue
-                  networksetup -setdhcp "EXO $name"
-                        ;;
-                *)
-                  networksetup -listallnetworkservices \\
-                    | grep -q "$name" \\
-                      || networksetup -createnetworkservice "$name" "$name" 2>/dev/null \\
-                      || continue
-                        ;;
-              esac
-            done
+networksetup -switchtolocation exo
+networksetup -listallhardwareports \\
+  | awk -F': ' '/Hardware Port: / {print $2}' \\
+  | while IFS=":" read -r name; do
+      case "$name" in
+        "Ethernet Adapter"*)
+                ;;
+        "Thunderbolt Bridge")
+                ;;
+        "Thunderbolt "*)
+          networksetup -listallnetworkservices \\
+            | grep -q "EXO $name" \\
+              || networksetup -createnetworkservice "EXO $name" "$name" 2>/dev/null \\
+              || continue
+          networksetup -setdhcp "EXO $name"
+                ;;
+        *)
+          networksetup -listallnetworkservices \\
+            | grep -q "$name" \\
+              || networksetup -createnetworkservice "$name" "$name" 2>/dev/null \\
+              || continue
+                ;;
+      esac
+    done

-        networksetup -listnetworkservices | grep -q "Thunderbolt Bridge" && {
-          networksetup -setnetworkserviceenabled "Thunderbolt Bridge" off
-        } || true
-        """
+networksetup -listnetworkservices | grep -q "Thunderbolt Bridge" && {
+  networksetup -setnetworkserviceenabled "Thunderbolt Bridge" off
+} || true
+"""

    static func ensureLaunchDaemonInstalled() {
        Task.detached {
@@ -71,9 +70,7 @@ enum NetworkSetupHelper {
                try await installLaunchDaemon()
                logger.info("Network setup launch daemon installed and started")
            } catch {
-                logger.error(
-                    "Network setup launch daemon failed: \(error.localizedDescription, privacy: .public)"
-                )
+                logger.error("Network setup launch daemon failed: \(error.localizedDescription, privacy: .public)")
            }
        }
    }
@@ -85,8 +82,7 @@ enum NetworkSetupHelper {
        guard scriptExists, plistExists else { return false }
        guard
            let data = try? Data(contentsOf: URL(fileURLWithPath: plistDestination)),
-            let plist = try? PropertyListSerialization.propertyList(
-                from: data, options: [], format: nil) as? [String: Any]
+            let plist = try? PropertyListSerialization.propertyList(from: data, options: [], format: nil) as? [String: Any]
        else {
            return false
        }
@@ -96,9 +92,7 @@ enum NetworkSetupHelper {
        else {
            return false
        }
-        if let programArgs = plist["ProgramArguments"] as? [String],
-            programArgs.contains(scriptDestination) == false
-        {
+        if let programArgs = plist["ProgramArguments"] as? [String], programArgs.contains(scriptDestination) == false {
            return false
        }
        return true
@@ -111,59 +105,58 @@ enum NetworkSetupHelper {

    private static func makeInstallerScript() -> String {
        """
-        set -euo pipefail
+set -euo pipefail

-        LABEL="\(daemonLabel)"
-        SCRIPT_DEST="\(scriptDestination)"
-        PLIST_DEST="\(plistDestination)"
+LABEL="\(daemonLabel)"
+SCRIPT_DEST="\(scriptDestination)"
+PLIST_DEST="\(plistDestination)"

-        mkdir -p "$(dirname "$SCRIPT_DEST")"
+mkdir -p "$(dirname "$SCRIPT_DEST")"

-        cat > "$SCRIPT_DEST" <<'EOF_SCRIPT'
-        \(setupScript)
-        EOF_SCRIPT
-        chmod 755 "$SCRIPT_DEST"
+cat > "$SCRIPT_DEST" <<'EOF_SCRIPT'
+\(setupScript)
+EOF_SCRIPT
+chmod 755 "$SCRIPT_DEST"

-        cat > "$PLIST_DEST" <<'EOF_PLIST'
-        <?xml version="1.0" encoding="UTF-8"?>
-        <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-        <plist version="1.0">
-        <dict>
-          <key>Label</key>
-          <string>\(daemonLabel)</string>
-          <key>ProgramArguments</key>
-          <array>
-            <string>/bin/bash</string>
-            <string>\(scriptDestination)</string>
-          </array>
-          <key>StartInterval</key>
-          <integer>\(requiredStartInterval)</integer>
-          <key>RunAtLoad</key>
-          <true/>
-          <key>StandardOutPath</key>
-          <string>/var/log/\(daemonLabel).log</string>
-          <key>StandardErrorPath</key>
-          <string>/var/log/\(daemonLabel).err.log</string>
-        </dict>
-        </plist>
-        EOF_PLIST
+cat > "$PLIST_DEST" <<'EOF_PLIST'
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>Label</key>
+  <string>\(daemonLabel)</string>
+  <key>ProgramArguments</key>
+  <array>
+    <string>/bin/bash</string>
+    <string>\(scriptDestination)</string>
+  </array>
+  <key>StartInterval</key>
+  <integer>\(requiredStartInterval)</integer>
+  <key>RunAtLoad</key>
+  <true/>
+  <key>StandardOutPath</key>
+  <string>/var/log/\(daemonLabel).log</string>
+  <key>StandardErrorPath</key>
+  <string>/var/log/\(daemonLabel).err.log</string>
+</dict>
+</plist>
+EOF_PLIST

-        launchctl bootout system/"$LABEL" >/dev/null 2>&1 || true
-        launchctl bootstrap system "$PLIST_DEST"
-        launchctl enable system/"$LABEL"
-        launchctl kickstart -k system/"$LABEL"
-        """
+launchctl bootout system/"$LABEL" >/dev/null 2>&1 || true
+launchctl bootstrap system "$PLIST_DEST"
+launchctl enable system/"$LABEL"
+launchctl kickstart -k system/"$LABEL"
+"""
    }

    private static func runShellAsAdmin(_ script: String) throws {
-        let escapedScript =
-            script
+        let escapedScript = script
            .replacingOccurrences(of: "\\", with: "\\\\")
            .replacingOccurrences(of: "\"", with: "\\\"")

        let appleScriptSource = """
-            do shell script "\(escapedScript)" with administrator privileges
-            """
+do shell script "\(escapedScript)" with administrator privileges
+"""

        guard let appleScript = NSAppleScript(source: appleScriptSource) else {
            throw NetworkSetupError.scriptCreationFailed
--- a/app/EXO/EXO/Services/NetworkStatusService.swift
+++ b/app/EXO/EXO/Services/NetworkStatusService.swift
@@ -35,34 +35,14 @@ struct NetworkStatus: Equatable {
    let thunderboltBridgeState: ThunderboltState?
    let bridgeInactive: Bool?
    let interfaceStatuses: [InterfaceIpStatus]
-    let rdmaStatus: RDMAStatus

    static let empty = NetworkStatus(
        thunderboltBridgeState: nil,
        bridgeInactive: nil,
-        interfaceStatuses: [],
-        rdmaStatus: .empty
+        interfaceStatuses: []
    )
 }

-struct RDMAStatus: Equatable {
-    let rdmaCtlEnabled: Bool?
-    let devices: [String]
-    let activePorts: [RDMAPort]
-
-    var isAvailable: Bool {
-        rdmaCtlEnabled == true || !devices.isEmpty
-    }
-
-    static let empty = RDMAStatus(rdmaCtlEnabled: nil, devices: [], activePorts: [])
-}
-
-struct RDMAPort: Equatable {
-    let device: String
-    let port: String
-    let state: String
-}
-
 struct InterfaceIpStatus: Equatable {
    let interfaceName: String
    let ipAddress: String?
@@ -79,79 +59,10 @@ private struct NetworkStatusFetcher {
        NetworkStatus(
            thunderboltBridgeState: readThunderboltBridgeState(),
            bridgeInactive: readBridgeInactive(),
-            interfaceStatuses: readInterfaceStatuses(),
-            rdmaStatus: readRDMAStatus()
+            interfaceStatuses: readInterfaceStatuses()
        )
    }

-    private func readRDMAStatus() -> RDMAStatus {
-        let rdmaCtlEnabled = readRDMACtlEnabled()
-        let devices = readRDMADevices()
-        let activePorts = readRDMAActivePorts()
-        return RDMAStatus(
-            rdmaCtlEnabled: rdmaCtlEnabled, devices: devices, activePorts: activePorts)
-    }
-
-    private func readRDMACtlEnabled() -> Bool? {
-        let result = runCommand(["rdma_ctl", "status"])
-        guard result.exitCode == 0 else { return nil }
-        let output = result.output.lowercased().trimmingCharacters(in: .whitespacesAndNewlines)
-        if output.contains("enabled") {
-            return true
-        }
-        if output.contains("disabled") {
-            return false
-        }
-        return nil
-    }
-
-    private func readRDMADevices() -> [String] {
-        let result = runCommand(["ibv_devices"])
-        guard result.exitCode == 0 else { return [] }
-        var devices: [String] = []
-        for line in result.output.split(separator: "\n") {
-            let trimmed = line.trimmingCharacters(in: .whitespaces)
-            if trimmed.hasPrefix("---") || trimmed.lowercased().hasPrefix("device")
-                || trimmed.isEmpty
-            {
-                continue
-            }
-            let parts = trimmed.split(separator: " ", maxSplits: 1)
-            if let deviceName = parts.first {
-                devices.append(String(deviceName))
-            }
-        }
-        return devices
-    }
-
-    private func readRDMAActivePorts() -> [RDMAPort] {
-        let result = runCommand(["ibv_devinfo"])
-        guard result.exitCode == 0 else { return [] }
-        var ports: [RDMAPort] = []
-        var currentDevice: String?
-        var currentPort: String?
-
-        for line in result.output.split(separator: "\n") {
-            let trimmed = line.trimmingCharacters(in: .whitespaces)
-            if trimmed.hasPrefix("hca_id:") {
-                currentDevice = trimmed.replacingOccurrences(of: "hca_id:", with: "")
-                    .trimmingCharacters(in: .whitespaces)
-            } else if trimmed.hasPrefix("port:") {
-                currentPort = trimmed.replacingOccurrences(of: "port:", with: "")
-                    .trimmingCharacters(in: .whitespaces)
-            } else if trimmed.hasPrefix("state:") {
-                let state = trimmed.replacingOccurrences(of: "state:", with: "").trimmingCharacters(
-                    in: .whitespaces)
-                if let device = currentDevice, let port = currentPort {
-                    if state.lowercased().contains("active") {
-                        ports.append(RDMAPort(device: device, port: port, state: state))
-                    }
-                }
-            }
-        }
-        return ports
-    }
-
    private func readThunderboltBridgeState() -> ThunderboltState? {
        let result = runCommand(["networksetup", "-getnetworkserviceenabled", "Thunderbolt Bridge"])
        guard result.exitCode == 0 else {
@@ -174,11 +85,10 @@ private struct NetworkStatusFetcher {
    private func readBridgeInactive() -> Bool? {
        let result = runCommand(["ifconfig", "bridge0"])
        guard result.exitCode == 0 else { return nil }
-        guard
-            let statusLine = result.output
-                .components(separatedBy: .newlines)
-                .first(where: { $0.contains("status:") })?
-                .lowercased()
+        guard let statusLine = result.output
+            .components(separatedBy: .newlines)
+            .first(where: { $0.contains("status:") })?
+            .lowercased()
        else {
            return nil
        }
@@ -261,3 +171,4 @@ private struct NetworkStatusFetcher {
        )
    }
 }
+
--- a/app/EXO/EXO/ViewModels/InstanceViewModel.swift
+++ b/app/EXO/EXO/ViewModels/InstanceViewModel.swift
@@ -57,7 +57,7 @@ struct InstanceViewModel: Identifiable, Equatable {
        case waiting
        case failed
        case idle
-        case preparing
+        case unknown

        var label: String {
            switch self {
@@ -68,7 +68,7 @@ struct InstanceViewModel: Identifiable, Equatable {
            case .waiting: return "Waiting"
            case .failed: return "Failed"
            case .idle: return "Idle"
-            case .preparing: return "Preparing"
+            case .unknown: return "Unknown"
            }
        }
    }
@@ -107,13 +107,10 @@ extension ClusterState {
            let nodeToRunner = instance.shardAssignments.nodeToRunner
            let nodeIds = Array(nodeToRunner.keys)
            let runnerIds = Array(nodeToRunner.values)
-            let nodeNames = nodeIds.compactMap {
-                nodeProfiles[$0]?.friendlyName ?? nodeProfiles[$0]?.modelId ?? $0
-            }
+            let nodeNames = nodeIds.compactMap { nodeProfiles[$0]?.friendlyName ?? nodeProfiles[$0]?.modelId ?? $0 }
            let statuses = runnerIds.compactMap { runners[$0]?.status.lowercased() }
            let downloadProgress = aggregateDownloadProgress(for: nodeIds)
-            let state = InstanceViewModel.State(
-                statuses: statuses, hasActiveDownload: downloadProgress != nil)
+            let state = InstanceViewModel.State(statuses: statuses, hasActiveDownload: downloadProgress != nil)
            let chatTasks = (chatTasksByInstance[entry.key] ?? [])
                .sorted(by: { $0.sortPriority < $1.sortPriority })
                .map { InstanceTaskViewModel(task: $0) }
@@ -168,8 +165,8 @@ extension ClusterState {
    }
 }

-extension InstanceViewModel.State {
-    fileprivate init(statuses: [String], hasActiveDownload: Bool = false) {
+private extension InstanceViewModel.State {
+    init(statuses: [String], hasActiveDownload: Bool = false) {
        if statuses.contains(where: { $0.contains("failed") }) {
            self = .failed
        } else if hasActiveDownload || statuses.contains(where: { $0.contains("downloading") }) {
@@ -185,7 +182,7 @@ extension InstanceViewModel.State {
        } else if statuses.isEmpty {
            self = .idle
        } else {
-            self = .preparing
+            self = .unknown
        }
    }
 }
@@ -246,3 +243,4 @@ extension InstanceTaskViewModel {
        self.parameters = task.parameters
    }
 }
+
--- a/app/EXO/EXO/ViewModels/NodeViewModel.swift
+++ b/app/EXO/EXO/ViewModels/NodeViewModel.swift
@@ -87,9 +87,7 @@ struct TopologyViewModel {
 extension ClusterState {
    func topologyViewModel(localNodeId: String?) -> TopologyViewModel? {
        let topologyNodeIds = Set(topology?.nodes.map(\.nodeId) ?? [])
-        let allNodes = nodeViewModels().filter {
-            topologyNodeIds.isEmpty || topologyNodeIds.contains($0.id)
-        }
+        let allNodes = nodeViewModels().filter { topologyNodeIds.isEmpty || topologyNodeIds.contains($0.id) }
        guard !allNodes.isEmpty else { return nil }

        let nodesById = Dictionary(uniqueKeysWithValues: allNodes.map { ($0.id, $0) })
@@ -108,24 +106,18 @@ extension ClusterState {
        }

        // Rotate so the local node (from /node_id API) is first
-        if let localId = localNodeId,
-            let index = orderedNodes.firstIndex(where: { $0.id == localId })
-        {
+        if let localId = localNodeId, let index = orderedNodes.firstIndex(where: { $0.id == localId }) {
            orderedNodes = Array(orderedNodes[index...]) + Array(orderedNodes[..<index])
        }

        let nodeIds = Set(orderedNodes.map(\.id))
-        let edgesArray: [TopologyEdgeViewModel] =
-            topology?.connections?.compactMap { connection in
-                guard nodeIds.contains(connection.localNodeId),
-                    nodeIds.contains(connection.sendBackNodeId)
-                else { return nil }
-                return TopologyEdgeViewModel(
-                    sourceId: connection.localNodeId, targetId: connection.sendBackNodeId)
-            } ?? []
+        let edgesArray: [TopologyEdgeViewModel] = topology?.connections?.compactMap { connection in
+            guard nodeIds.contains(connection.localNodeId), nodeIds.contains(connection.sendBackNodeId) else { return nil }
+            return TopologyEdgeViewModel(sourceId: connection.localNodeId, targetId: connection.sendBackNodeId)
+        } ?? []
        let edges = Set(edgesArray)

-        return TopologyViewModel(
-            nodes: orderedNodes, edges: Array(edges), currentNodeId: localNodeId)
+        return TopologyViewModel(nodes: orderedNodes, edges: Array(edges), currentNodeId: localNodeId)
    }
 }
+
--- a/app/EXO/EXO/Views/InstanceRowView.swift
+++ b/app/EXO/EXO/Views/InstanceRowView.swift
@@ -20,8 +20,8 @@ struct InstanceRowView: View {
                if let progress = instance.downloadProgress {
                    downloadStatusView(progress: progress)
                } else {
-                    statusChip(label: instance.state.label.uppercased(), color: statusColor)
-                }
+                statusChip(label: instance.state.label.uppercased(), color: statusColor)
+            }
            }
            if let progress = instance.downloadProgress {
                GeometryReader { geometry in
@@ -83,7 +83,7 @@ struct InstanceRowView: View {
        case .ready: return .teal
        case .waiting, .idle: return .gray
        case .failed: return .red
-        case .preparing: return .secondary
+        case .unknown: return .secondary
        }
    }

@@ -97,8 +97,7 @@ struct InstanceRowView: View {
                        .font(.caption)
                        .fontWeight(.semibold)
                    if let subtitle = task.subtitle,
-                        subtitle.caseInsensitiveCompare(parentModelName) != .orderedSame
-                    {
+                       subtitle.caseInsensitiveCompare(parentModelName) != .orderedSame {
                        Text(subtitle)
                            .font(.caption2)
                            .foregroundColor(.secondary)
@@ -235,12 +234,9 @@ struct InstanceRowView: View {
        Button {
            isExpanded.wrappedValue.toggle()
        } label: {
-            Label(
-                isExpanded.wrappedValue ? "Hide" : "Show",
-                systemImage: isExpanded.wrappedValue ? "chevron.up" : "chevron.down"
-            )
-            .labelStyle(.titleAndIcon)
-            .contentTransition(.symbolEffect(.replace))
+            Label(isExpanded.wrappedValue ? "Hide" : "Show", systemImage: isExpanded.wrappedValue ? "chevron.up" : "chevron.down")
+                .labelStyle(.titleAndIcon)
+                .contentTransition(.symbolEffect(.replace))
        }
        .buttonStyle(.plain)
        .font(.caption2)
@@ -315,9 +311,7 @@ struct InstanceRowView: View {
        }

        @ViewBuilder
-        private func detailRow(
-            icon: String? = nil, title: String, value: String, tint: Color = .secondary
-        ) -> some View {
+        private func detailRow(icon: String? = nil, title: String, value: String, tint: Color = .secondary) -> some View {
            HStack(alignment: .firstTextBaseline, spacing: 6) {
                if let icon {
                    Image(systemName: icon)
@@ -335,3 +329,4 @@ struct InstanceRowView: View {
        }
    }
 }
+
--- a/app/EXO/EXO/Views/NodeDetailView.swift
+++ b/app/EXO/EXO/Views/NodeDetailView.swift
@@ -32,3 +32,4 @@ struct NodeDetailView: View {
        }
    }
 }
+
--- a/app/EXO/EXO/Views/NodeRowView.swift
+++ b/app/EXO/EXO/Views/NodeRowView.swift
@@ -28,3 +28,4 @@ struct NodeRowView: View {
        .padding(.vertical, 4)
    }
 }
+
--- a/app/EXO/EXO/Views/TopologyMiniView.swift
+++ b/app/EXO/EXO/Views/TopologyMiniView.swift
@@ -76,33 +76,30 @@ struct TopologyMiniView: View {

    private func connectionLines(in size: CGSize) -> some View {
        let positions = positionedNodes(in: size)
-        let positionById = Dictionary(
-            uniqueKeysWithValues: positions.map { ($0.node.id, $0.point) })
+        let positionById = Dictionary(uniqueKeysWithValues: positions.map { ($0.node.id, $0.point) })
        return Canvas { context, _ in
            guard !topology.edges.isEmpty else { return }
            let nodeRadius: CGFloat = 32
            let arrowLength: CGFloat = 10
            let arrowSpread: CGFloat = .pi / 7
            for edge in topology.edges {
-                guard let start = positionById[edge.sourceId], let end = positionById[edge.targetId]
-                else { continue }
+                guard let start = positionById[edge.sourceId], let end = positionById[edge.targetId] else { continue }
                let dx = end.x - start.x
                let dy = end.y - start.y
                let distance = max(CGFloat(hypot(dx, dy)), 1)
                let ux = dx / distance
                let uy = dy / distance
-                let adjustedStart = CGPoint(
-                    x: start.x + ux * nodeRadius, y: start.y + uy * nodeRadius)
+                let adjustedStart = CGPoint(x: start.x + ux * nodeRadius, y: start.y + uy * nodeRadius)
                let adjustedEnd = CGPoint(x: end.x - ux * nodeRadius, y: end.y - uy * nodeRadius)

                var linePath = Path()
                linePath.move(to: adjustedStart)
                linePath.addLine(to: adjustedEnd)
-                context.stroke(
+            context.stroke(
                    linePath,
                    with: .color(.secondary.opacity(0.3)),
-                    style: StrokeStyle(lineWidth: 1, dash: [4, 4])
-                )
+                style: StrokeStyle(lineWidth: 1, dash: [4, 4])
+            )

                let angle = atan2(uy, ux)
                let tip = adjustedEnd
@@ -171,3 +168,5 @@ private struct NodeGlyphView: View {
        .frame(width: 95)
    }
 }
+
+
--- a/app/EXO/EXOTests/EXOTests.swift
+++ b/app/EXO/EXOTests/EXOTests.swift
@@ -6,7 +6,6 @@
 //

 import Testing
-
@testable import EXO

 struct EXOTests {
--- a/bench/exo_bench.py
+++ b/bench/exo_bench.py
@@ -1,526 +0,0 @@
-#!/usr/bin/env python3
-# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
-from __future__ import annotations
-
-import argparse
-import http.client
-import json
-import os
-import time
-from collections.abc import Callable
-from statistics import mean
-from typing import Any
-from urllib.parse import urlencode
-
-from loguru import logger
-from transformers import AutoTokenizer
-
-from exo.shared.models.model_cards import MODEL_CARDS
-from exo.shared.types.memory import Memory
-
-
-class ExoHttpError(RuntimeError):
-    def __init__(self, status: int, reason: str, body_preview: str):
-        super().__init__(f"HTTP {status} {reason}: {body_preview}")
-        self.status = status
-
-
-class ExoClient:
-    def __init__(self, host: str, port: int, timeout_s: float = 2400.0):
-        self.host = host
-        self.port = port
-        self.timeout_s = timeout_s
-
-    def request_json(
-        self,
-        method: str,
-        path: str,
-        params: dict[str, Any] | None = None,
-        body: dict[str, Any] | None = None,
-        headers: dict[str, str] | None = None,
-    ) -> Any:
-        if not path.startswith("/"):
-            path = "/" + path
-        if params:
-            path = path + "?" + urlencode(params)
-
-        conn = http.client.HTTPConnection(self.host, self.port, timeout=self.timeout_s)
-        try:
-            payload: bytes | None = None
-            hdrs: dict[str, str] = {"Accept": "application/json"}
-
-            if body is not None:
-                payload = json.dumps(body).encode("utf-8")
-                hdrs["Content-Type"] = "application/json"
-            if headers:
-                hdrs.update(headers)
-
-            conn.request(method.upper(), path, body=payload, headers=hdrs)
-            resp = conn.getresponse()
-            raw = resp.read()
-            text = raw.decode("utf-8", errors="replace") if raw else ""
-
-            if resp.status >= 400:
-                raise ExoHttpError(resp.status, resp.reason, text[:300])
-
-            if not text:
-                return None
-            return json.loads(text)
-        finally:
-            conn.close()
-
-    def post_bench_chat_completions(self, payload: dict[str, Any]) -> dict[str, Any]:
-        return self.request_json("POST", "/bench/chat/completions", body=payload)
-
-
-def unwrap_instance(instance: dict[str, Any]) -> dict[str, Any]:
-    if len(instance) != 1:
-        raise KeyError(f"Expected 1 key, got keys={list(instance.keys())}")
-
-    tag = next(iter(instance))
-    inner = instance[tag]
-    if not isinstance(inner, dict):
-        raise TypeError(f"payload for {tag} must be dict, got {type(inner)}")
-    return inner
-
-
-def instance_id_from_instance(instance: dict[str, Any]) -> str:
-    inner = unwrap_instance(instance)
-    return str(inner["instanceId"])
-
-
-def nodes_used_in_instance(instance: dict[str, Any]) -> int:
-    inner = unwrap_instance(instance)
-    return len(inner["shardAssignments"]["nodeToRunner"])
-
-
-def runner_ids_from_instance(instance: dict[str, Any]) -> list[str]:
-    inner = unwrap_instance(instance)
-    runner_to_shard = inner["shardAssignments"]["runnerToShard"]
-    return list(runner_to_shard.keys())
-
-
-def runner_ready(runner: dict[str, Any]) -> bool:
-    return "RunnerReady" in runner
-
-
-def wait_for_instance_ready(
-    client: ExoClient, instance_id: str, timeout: float = 24000.0
-) -> None:
-    start_time = time.time()
-    while time.time() - start_time < timeout:
-        state = client.request_json("GET", "/state")
-        instances = state.get("instances", {})
-
-        if instance_id not in instances:
-            time.sleep(0.1)
-            continue
-
-        instance = instances[instance_id]
-        runner_ids = runner_ids_from_instance(instance)
-        runners = state.get("runners", {})
-
-        if all(runner_ready(runners.get(rid, {})) for rid in runner_ids):
-            return
-
-        time.sleep(0.1)
-
-    raise TimeoutError(f"Instance {instance_id} did not become ready within {timeout=}")
-
-
-def wait_for_instance_gone(
-    client: ExoClient, instance_id: str, timeout: float = 3.0
-) -> None:
-    start_time = time.time()
-    while time.time() - start_time < timeout:
-        try:
-            client.request_json("GET", f"/instance/{instance_id}")
-            time.sleep(0.4)
-        except ExoHttpError as e:
-            if e.status == 404:
-                return
-
-    raise TimeoutError(f"Instance {instance_id} did not get deleted within {timeout=}")
-
-
-def format_peak_memory(b: float) -> str:
-    for unit in ["B", "KB", "MB", "GB", "TB"]:
-        if b < 1024.0:
-            return f"{b:.2f}{unit}"
-        b /= 1024.0
-    raise ValueError("You're using petabytes of memory. Something went wrong...")
-
-
-def parse_int_list(values: list[str]) -> list[int]:
-    items: list[int] = []
-    for v in values:
-        for part in v.split(","):
-            part = part.strip()
-            if part:
-                items.append(int(part))
-
-    seen: set[int] = set()
-    out: list[int] = []
-    for x in items:
-        if x not in seen:
-            out.append(x)
-            seen.add(x)
-    return out
-
-
-def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]:
-    models = client.request_json("GET", "/models") or {}
-    data = models.get("data") or []
-
-    for m in data:
-        if m.get("id") == model_arg:
-            short_id = str(m["id"])
-            full_id = str(m.get("hugging_face_id") or m["id"])
-            return short_id, full_id
-
-    for m in data:
-        if m.get("hugging_face_id") == model_arg:
-            short_id = str(m["id"])
-            full_id = str(m["hugging_face_id"])
-            return short_id, full_id
-
-    raise ValueError(f"Model not found in /models: {model_arg}")
-
-
-def placement_filter(instance_meta: str, wanted: str) -> bool:
-    s = (instance_meta or "").lower()
-    if wanted == "both":
-        return ("ring" in s) or ("jaccl" in s)
-    return wanted in s
-
-
-def sharding_filter(sharding: str, wanted: str) -> bool:
-    s = (sharding or "").lower()
-    if wanted == "both":
-        return ("pipeline" in s) or ("tensor" in s)
-    return wanted in s
-
-
-def run_one_completion(
-    client: ExoClient, model_id: str, pp_hint: int, tg: int, prompt_sizer: PromptSizer
-) -> tuple[dict[str, Any], int]:
-    content, pp_tokens = prompt_sizer.build(pp_hint)
-    payload: dict[str, Any] = {
-        "model": model_id,
-        "messages": [{"role": "user", "content": content}],
-        "stream": False,
-        "max_tokens": tg,
-    }
-
-    t0 = time.perf_counter()
-    out = client.post_bench_chat_completions(payload)
-    elapsed = time.perf_counter() - t0
-
-    stats = out.get("generation_stats")
-
-    preview = (out.get("choices") or [{}])[0]["message"]["content"][:200]
-
-    return {
-        "elapsed_s": elapsed,
-        "output_text_preview": preview,
-        "stats": stats,
-    }, pp_tokens
-
-
-class PromptSizer:
-    def __init__(self, tokenizer: Any, atom: str = "a "):
-        self.tokenizer = tokenizer
-        self.atom = atom
-        self.count_fn = PromptSizer._make_counter(tokenizer)
-        self.base_tokens = self.count_fn("")
-
-    @staticmethod
-    def _make_counter(tokenizer: Any) -> Callable[[str], int]:
-        def count_fn(user_content: str) -> int:
-            messages = [{"role": "user", "content": user_content}]
-            ids = tokenizer.apply_chat_template(
-                messages, tokenize=True, add_generation_prompt=True
-            )
-            return int(len(ids))
-
-        return count_fn
-
-    def build(self, target_prompt_tokens: int) -> tuple[str, int]:
-        target = int(target_prompt_tokens)
-        if target < self.base_tokens:
-            raise RuntimeError(
-                f"Target ({target}) is smaller than template overhead ({self.base_tokens})."
-            )
-
-        content = ""
-        tok = self.count_fn(content)
-
-        while tok < target:
-            content += self.atom
-            tok = self.count_fn(content)
-
-        if tok != target:
-            raise RuntimeError(
-                f"Overshot: got {tok} tokens (target {target}). "
-                f"Pick a different atom (try ' a' or '\\n' or '0 ')."
-            )
-
-        return content, tok
-
-
-def main() -> int:
-    ap = argparse.ArgumentParser(
-        prog="exo-bench",
-        description="Benchmark exo model throughput across placement previews.",
-    )
-    ap.add_argument("--host", default=os.environ.get("EXO_HOST", "localhost"))
-    ap.add_argument(
-        "--port", type=int, default=int(os.environ.get("EXO_PORT", "52415"))
-    )
-    ap.add_argument("--model", required=True, help="Model short id or huggingface id")
-    ap.add_argument(
-        "--pp",
-        nargs="+",
-        required=True,
-        help="Prompt-size hints (ints). Accepts commas.",
-    )
-    ap.add_argument(
-        "--tg",
-        nargs="+",
-        required=True,
-        help="Generation lengths (ints). Accepts commas.",
-    )
-    ap.add_argument(
-        "--max-nodes",
-        type=int,
-        default=4,
-        help="Only consider placements using <= this many nodes.",
-    )
-    ap.add_argument(
-        "--instance-meta", choices=["ring", "jaccl", "both"], default="both"
-    )
-    ap.add_argument(
-        "--sharding", choices=["pipeline", "tensor", "both"], default="both"
-    )
-    ap.add_argument(
-        "--skip-pipeline-jaccl",
-        action="store_true",
-        help="Pipeline jaccl is often pointless, skip by default",
-    )
-    ap.add_argument(
-        "--repeat", type=int, default=1, help="Repetitions per (pp,tg) pair."
-    )
-    ap.add_argument(
-        "--warmup",
-        type=int,
-        default=0,
-        help="Warmup runs per placement (uses first pp/tg).",
-    )
-    ap.add_argument(
-        "--timeout", type=float, default=2400.0, help="HTTP timeout (seconds)."
-    )
-    ap.add_argument(
-        "--json-out",
-        default="bench/results.json",
-        help="Write raw per-run results JSON to this path.",
-    )
-    ap.add_argument(
-        "--dry-run", action="store_true", help="List selected placements and exit."
-    )
-    args = ap.parse_args()
-
-    pp_list = parse_int_list(args.pp)
-    tg_list = parse_int_list(args.tg)
-    if not pp_list or not tg_list:
-        logger.error("pp and tg lists must be non-empty")
-        return 2
-    if args.repeat <= 0:
-        logger.error("--repeat must be >= 1")
-        return 2
-
-    client = ExoClient(args.host, args.port, timeout_s=args.timeout)
-    short_id, full_model_id = resolve_model_short_id(client, args.model)
-
-    previews_resp = client.request_json(
-        "GET", "/instance/previews", params={"model_id": short_id}
-    )
-    previews = previews_resp.get("previews") or []
-
-    tokenizer = AutoTokenizer.from_pretrained(
-        full_model_id,
-        trust_remote_code=True,
-    )
-    if tokenizer is None:
-        raise RuntimeError("[exo-bench] tokenizer load failed")
-
-    try:
-        prompt_sizer = PromptSizer(tokenizer)
-        logger.debug(f"[exo-bench] loaded tokenizer: {full_model_id} for prompt sizer")
-    except Exception:
-        logger.error("[exo-bench] tokenizer usable but prompt sizing failed")
-        raise
-
-    selected: list[dict[str, Any]] = []
-    for p in previews:
-        if p.get("error") is not None:
-            continue
-        if not placement_filter(str(p.get("instance_meta", "")), args.instance_meta):
-            continue
-        if not sharding_filter(str(p.get("sharding", "")), args.sharding):
-            continue
-
-        instance = p.get("instance")
-        if not isinstance(instance, dict):
-            continue
-
-        n = nodes_used_in_instance(instance)
-        # Skip tensor ring single node as it is pointless when pipeline ring
-        if n == 1 and (
-            (args.sharding == "both" and "tensor" in p.get("sharding", "").lower())
-            or (
-                args.instance_meta == "both"
-                and "jaccl" in p.get("instance_meta", "").lower()
-            )
-        ):
-            continue
-
-        if (
-            args.skip_pipeline_jaccl
-            and (
-                args.instance_meta == "both"
-                and "jaccl" in p.get("instance_meta", "").lower()
-            )
-            and (
-                args.sharding == "both" and "pipeline" in p.get("sharding", "").lower()
-            )
-        ):
-            continue
-
-        if 0 < n <= args.max_nodes:
-            selected.append(p)
-
-    if not selected:
-        logger.error("No valid placements matched your filters.")
-        return 1
-
-    selected.sort(
-        key=lambda p: (
-            str(p.get("instance_meta", "")),
-            str(p.get("sharding", "")),
-            -nodes_used_in_instance(p["instance"]),
-        ),
-        reverse=True,
-    )
-
-    logger.debug(f"exo-bench model: short_id={short_id} full_id={full_model_id}")
-    logger.info(f"placements: {len(selected)}")
-    for p in selected:
-        logger.info(
-            f"  - {p['sharding']} / {p['instance_meta']} / nodes={nodes_used_in_instance(p['instance'])}"
-        )
-
-    if args.dry_run:
-        return 0
-
-    all_rows: list[dict[str, Any]] = []
-
-    for preview in selected:
-        instance = preview["instance"]
-        instance_id = instance_id_from_instance(instance)
-
-        sharding = str(preview["sharding"])
-        instance_meta = str(preview["instance_meta"])
-        n_nodes = nodes_used_in_instance(instance)
-
-        logger.info("=" * 80)
-        logger.info(
-            f"PLACEMENT: {sharding} / {instance_meta} / nodes={n_nodes} / instance_id={instance_id}"
-        )
-
-        client.request_json("POST", "/instance", body={"instance": instance})
-        wait_for_instance_ready(client, instance_id)
-
-        time.sleep(1)
-
-        try:
-            for i in range(args.warmup):
-                run_one_completion(
-                    client, full_model_id, pp_list[0], tg_list[0], prompt_sizer
-                )
-                logger.debug(f"  warmup {i + 1}/{args.warmup} done")
-
-            for pp in pp_list:
-                if (
-                    pp * n_nodes > 2048
-                    and "ring" in instance_meta.lower()
-                    and "tensor" in sharding.lower()
-                ):
-                    model_card = MODEL_CARDS[short_id]
-                    if model_card.metadata.storage_size > Memory.from_gb(10):
-                        logger.info(
-                            f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
-                        )
-                        continue
-                for tg in tg_list:
-                    runs: list[dict[str, Any]] = []
-                    for r in range(args.repeat):
-                        time.sleep(3)
-                        try:
-                            row, actual_pp_tokens = run_one_completion(
-                                client, full_model_id, pp, tg, prompt_sizer
-                            )
-                        except Exception as e:
-                            logger.error(e)
-                            continue
-                        row.update(
-                            {
-                                "model_short_id": short_id,
-                                "model_id": full_model_id,
-                                "placement_sharding": sharding,
-                                "placement_instance_meta": instance_meta,
-                                "placement_nodes": n_nodes,
-                                "instance_id": instance_id,
-                                "pp_tokens": actual_pp_tokens,
-                                "tg": tg,
-                                "repeat_index": r,
-                            }
-                        )
-                        runs.append(row)
-                        all_rows.append(row)
-
-                    if runs:
-                        prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
-                        gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
-                        ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
-                        gtok = mean(x["stats"]["generation_tokens"] for x in runs)
-                        peak = mean(
-                            x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
-                        )
-
-                        logger.info(
-                            f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
-                            f"prompt_tokens={ptok} gen_tokens={gtok}    "
-                            f"peak_memory={format_peak_memory(peak)}\n"
-                        )
-                    time.sleep(2)
-        finally:
-            try:
-                client.request_json("DELETE", f"/instance/{instance_id}")
-            except ExoHttpError as e:
-                if e.status != 404:
-                    raise
-            wait_for_instance_gone(client, instance_id)
-            logger.debug(f"Deleted instance {instance_id}")
-
-            time.sleep(5)
-
-    if args.json_out:
-        with open(args.json_out, "w", encoding="utf-8") as f:
-            json.dump(all_rows, f, indent=2, ensure_ascii=False)
-        logger.debug(f"\nWrote results JSON: {args.json_out}")
-
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
--- a/dashboard/src/app.css
+++ b/dashboard/src/app.css
@@ -198,8 +198,10 @@
 	stroke: oklch(0.85 0.18 85 / 0.4);
 	stroke-width: 1.5px;
 	stroke-dasharray: 8, 8;
-	animation: flowAnimation 1s linear infinite;
+	animation: flowAnimation 1.5s linear infinite;
 	filter: drop-shadow(0 0 3px oklch(0.85 0.18 85 / 0.5));
+	/* GPU optimization - hint to browser this element will animate */
+	will-change: stroke-dashoffset;
 }

 .graph-link-active {
@@ -208,6 +210,24 @@
 	filter: drop-shadow(0 0 6px oklch(0.85 0.18 85 / 0.8));
 }

+/* Reduce motion for users who prefer it - also saves GPU */
+@media (prefers-reduced-motion: reduce) {
+	.graph-link {
+		animation: none;
+	}
+	
+	.shooting-star {
+		animation: none;
+		display: none;
+	}
+	
+	.status-pulse,
+	.cursor-blink,
+	.animate-pulse {
+		animation: none;
+	}
+}
+
 /* CRT Screen effect for topology */
 .crt-screen {
 	position: relative;
@@ -266,13 +286,15 @@ input:focus, textarea:focus {
 	box-shadow: none;
 }

-/* Shooting Stars Animation */
+/* Shooting Stars Animation - GPU optimized */
 .shooting-stars {
 	position: fixed;
 	inset: 0;
 	overflow: hidden;
 	pointer-events: none;
 	z-index: 0;
+	/* Only render when visible */
+	content-visibility: auto;
 }

 .shooting-star {
@@ -285,6 +307,9 @@ input:focus, textarea:focus {
 	animation: shootingStar var(--duration, 3s) linear infinite;
 	animation-delay: var(--delay, 0s);
 	opacity: 0;
+	/* GPU optimization */
+	will-change: transform, opacity;
+	transform: translateZ(0);
 }

 .shooting-star::before {
@@ -320,3 +345,13 @@ input:focus, textarea:focus {
 		transform: translate(400px, 400px);
 	}
 }
+
+/* Pause animations while the page is hidden; +layout.svelte sets data-page-hidden on its wrapper <div>, not on <body> */
+:root:has([data-page-hidden="true"]) {
+	.shooting-star,
+	.graph-link,
+	.status-pulse,
+	.cursor-blink {
+		animation-play-state: paused;
+	}
+}
--- a/dashboard/src/app.d.ts
+++ b/dashboard/src/app.d.ts
@@ -11,3 +11,4 @@ declare global {
 }

 export {};
+
--- a/dashboard/src/lib/components/ChatForm.svelte
+++ b/dashboard/src/lib/components/ChatForm.svelte
@@ -139,11 +139,6 @@
 	}

 	function handleKeydown(event: KeyboardEvent) {
-		// Prevent form submission during IME composition (e.g., Chinese, Japanese, Korean input)
-		if (event.isComposing || event.keyCode === 229) {
-			return;
-		}
-		
 		if (event.key === 'Enter' && !event.shiftKey) {
 			event.preventDefault();
 			handleSubmit();
--- a/dashboard/src/lib/components/TopologyGraph.svelte
+++ b/dashboard/src/lib/components/TopologyGraph.svelte
@@ -1,5 +1,5 @@
 <script lang="ts">
-	import { onMount, onDestroy } from 'svelte';
+	import { onMount, onDestroy, tick } from 'svelte';
 	import * as d3 from 'd3';
 import { topologyData, isTopologyMinimized, debugMode } from '$lib/stores/app.svelte';

@@ -12,11 +12,35 @@ import { topologyData, isTopologyMinimized, debugMode } from '$lib/stores/app.sv

 	let svgContainer: SVGSVGElement | undefined = $state();
 	let resizeObserver: ResizeObserver | undefined;
+	
+	// Optimization: Track last render state to avoid unnecessary re-renders
+	let lastRenderHash = '';
+	let lastHighlightedNodesHash = '';
+	let lastDimensions = { width: 0, height: 0 };
+	let isRendering = false;
+	let pendingRender = false;

 const isMinimized = $derived(isTopologyMinimized());
 const data = $derived(topologyData());
 const debugEnabled = $derived(debugMode());

+	// Generate a hash of relevant data to detect actual changes
+	function generateDataHash(topologyData: typeof data, minimized: boolean, debug: boolean): string {
+		if (!topologyData) return 'null';
+		const nodes = topologyData.nodes || {};
+		const edges = topologyData.edges || [];
+		
+		// Create a lightweight hash from key properties only
+		const nodeHashes = Object.entries(nodes).map(([id, n]) => {
+			const macmon = n.macmon_info;
+			return `${id}:${n.friendly_name || ''}:${macmon?.memory?.ram_usage || 0}:${macmon?.memory?.ram_total || 0}:${macmon?.temp?.gpu_temp_avg || 0}:${macmon?.gpu_usage?.[1] || 0}:${macmon?.sys_power || 0}`;
+		}).sort().join('|');
+		
+		const edgeHash = edges.map(e => `${e.source}-${e.target}`).sort().join(',');
+		
+		return `${nodeHashes}::${edgeHash}::${minimized}::${debug}`;
+	}
+
 function getNodeLabel(nodeId: string): string {
 	const node = data?.nodes?.[nodeId];
 	return node?.friendly_name || nodeId.slice(0, 8);
@@ -932,16 +956,59 @@ function wrapLine(text: string, maxLen: number): string[] {

 	}

-	$effect(() => {
-		if (data) {
+	// Throttled render function to prevent too-frequent updates
+	function scheduleRender() {
+		if (isRendering) {
+			pendingRender = true;
+			return;
+		}
+		
+		isRendering = true;
+		requestAnimationFrame(() => {
+			isRendering = false; // reset before rendering so an exception in renderGraph() cannot block all future renders
 			renderGraph();
+			
+			if (pendingRender) {
+				pendingRender = false;
+				scheduleRender();
+			}
+		});
+	}
+
+	$effect(() => {
+		if (!data || !svgContainer) return;
+		
+		// Generate hash of current state
+		const currentHash = generateDataHash(data, isMinimized, debugEnabled);
+		const highlightHash = Array.from(highlightedNodes).sort().join(',');
+		
+		// Get current dimensions
+		const rect = svgContainer.getBoundingClientRect();
+		const dimensionsChanged = rect.width !== lastDimensions.width || rect.height !== lastDimensions.height;
+		
+		// Only re-render if something actually changed
+		if (currentHash !== lastRenderHash || highlightHash !== lastHighlightedNodesHash || dimensionsChanged) {
+			lastRenderHash = currentHash;
+			lastHighlightedNodesHash = highlightHash;
+			lastDimensions = { width: rect.width, height: rect.height };
+			scheduleRender();
 		}
 	});

 	onMount(() => {
 		if (svgContainer) {
+			// Use a debounced resize observer to prevent rapid re-renders
+			let resizeTimeout: ReturnType<typeof setTimeout> | null = null;
+			
 			resizeObserver = new ResizeObserver(() => {
-				renderGraph();
+				if (resizeTimeout) clearTimeout(resizeTimeout);
+				resizeTimeout = setTimeout(() => {
+					const rect = svgContainer?.getBoundingClientRect(); // binding may be unset if the component unmounted during the debounce delay
+					if (rect && (rect.width !== lastDimensions.width || rect.height !== lastDimensions.height)) {
+						lastDimensions = { width: rect.width, height: rect.height };
+						scheduleRender();
+					}
+				}, 100);
 			});
 			resizeObserver.observe(svgContainer);
 		}
@@ -969,11 +1036,20 @@ function wrapLine(text: string, maxLen: number): string[] {
 		stroke-width: 1px;
 		stroke-dasharray: 4, 4;
 		opacity: 0.8;
-		animation: flowAnimation 0.75s linear infinite;
+		/* Slower animation = less GPU usage */
+		animation: flowAnimation 2s linear infinite;
+		/* GPU optimization */
+		will-change: stroke-dashoffset;
 	}
 	@keyframes flowAnimation {
 		from { stroke-dashoffset: 0; }
 		to { stroke-dashoffset: -10; }
 	}
-
+	
+	/* Respect reduced motion preference */
+	@media (prefers-reduced-motion: reduce) {
+		:global(.graph-link) {
+			animation: none;
+		}
+	}
 </style>
--- a/dashboard/src/lib/components/index.ts
+++ b/dashboard/src/lib/components/index.ts
@@ -1,7 +1,8 @@
-export { default as TopologyGraph } from "./TopologyGraph.svelte";
-export { default as ChatForm } from "./ChatForm.svelte";
-export { default as ChatMessages } from "./ChatMessages.svelte";
-export { default as ChatAttachments } from "./ChatAttachments.svelte";
-export { default as ChatSidebar } from "./ChatSidebar.svelte";
-export { default as ModelCard } from "./ModelCard.svelte";
-export { default as MarkdownContent } from "./MarkdownContent.svelte";
+export { default as TopologyGraph } from './TopologyGraph.svelte';
+export { default as ChatForm } from './ChatForm.svelte';
+export { default as ChatMessages } from './ChatMessages.svelte';
+export { default as ChatAttachments } from './ChatAttachments.svelte';
+export { default as ChatSidebar } from './ChatSidebar.svelte';
+export { default as ModelCard } from './ModelCard.svelte';
+export { default as MarkdownContent } from './MarkdownContent.svelte';
+
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
--- a/dashboard/src/lib/types/files.ts
+++ b/dashboard/src/lib/types/files.ts
@@ -13,124 +13,55 @@ export interface ChatUploadedFile {
 }

 export interface ChatAttachment {
-	type: "image" | "text" | "pdf" | "audio";
+	type: 'image' | 'text' | 'pdf' | 'audio';
 	name: string;
 	content?: string;
 	base64Url?: string;
 	mimeType?: string;
 }

-export type FileCategory = "image" | "text" | "pdf" | "audio" | "unknown";
+export type FileCategory = 'image' | 'text' | 'pdf' | 'audio' | 'unknown';

-export const IMAGE_EXTENSIONS = [
-	".jpg",
-	".jpeg",
-	".png",
-	".gif",
-	".webp",
-	".svg",
-];
-export const IMAGE_MIME_TYPES = [
-	"image/jpeg",
-	"image/png",
-	"image/gif",
-	"image/webp",
-	"image/svg+xml",
-];
+export const IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg'];
+export const IMAGE_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp', 'image/svg+xml'];

 export const TEXT_EXTENSIONS = [
-	".txt",
-	".md",
-	".json",
-	".xml",
-	".yaml",
-	".yml",
-	".csv",
-	".log",
-	".js",
-	".ts",
-	".jsx",
-	".tsx",
-	".py",
-	".java",
-	".cpp",
-	".c",
-	".h",
-	".css",
-	".html",
-	".htm",
-	".sql",
-	".sh",
-	".bat",
-	".rs",
-	".go",
-	".rb",
-	".php",
-	".swift",
-	".kt",
-	".scala",
-	".r",
-	".dart",
-	".vue",
-	".svelte",
+	'.txt', '.md', '.json', '.xml', '.yaml', '.yml', '.csv', '.log',
+	'.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.cpp', '.c', '.h',
+	'.css', '.html', '.htm', '.sql', '.sh', '.bat', '.rs', '.go',
+	'.rb', '.php', '.swift', '.kt', '.scala', '.r', '.dart', '.vue', '.svelte'
 ];
 export const TEXT_MIME_TYPES = [
-	"text/plain",
-	"text/markdown",
-	"text/csv",
-	"text/html",
-	"text/css",
-	"application/json",
-	"application/xml",
-	"text/xml",
-	"application/javascript",
-	"text/javascript",
-	"application/typescript",
+	'text/plain', 'text/markdown', 'text/csv', 'text/html', 'text/css',
+	'application/json', 'application/xml', 'text/xml', 'application/javascript',
+	'text/javascript', 'application/typescript'
 ];

-export const PDF_EXTENSIONS = [".pdf"];
-export const PDF_MIME_TYPES = ["application/pdf"];
+export const PDF_EXTENSIONS = ['.pdf'];
+export const PDF_MIME_TYPES = ['application/pdf'];

-export const AUDIO_EXTENSIONS = [".mp3", ".wav", ".ogg", ".m4a"];
-export const AUDIO_MIME_TYPES = [
-	"audio/mpeg",
-	"audio/wav",
-	"audio/ogg",
-	"audio/mp4",
-];
+export const AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a'];
+export const AUDIO_MIME_TYPES = ['audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/mp4'];

 /**
 * Get file category based on MIME type and extension
 */
-export function getFileCategory(
-	mimeType: string,
-	fileName: string,
-): FileCategory {
-	const extension = fileName.toLowerCase().slice(fileName.lastIndexOf("."));
-
-	if (
-		IMAGE_MIME_TYPES.includes(mimeType) ||
-		IMAGE_EXTENSIONS.includes(extension)
-	) {
-		return "image";
+export function getFileCategory(mimeType: string, fileName: string): FileCategory {
+	const extension = fileName.toLowerCase().slice(fileName.lastIndexOf('.'));
+	
+	if (IMAGE_MIME_TYPES.includes(mimeType) || IMAGE_EXTENSIONS.includes(extension)) {
+		return 'image';
 	}
 	if (PDF_MIME_TYPES.includes(mimeType) || PDF_EXTENSIONS.includes(extension)) {
-		return "pdf";
+		return 'pdf';
 	}
-	if (
-		AUDIO_MIME_TYPES.includes(mimeType) ||
-		AUDIO_EXTENSIONS.includes(extension)
-	) {
-		return "audio";
+	if (AUDIO_MIME_TYPES.includes(mimeType) || AUDIO_EXTENSIONS.includes(extension)) {
+		return 'audio';
 	}
-	if (
-		TEXT_MIME_TYPES.includes(mimeType) ||
-		TEXT_EXTENSIONS.includes(extension) ||
-		mimeType.startsWith("text/")
-	) {
-		return "text";
+	if (TEXT_MIME_TYPES.includes(mimeType) || TEXT_EXTENSIONS.includes(extension) || mimeType.startsWith('text/')) {
+		return 'text';
 	}
-	return "unknown";
+	return 'unknown';
 }

 /**
@@ -138,36 +69,36 @@ export function getFileCategory(
 */
 export function getAcceptString(categories: FileCategory[]): string {
 	const accepts: string[] = [];
-
+	
 	for (const category of categories) {
 		switch (category) {
-			case "image":
+			case 'image':
 				accepts.push(...IMAGE_EXTENSIONS, ...IMAGE_MIME_TYPES);
 				break;
-			case "text":
+			case 'text':
 				accepts.push(...TEXT_EXTENSIONS, ...TEXT_MIME_TYPES);
 				break;
-			case "pdf":
+			case 'pdf':
 				accepts.push(...PDF_EXTENSIONS, ...PDF_MIME_TYPES);
 				break;
-			case "audio":
+			case 'audio':
 				accepts.push(...AUDIO_EXTENSIONS, ...AUDIO_MIME_TYPES);
 				break;
 		}
 	}
-
-	return accepts.join(",");
+	
+	return accepts.join(',');
 }

 /**
 * Format file size for display
 */
 export function formatFileSize(bytes: number): string {
-	if (bytes === 0) return "0 B";
+	if (bytes === 0) return '0 B';
 	const k = 1024;
-	const sizes = ["B", "KB", "MB", "GB"];
+	const sizes = ['B', 'KB', 'MB', 'GB'];
 	const i = Math.floor(Math.log(bytes) / Math.log(k));
-	return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + " " + sizes[i];
+	return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
 }

 /**
@@ -197,44 +128,42 @@ export function readFileAsText(file: File): Promise<string> {
 /**
 * Process uploaded files into ChatUploadedFile format
 */
-export async function processUploadedFiles(
-	files: File[],
-): Promise<ChatUploadedFile[]> {
+export async function processUploadedFiles(files: File[]): Promise<ChatUploadedFile[]> {
 	const results: ChatUploadedFile[] = [];
-
+	
 	for (const file of files) {
-		const id =
-			Date.now().toString() + Math.random().toString(36).substring(2, 9);
+		const id = Date.now().toString() + Math.random().toString(36).substring(2, 9);
 		const category = getFileCategory(file.type, file.name);
-
+		
 		const base: ChatUploadedFile = {
 			id,
 			name: file.name,
 			size: file.size,
 			type: file.type,
-			file,
+			file
 		};
-
+		
 		try {
-			if (category === "image") {
+			if (category === 'image') {
 				const preview = await readFileAsDataURL(file);
 				results.push({ ...base, preview });
-			} else if (category === "text" || category === "unknown") {
+			} else if (category === 'text' || category === 'unknown') {
 				const textContent = await readFileAsText(file);
 				results.push({ ...base, textContent });
-			} else if (category === "pdf") {
+			} else if (category === 'pdf') {
 				results.push(base);
-			} else if (category === "audio") {
+			} else if (category === 'audio') {
 				const preview = await readFileAsDataURL(file);
 				results.push({ ...base, preview });
 			} else {
 				results.push(base);
 			}
 		} catch (error) {
-			console.error("Error processing file:", file.name, error);
+			console.error('Error processing file:', file.name, error);
 			results.push(base);
 		}
 	}
-
+	
 	return results;
 }
+
--- a/dashboard/src/routes/+layout.svelte
+++ b/dashboard/src/routes/+layout.svelte
@@ -1,7 +1,25 @@
 <script lang="ts">
 	import '../app.css';
+	import { onMount } from 'svelte';
+	import { browser } from '$app/environment';
 	
 	let { children } = $props();
+	let isPageHidden = $state(false);
+	
+	onMount(() => {
+		if (!browser) return;
+		
+		// Listen for visibility changes to pause animations when hidden
+		const handleVisibilityChange = () => {
+			isPageHidden = document.visibilityState === 'hidden';
+		};
+		
+		document.addEventListener('visibilitychange', handleVisibilityChange);
+		
+		return () => {
+			document.removeEventListener('visibilitychange', handleVisibilityChange);
+		};
+	});
 </script>

 <svelte:head>
@@ -9,7 +27,7 @@
 	<meta name="description" content="EXO - Distributed AI Cluster Dashboard" />
 </svelte:head>

-<div class="min-h-screen bg-background text-foreground">
+<div class="min-h-screen bg-background text-foreground" data-page-hidden={isPageHidden}>
 	{@render children?.()}
 </div>

--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -51,59 +51,6 @@ const sidebarVisible = $derived(chatSidebarVisible());
 	let selectedSharding = $state<'Pipeline' | 'Tensor'>('Pipeline');
 	type InstanceMeta = 'MlxRing' | 'MlxIbv' | 'MlxJaccl';
 	
-	// Launch defaults persistence
-	const LAUNCH_DEFAULTS_KEY = 'exo-launch-defaults';
-	interface LaunchDefaults {
-		modelId: string | null;
-		sharding: 'Pipeline' | 'Tensor';
-		instanceType: InstanceMeta;
-		minNodes: number;
-	}
-	
-	function saveLaunchDefaults(): void {
-		const defaults: LaunchDefaults = {
-			modelId: selectedPreviewModelId(),
-			sharding: selectedSharding,
-			instanceType: selectedInstanceType,
-			minNodes: selectedMinNodes,
-		};
-		try {
-			localStorage.setItem(LAUNCH_DEFAULTS_KEY, JSON.stringify(defaults));
-		} catch (e) {
-			console.warn('Failed to save launch defaults:', e);
-		}
-	}
-	
-	function loadLaunchDefaults(): LaunchDefaults | null {
-		try {
-			const stored = localStorage.getItem(LAUNCH_DEFAULTS_KEY);
-			if (!stored) return null;
-			return JSON.parse(stored) as LaunchDefaults;
-		} catch (e) {
-			console.warn('Failed to load launch defaults:', e);
-			return null;
-		}
-	}
-	
-	function applyLaunchDefaults(availableModels: Array<{id: string}>, maxNodes: number): void {
-		const defaults = loadLaunchDefaults();
-		if (!defaults) return;
-		
-		// Apply sharding and instance type unconditionally
-		selectedSharding = defaults.sharding;
-		selectedInstanceType = defaults.instanceType;
-		
-		// Apply minNodes if valid (between 1 and maxNodes)
-		if (defaults.minNodes && defaults.minNodes >= 1 && defaults.minNodes <= maxNodes) {
-			selectedMinNodes = defaults.minNodes;
-		}
-		
-		// Only apply model if it exists in the available models
-		if (defaults.modelId && availableModels.some(m => m.id === defaults.modelId)) {
-			selectPreviewModel(defaults.modelId);
-		}
-	}
-	
 	let selectedInstanceType = $state<InstanceMeta>('MlxRing');
 	let selectedMinNodes = $state<number>(1);
 	let minNodesInitialized = $state(false);
@@ -152,17 +99,35 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 }
 	
 	// Compute highlighted nodes from hovered instance or hovered preview
+	// Memoized to avoid creating new Sets on every render
+	let lastHighlightedNodesKey = '';
+	let cachedHighlightedNodes: Set<string> = new Set();
+	
 	const highlightedNodes = $derived(() => {
+		// Create a key for the current state to enable memoization
+		const previewKey = Array.from(hoveredPreviewNodes).sort().join(',');
+		const currentKey = `${hoveredInstanceId || 'null'}:${previewKey}`;
+		
+		// Return cached value if nothing changed
+		if (currentKey === lastHighlightedNodesKey) {
+			return cachedHighlightedNodes;
+		}
+		
+		lastHighlightedNodesKey = currentKey;
+		
 		// First check instance hover
 		if (hoveredInstanceId) {
 			const instanceWrapped = instanceData[hoveredInstanceId];
-			return unwrapInstanceNodes(instanceWrapped);
+			cachedHighlightedNodes = unwrapInstanceNodes(instanceWrapped);
+			return cachedHighlightedNodes;
 		}
 		// Then check preview hover
 		if (hoveredPreviewNodes.size > 0) {
-			return hoveredPreviewNodes;
+			cachedHighlightedNodes = hoveredPreviewNodes;
+			return cachedHighlightedNodes;
 		}
-		return new Set<string>();
+		cachedHighlightedNodes = new Set<string>();
+		return cachedHighlightedNodes;
 	});
 	
 	// Helper to estimate memory from model ID (mirrors ModelCard logic)
@@ -351,9 +316,6 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 				const data = await response.json();
 				// API returns { data: [{ id, name }] } format
 				models = data.data || [];
-				// Restore last launch defaults if available
-				const currentNodeCount = topologyData() ? Object.keys(topologyData()!.nodes).length : 1;
-				applyLaunchDefaults(models, currentNodeCount);
 			}
 		} catch (error) {
 			console.error('Failed to fetch models:', error);
@@ -572,12 +534,13 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 		};
 	}

-	// Debug: Log downloads data when it changes
-	$effect(() => {
-		if (downloadsData && Object.keys(downloadsData).length > 0) {
-			console.log('[Download Debug] Current downloads:', downloadsData);
-		}
-	});
+	// Debug: Log downloads data when it changes (disabled in production for performance)
+	// Uncomment for debugging:
+	// $effect(() => {
+	// 	if (downloadsData && Object.keys(downloadsData).length > 0) {
+	// 		console.log('[Download Debug] Current downloads:', downloadsData);
+	// 	}
+	// });

 	// Helper to get download status for an instance
 	function getInstanceDownloadStatus(instanceId: string, instanceWrapped: unknown): { 
@@ -593,7 +556,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 		// Unwrap the instance
 		const [instanceTag, instance] = getTagged(instanceWrapped);
 		if (!instance || typeof instance !== 'object') {
-			return { isDownloading: false, progress: null, statusText: 'PREPARING', perNode: [] };
+			return { isDownloading: false, progress: null, statusText: 'UNKNOWN', perNode: [] };
 		}

 		const inst = instance as { shardAssignments?: { nodeToRunner?: Record<string, string>; runnerToShard?: Record<string, unknown>; modelId?: string } };
@@ -706,7 +669,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 	function deriveInstanceStatus(instanceWrapped: unknown): { statusText: string; statusClass: string } {
 		const [, instance] = getTagged(instanceWrapped);
 		if (!instance || typeof instance !== 'object') {
-			return { statusText: 'PREPARING', statusClass: 'inactive' };
+			return { statusText: 'UNKNOWN', statusClass: 'inactive' };
 		}
 		
 		const inst = instance as { shardAssignments?: { runnerToShard?: Record<string, unknown> } };
@@ -735,7 +698,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {

 		const has = (s: string) => statuses.includes(s);

-		if (statuses.length === 0) return { statusText: 'PREPARING', statusClass: 'inactive' };
+		if (statuses.length === 0) return { statusText: 'UNKNOWN', statusClass: 'inactive' };
 		if (has('Failed')) return { statusText: 'FAILED', statusClass: 'failed' };
 		if (has('Shutdown')) return { statusText: 'SHUTDOWN', statusClass: 'inactive' };
 		if (has('Loading')) return { statusText: 'LOADING', statusClass: 'starting' };
@@ -1044,7 +1007,6 @@ function toggleInstanceDownloadDetails(nodeId: string): void {

 	function handleSliderMouseUp() {
 		isDraggingSlider = false;
-		saveLaunchDefaults();
 	}

 	// Handle touch events for mobile
@@ -1064,7 +1026,6 @@ function toggleInstanceDownloadDetails(nodeId: string): void {

 	function handleSliderTouchEnd() {
 		isDraggingSlider = false;
-		saveLaunchDefaults();
 	}

 	const nodeCount = $derived(data ? Object.keys(data.nodes).length : 0);
@@ -1267,9 +1228,9 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 							<div class="flex-1 h-px bg-gradient-to-r from-exo-yellow/30 to-transparent"></div>
 						</div>
 						
-						<div
+						<div 
 							bind:this={instancesContainerRef}
-							class="max-h-72 xl:max-h-96 space-y-3 overflow-y-auto overflow-x-hidden py-px"
+							class="max-h-72 space-y-3 overflow-y-auto"
 						>
 								{#each Object.entries(instanceData) as [id, instance]}
 									{@const downloadInfo = getInstanceDownloadStatus(id, instance)}
@@ -1522,7 +1483,6 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 												onclick={() => {
 													if (modelCanFit) {
 														selectPreviewModel(model.id);
-														saveLaunchDefaults();
 														isModelDropdownOpen = false;
 														modelDropdownSearch = '';
 													}
@@ -1556,7 +1516,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 								<div class="text-xs text-white/70 font-mono mb-2">Sharding:</div>
 								<div class="flex gap-2">
 									<button 
-										onclick={() => { selectedSharding = 'Pipeline'; saveLaunchDefaults(); }}
+										onclick={() => selectedSharding = 'Pipeline'}
 										class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 cursor-pointer {selectedSharding === 'Pipeline' ? 'bg-transparent text-exo-yellow border-exo-yellow' : 'bg-transparent text-white/70 border-exo-medium-gray/50 hover:border-exo-yellow/50'}"
 									>
 										<span class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedSharding === 'Pipeline' ? 'border-exo-yellow' : 'border-exo-medium-gray'}">
@@ -1567,7 +1527,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 										Pipeline
 									</button>
 									<button 
-										onclick={() => { selectedSharding = 'Tensor'; saveLaunchDefaults(); }}
+										onclick={() => selectedSharding = 'Tensor'}
 										class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 cursor-pointer {selectedSharding === 'Tensor' ? 'bg-transparent text-exo-yellow border-exo-yellow' : 'bg-transparent text-white/70 border-exo-medium-gray/50 hover:border-exo-yellow/50'}"
 									>
 										<span class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedSharding === 'Tensor' ? 'border-exo-yellow' : 'border-exo-medium-gray'}">
@@ -1585,7 +1545,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 								<div class="text-xs text-white/70 font-mono mb-2">Instance Type:</div>
 								<div class="flex gap-2">
 									<button 
-										onclick={() => { selectedInstanceType = 'MlxRing'; saveLaunchDefaults(); }}
+										onclick={() => selectedInstanceType = 'MlxRing'}
 										class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 cursor-pointer {selectedInstanceType === 'MlxRing' ? 'bg-transparent text-exo-yellow border-exo-yellow' : 'bg-transparent text-white/70 border-exo-medium-gray/50 hover:border-exo-yellow/50'}"
 									>
 										<span class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedInstanceType === 'MlxRing' ? 'border-exo-yellow' : 'border-exo-medium-gray'}">
@@ -1596,7 +1556,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 										MLX Ring
 									</button>
 									<button 
-										onclick={() => { selectedInstanceType = 'MlxIbv'; saveLaunchDefaults(); }}
+										onclick={() => selectedInstanceType = 'MlxIbv'}
 										class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 cursor-pointer {selectedInstanceType === 'MlxIbv' ? 'bg-transparent text-exo-yellow border-exo-yellow' : 'bg-transparent text-white/70 border-exo-medium-gray/50 hover:border-exo-yellow/50'}"
 									>
 										<span class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedInstanceType === 'MlxIbv' ? 'border-exo-yellow' : 'border-exo-medium-gray'}">
@@ -1773,7 +1733,7 @@ function toggleInstanceDownloadDetails(nodeId: string): void {
 								<h3 class="text-xs text-exo-yellow font-mono tracking-[0.2em] uppercase">Instances</h3>
 								<div class="flex-1 h-px bg-gradient-to-r from-exo-yellow/30 to-transparent"></div>
 							</div>
-								<div class="space-y-3 max-h-72 xl:max-h-96 overflow-y-auto overflow-x-hidden py-px pr-1">
+								<div class="space-y-3 max-h-72 overflow-y-auto pr-1">
 									{#each Object.entries(instanceData) as [id, instance]}
 										{@const downloadInfo = getInstanceDownloadStatus(id, instance)}
 										{@const statusText = downloadInfo.statusText}
--- a/dashboard/vite.config.ts
+++ b/dashboard/vite.config.ts
@@ -1,15 +1,16 @@
-import tailwindcss from "@tailwindcss/vite";
-import { sveltekit } from "@sveltejs/kit/vite";
-import { defineConfig } from "vite";
+import tailwindcss from '@tailwindcss/vite';
+import { sveltekit } from '@sveltejs/kit/vite';
+import { defineConfig } from 'vite';

 export default defineConfig({
 	plugins: [tailwindcss(), sveltekit()],
 	server: {
 		proxy: {
-			"/v1": "http://localhost:52415",
-			"/state": "http://localhost:52415",
-			"/models": "http://localhost:52415",
-			"/instance": "http://localhost:52415",
-		},
-	},
+			'/v1': 'http://localhost:52415',
+			'/state': 'http://localhost:52415',
+			'/models': 'http://localhost:52415',
+			'/instance': 'http://localhost:52415'
+		}
+	}
 });
+
--- a/flake.nix
+++ b/flake.nix
@@ -42,22 +42,11 @@
        };
        treefmtEval = inputs.treefmt-nix.lib.evalModule pkgs {
          projectRootFile = "flake.nix";
-          programs = {
-            nixpkgs-fmt.enable = true;
-            ruff-format = {
-              enable = true;
-              excludes = [ "rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi" ];
-            };
-            rustfmt = {
-              enable = true;
-              package = (fenixToolchain system).rustfmt;
-            };
-            prettier = {
-              enable = true;
-              includes = [ "*.ts" ];
-            };
-            swift-format.enable = true;
-          };
+          programs.ruff-format.enable = true;
+          programs.ruff-format.excludes = [ "rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi" ];
+          programs.rustfmt.enable = true;
+          programs.rustfmt.package = (fenixToolchain system).rustfmt;
+          programs.nixpkgs-fmt.enable = true;
        };
      in
      {
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,18 +8,30 @@ dependencies = [
    "aiofiles>=24.1.0",
    "aiohttp>=3.12.14",
    "types-aiofiles>=24.1.0.20250708",
+    "typeguard>=4.4.4",
    "pydantic>=2.11.7",
+    "base58>=2.1.1",
+    "cryptography>=45.0.5",
    "fastapi>=0.116.1",
    "filelock>=3.18.0",
+    "aiosqlite>=0.21.0",
+    "networkx>=3.5",
+    "protobuf>=6.32.0",
+    "rich>=14.1.0",
    "rustworkx>=0.17.1",
+    "sqlmodel>=0.0.24",
+    "sqlalchemy[asyncio]>=2.0.43",
+    "greenlet>=3.2.4",
    "huggingface-hub>=0.33.4",
    "psutil>=7.0.0",
    "loguru>=0.7.3",
+    "textual>=5.3.0",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
+    "bidict>=0.23.1",
    "mlx>=0.30.1; sys_platform == 'darwin'",
    "mlx[cpu]>=0.30.1; sys_platform == 'linux'",
-    "mlx-lm",
+    "mlx-lm>=0.28.3",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
    "hypercorn>=0.18.0",
    "openai-harmony>=0.0.8",
@@ -59,7 +71,7 @@ members = [
 exo_pyo3_bindings = { workspace = true }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
-mlx-lm = { git = "git+https://github.com/ml-explore/mlx-lm", branch="main" }
+# mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }

 [build-system]
 requires = ["uv_build>=0.8.9,<0.9.0"]
@@ -70,7 +82,7 @@ build-backend = "uv_build"
 ###

 [tool.basedpyright]
-include = [".venv/lib/mlx", ".venv/lib/mlx_lm", "src", "bench"]
+include = [".venv/lib/mlx", ".venv/lib/mlx_lm", "src"]
 typeCheckingMode = "strict"
 failOnWarnings = true

--- a/src/exo/main.py
+++ b/src/exo/main.py
@@ -1,6 +1,5 @@
 import argparse
 import multiprocessing as mp
-import os
 import signal
 from dataclasses import dataclass, field
 from typing import Self
@@ -28,7 +27,7 @@ from exo.worker.main import Worker
@dataclass
 class Node:
    router: Router
-    worker: Worker | None
+    worker: Worker
    election: Election  # Every node participates in election, as we do want a node to become master even if it isn't a master candidate if no master candidates are present.
    election_result_receiver: Receiver[ElectionResult]
    master: Master | None
@@ -62,19 +61,15 @@ class Node:
        else:
            api = None

-        if not args.no_worker:
-            worker = Worker(
-                node_id,
-                session_id,
-                exo_shard_downloader(),
-                connection_message_receiver=router.receiver(topics.CONNECTION_MESSAGES),
-                global_event_receiver=router.receiver(topics.GLOBAL_EVENTS),
-                local_event_sender=router.sender(topics.LOCAL_EVENTS),
-                command_sender=router.sender(topics.COMMANDS),
-            )
-        else:
-            worker = None
-
+        worker = Worker(
+            node_id,
+            session_id,
+            exo_shard_downloader(),
+            connection_message_receiver=router.receiver(topics.CONNECTION_MESSAGES),
+            global_event_receiver=router.receiver(topics.GLOBAL_EVENTS),
+            local_event_sender=router.sender(topics.LOCAL_EVENTS),
+            command_sender=router.sender(topics.COMMANDS),
+        )
        # We start every node with a master
        master = Master(
            node_id,
@@ -104,9 +99,8 @@ class Node:
        async with self._tg as tg:
            signal.signal(signal.SIGINT, lambda _, __: self.shutdown())
            tg.start_soon(self.router.run)
+            tg.start_soon(self.worker.run)
            tg.start_soon(self.election.run)
-            if self.worker:
-                tg.start_soon(self.worker.run)
            if self.master:
                tg.start_soon(self.master.run)
            if self.api:
@@ -200,7 +194,6 @@ def main():
    # TODO: Refactor the current verbosity system
    logger_setup(EXO_LOG, args.verbosity)
    logger.info("Starting EXO")
-    logger.info(f"EXO_LIBP2P_NAMESPACE: {os.getenv('EXO_LIBP2P_NAMESPACE')}")

    node = anyio.run(Node.create, args)
    anyio.run(node.run)
@@ -214,7 +207,6 @@ class Args(CamelCaseModel):
    spawn_api: bool = False
    api_port: PositiveInt = 52415
    tb_only: bool = False
-    no_worker: bool = False

    @classmethod
    def parse(cls) -> Self:
@@ -252,10 +244,6 @@ class Args(CamelCaseModel):
            dest="api_port",
            default=52415,
        )
-        parser.add_argument(
-            "--no-worker",
-            action="store_true",
-        )

        args = parser.parse_args()
        return cls(**vars(args))  # pyright: ignore[reportAny] - We are intentionally validating here, we can't do it statically
--- a/src/exo/master/api.py
+++ b/src/exo/master/api.py
@@ -27,8 +27,6 @@ from exo.shared.logging import InterceptLogger
 from exo.shared.models.model_cards import MODEL_CARDS
 from exo.shared.models.model_meta import get_model_meta
 from exo.shared.types.api import (
-    BenchChatCompletionResponse,
-    BenchChatCompletionTaskParams,
    ChatCompletionChoice,
    ChatCompletionMessage,
    ChatCompletionResponse,
@@ -36,7 +34,6 @@ from exo.shared.types.api import (
    CreateInstanceResponse,
    DeleteInstanceResponse,
    FinishReason,
-    GenerationStats,
    ModelList,
    ModelListModel,
    PlaceInstanceParams,
@@ -175,7 +172,6 @@ class API:
        self.app.post("/v1/chat/completions", response_model=None)(
            self.chat_completions
        )
-        self.app.post("/bench/chat/completions")(self.bench_chat_completions)
        self.app.get("/state")(lambda: self.state)
        self.app.get("/events")(lambda: self._event_log)

@@ -494,45 +490,6 @@ class API:
            ],
        )

-    async def _collect_chat_completion_with_stats(
-        self, command_id: CommandId, parse_gpt_oss: bool
-    ) -> BenchChatCompletionResponse:
-        text_parts: list[str] = []
-        model: str | None = None
-        finish_reason: FinishReason | None = None
-
-        stats: GenerationStats | None = None
-
-        async for chunk in self._chat_chunk_stream(command_id, parse_gpt_oss):
-            if model is None:
-                model = chunk.model
-
-            text_parts.append(chunk.text)
-            stats = chunk.stats or stats
-
-            if chunk.finish_reason is not None:
-                finish_reason = chunk.finish_reason
-
-        combined_text = "".join(text_parts)
-        assert model is not None
-
-        resp = BenchChatCompletionResponse(
-            id=command_id,
-            created=int(time.time()),
-            model=model,
-            choices=[
-                ChatCompletionChoice(
-                    index=0,
-                    message=ChatCompletionMessage(
-                        role="assistant", content=combined_text
-                    ),
-                    finish_reason=finish_reason,
-                )
-            ],
-            generation_stats=stats,
-        )
-        return resp
-
    async def _trigger_notify_user_to_download_model(self, model_id: str) -> None:
        logger.warning(
            "TODO: we should send a notification to the user to download the model"
@@ -568,33 +525,6 @@ class API:

        return await self._collect_chat_completion(command.command_id, parse_gpt_oss)

-    async def bench_chat_completions(
-        self, payload: BenchChatCompletionTaskParams
-    ) -> BenchChatCompletionResponse:
-        model_meta = await resolve_model_meta(payload.model)
-        parse_gpt_oss = "gpt-oss" in model_meta.model_id.lower()
-        payload.model = model_meta.model_id
-
-        if not any(
-            instance.shard_assignments.model_id == payload.model
-            for instance in self.state.instances.values()
-        ):
-            await self._trigger_notify_user_to_download_model(payload.model)
-            raise HTTPException(
-                status_code=404, detail=f"No instance found for model {payload.model}"
-            )
-
-        payload.stream = False
-
-        command = ChatCompletion(request_params=payload)
-        await self._send(command)
-
-        response = await self._collect_chat_completion_with_stats(
-            command.command_id,
-            parse_gpt_oss,
-        )
-        return response
-
    def _calculate_total_available_memory(self) -> Memory:
        """Calculate total available memory across all nodes in bytes."""
        total_available = Memory()
--- a/src/exo/master/placement.py
+++ b/src/exo/master/placement.py
@@ -21,7 +21,6 @@ from exo.shared.types.commands import (
 )
 from exo.shared.types.events import Event, InstanceCreated, InstanceDeleted
 from exo.shared.types.memory import Memory
-from exo.shared.types.models import ModelId
 from exo.shared.types.topology import NodeInfo
 from exo.shared.types.worker.instances import (
    Instance,
@@ -30,7 +29,6 @@ from exo.shared.types.worker.instances import (
    MlxJacclInstance,
    MlxRingInstance,
 )
-from exo.shared.types.worker.shards import Sharding


 def random_ephemeral_port() -> int:
@@ -67,28 +65,6 @@ def place_instance(
    if not cycles_with_sufficient_memory:
        raise ValueError("No cycles found with sufficient memory")

-    if command.sharding == Sharding.Tensor:
-        if not command.model_meta.supports_tensor:
-            raise ValueError(
-                f"Requested Tensor sharding but this model does not support tensor parallelism: {command.model_meta.model_id}"
-            )
-        # TODO: the condition here for tensor parallel is not correct, but it works good enough for now.
-        cycles_with_sufficient_memory = [
-            cycle
-            for cycle in cycles_with_sufficient_memory
-            if command.model_meta.hidden_size % len(cycle) == 0
-        ]
-        if not cycles_with_sufficient_memory:
-            raise ValueError(
-                f"No tensor sharding found for model with hidden_size {command.model_meta.hidden_size} candidate cycles"
-            )
-    if command.sharding == Sharding.Pipeline and command.model_meta.model_id == ModelId(
-        "mlx-community/DeepSeek-V3.1-8bit"
-    ):
-        raise ValueError(
-            "Pipeline parallelism is not supported for DeepSeek V3.1 (8-bit)"
-        )
-
    smallest_cycles = get_smallest_cycles(cycles_with_sufficient_memory)

    smallest_tb_cycles = [
--- a/src/exo/master/placement_utils.py
+++ b/src/exo/master/placement_utils.py
@@ -385,14 +385,13 @@ def get_mlx_jaccl_coordinators(
    address in format "X.X.X.X:PORT" per node.
    """
    rank_0_node = selected_cycle[0]
-    logger.debug(f"Selecting coordinator from rank 0 node: {rank_0_node.node_id}")
+    logger.info(f"Selecting coordinator from rank 0 node: {rank_0_node.node_id}")

    def get_ip_for_node(n: NodeInfo) -> str:
        if n.node_id == rank_0_node.node_id:
            return "0.0.0.0"

-        ip = _find_ip_prioritised(n, rank_0_node, cycle_digraph)
-        if ip:
+        for ip, _ in _find_connection_ip(n, rank_0_node, cycle_digraph):
            return ip

        logger.warning(
--- a/src/exo/master/tests/test_placement.py
+++ b/src/exo/master/tests/test_placement.py
@@ -50,7 +50,7 @@ def model_meta() -> ModelMetadata:
        storage_size=Memory.from_kb(1000),
        pretty_name="Test Model",
        n_layers=10,
-        hidden_size=30,
+        hidden_size=10,
        supports_tensor=True,
    )

--- a/src/exo/shared/types/api.py
+++ b/src/exo/shared/types/api.py
@@ -5,7 +5,6 @@ from pydantic import BaseModel, Field, field_validator
 from pydantic_core import PydanticUseDefault

 from exo.shared.types.common import CommandId
-from exo.shared.types.memory import Memory
 from exo.shared.types.models import ModelId, ModelMetadata
 from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
 from exo.shared.types.worker.shards import Sharding
@@ -52,10 +51,6 @@ class ChatCompletionMessage(BaseModel):
    function_call: dict[str, Any] | None = None


-class BenchChatCompletionMessage(ChatCompletionMessage):
-    pass
-
-
 class TopLogprobItem(BaseModel):
    token: str
    logprob: float
@@ -118,18 +113,6 @@ class ChatCompletionResponse(BaseModel):
    service_tier: str | None = None


-class GenerationStats(BaseModel):
-    prompt_tps: float
-    generation_tps: float
-    prompt_tokens: int
-    generation_tokens: int
-    peak_memory_usage: Memory
-
-
-class BenchChatCompletionResponse(ChatCompletionResponse):
-    generation_stats: GenerationStats | None = None
-
-
 class ChatCompletionTaskParams(BaseModel):
    model: str
    frequency_penalty: float | None = None
@@ -152,10 +135,6 @@ class ChatCompletionTaskParams(BaseModel):
    user: str | None = None


-class BenchChatCompletionTaskParams(ChatCompletionTaskParams):
-    pass
-
-
 class PlaceInstanceParams(BaseModel):
    model_id: str
    sharding: Sharding = Sharding.Pipeline
--- a/src/exo/shared/types/chunks.py
+++ b/src/exo/shared/types/chunks.py
@@ -1,6 +1,5 @@
 from enum import Enum

-from exo.shared.types.api import GenerationStats
 from exo.utils.pydantic_ext import TaggedModel

 from .api import FinishReason
@@ -21,7 +20,6 @@ class TokenChunk(BaseChunk):
    text: str
    token_id: int
    finish_reason: FinishReason | None = None
-    stats: GenerationStats | None = None


 class ImageChunk(BaseChunk):
--- a/src/exo/shared/types/worker/runner_response.py
+++ b/src/exo/shared/types/worker/runner_response.py
@@ -1,4 +1,4 @@
-from exo.shared.types.api import FinishReason, GenerationStats
+from exo.shared.types.api import FinishReason
 from exo.utils.pydantic_ext import TaggedModel


@@ -15,7 +15,6 @@ class GenerationResponse(BaseRunnerResponse):
    token: int
    # logprobs: list[float] | None = None # too big. we can change to be top-k
    finish_reason: FinishReason | None = None
-    stats: GenerationStats | None = None


 class FinishedResponse(BaseRunnerResponse):
--- a/src/exo/shared/types/worker/runners.py
+++ b/src/exo/shared/types/worker/runners.py
@@ -53,10 +53,6 @@ class RunnerRunning(BaseRunnerStatus):
    pass


-class RunnerShuttingDown(BaseRunnerStatus):
-    pass
-
-
 class RunnerShutdown(BaseRunnerStatus):
    pass

@@ -74,7 +70,6 @@ RunnerStatus = (
    | RunnerWarmingUp
    | RunnerReady
    | RunnerRunning
-    | RunnerShuttingDown
    | RunnerShutdown
    | RunnerFailed
 )
--- a/src/exo/worker/download/download_utils.py
+++ b/src/exo/worker/download/download_utils.py
@@ -450,11 +450,6 @@ async def get_weight_map(repo_id: str, revision: str = "main") -> dict[str, str]


 async def resolve_allow_patterns(shard: ShardMetadata) -> list[str]:
-    # TODO: 'Smart' downloads are disabled because:
-    #  (i) We don't handle all kinds of files;
-    # (ii) We don't have sticky sessions.
-    # (iii) Tensor parallel requires all files.
-    return ["*"]
    try:
        weight_map = await get_weight_map(str(shard.model_meta.model_id))
        return get_allow_patterns(weight_map, shard)
--- a/src/exo/worker/engines/mlx/constants.py
+++ b/src/exo/worker/engines/mlx/constants.py
@@ -9,7 +9,7 @@ MAX_KV_SIZE: int | None = 3200
 KEEP_KV_SIZE: int | None = 1600
 QUANTIZE_MODEL_MODE: str | None = "affine"
 CACHE_GROUP_SIZE: int = 64
-KV_CACHE_BITS: int | None = None
+KV_CACHE_BITS: int | None = 8

 # TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
 TRUST_REMOTE_CODE: bool = True
--- a/src/exo/worker/engines/mlx/generator/generate.py
+++ b/src/exo/worker/engines/mlx/generator/generate.py
@@ -3,17 +3,10 @@ from typing import Any, Callable, Generator, cast, get_args
 import mlx.core as mx
 from mlx_lm import stream_generate
 from mlx_lm.models.cache import KVCache
-from mlx_lm.sample_utils import make_sampler
 from mlx_lm.tokenizer_utils import TokenizerWrapper

 # from exo.engines.mlx.cache import KVPrefixCache
-from exo.shared.types.api import (
-    BenchChatCompletionTaskParams,
-    ChatCompletionMessage,
-    FinishReason,
-    GenerationStats,
-)
-from exo.shared.types.memory import Memory
+from exo.shared.types.api import ChatCompletionMessage, FinishReason
 from exo.shared.types.tasks import ChatCompletionTaskParams
 from exo.shared.types.worker.runner_response import (
    GenerationResponse,
@@ -48,6 +41,7 @@ def maybe_quantize_kv_cache(
 def warmup_inference(
    model: Model,
    tokenizer: TokenizerWrapper,
+    sampler: Callable[[mx.array], mx.array],
 ) -> int:
    content = "Prompt to warm up the inference engine. Repeat this."

@@ -70,9 +64,6 @@ def warmup_inference(
        model=model,
    )

-    # Use a default sampler for warmup
-    sampler = make_sampler(temp=0.7)
-
    logger.info("Generating warmup tokens")
    for _r in stream_generate(
        model=model,
@@ -81,7 +72,7 @@ def warmup_inference(
        max_tokens=50,
        sampler=sampler,
        prompt_cache=cache,
-        prefill_step_size=2048,
+        prefill_step_size=65536,
        kv_group_size=KV_GROUP_SIZE,
        kv_bits=KV_BITS,
    ):
@@ -89,47 +80,20 @@ def warmup_inference(
        tokens_generated += 1

    logger.info("Generated ALL warmup tokens")
-
-    # TODO: Do we want an mx_barrier?
-    #  At least this version is actively incorrect, as it should use mx_barrier(group)
    mx_barrier()

    return tokens_generated


-def ban_token_ids(token_ids: list[int]) -> Callable[[mx.array, mx.array], mx.array]:
-    token_ids = [int(t) for t in token_ids]
-
-    def proc(_history: mx.array, logits: mx.array) -> mx.array:
-        for tid in token_ids:
-            logits[..., tid] = -1e9
-        return logits
-
-    return proc
-
-
-def eos_ids_from_tokenizer(tokenizer: TokenizerWrapper) -> list[int]:
-    eos: list[int] | None = getattr(tokenizer, "eos_token_ids", None)
-    if eos is None:
-        return []
-    return eos
-
-
 def mlx_generate(
    model: Model,
    tokenizer: TokenizerWrapper,
+    sampler: Callable[[mx.array], mx.array],
    task: ChatCompletionTaskParams,
 ) -> Generator[GenerationResponse]:
-    # Ensure that generation stats only contains peak memory for this generation
-    mx.reset_peak_memory()
-    is_bench: bool = isinstance(task, BenchChatCompletionTaskParams)
-
    # Currently we support chat-completion tasks only.
    logger.info(f"task_params: {task}")

-    if task.seed is not None:
-        mx.random.seed(task.seed)
-
    prompt = apply_chat_template(
        tokenizer=tokenizer,
        chat_task_data=task,
@@ -137,17 +101,6 @@ def mlx_generate(

    caches = make_kv_cache(model=model)

-    logits_processors: list[Callable[[mx.array, mx.array], mx.array]] = []
-    if is_bench:
-        # Only sample length eos tokens
-        eos_ids = eos_ids_from_tokenizer(tokenizer)
-        logits_processors = [ban_token_ids(eos_ids)]
-
-    sampler = make_sampler(
-        temp=task.temperature if task.temperature is not None else 0.7,
-        top_p=task.top_p if task.top_p is not None else 1.0,
-    )
-
    max_tokens = task.max_tokens or MAX_TOKENS
    for out in stream_generate(
        model=model,
@@ -155,40 +108,26 @@ def mlx_generate(
        prompt=prompt,
        max_tokens=max_tokens,
        sampler=sampler,
-        logits_processors=logits_processors,
        prompt_cache=caches,
-        # TODO: Dynamically change prefill step size to be the maximum possible without timing out.
-        prefill_step_size=2048,
+        prefill_step_size=65536,
        kv_group_size=KV_GROUP_SIZE,
        kv_bits=KV_BITS,
    ):
        logger.info(out.text)
-
-        stats: GenerationStats | None = None
-        if out.finish_reason is not None:
-            stats = GenerationStats(
-                prompt_tps=float(out.prompt_tps),
-                generation_tps=float(out.generation_tps),
-                prompt_tokens=int(out.prompt_tokens),
-                generation_tokens=int(out.generation_tokens),
-                peak_memory_usage=Memory.from_gb(out.peak_memory),
+        if out.finish_reason is not None and out.finish_reason not in get_args(
+            FinishReason
+        ):
+            # We don't throw here as this failure case is really not all that bad
+            # Just log the error and move on
+            logger.warning(
+                f"Model generated unexpected finish_reason: {out.finish_reason}"
            )

-            if out.finish_reason not in get_args(FinishReason):
-                # We don't throw here as this failure case is really not all that bad
-                # Just log the error and move on
-                logger.warning(
-                    f"Model generated unexpected finish_reason: {out.finish_reason}"
-                )
-
        yield GenerationResponse(
            text=out.text,
            token=out.token,
            finish_reason=cast(FinishReason | None, out.finish_reason),
-            stats=stats,
        )

        if out.finish_reason is not None:
            break
-
-        # TODO: Do we want an mx_barrier?
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -3,10 +3,11 @@ import os
 import resource
 import time
 from pathlib import Path
-from typing import Any, cast
+from typing import Any, Callable, cast

 from mlx_lm.models.cache import KVCache, QuantizedKVCache, RotatingKVCache
 from mlx_lm.models.deepseek_v3 import DeepseekV3Model
+from mlx_lm.sample_utils import make_sampler
 from mlx_lm.tokenizer_utils import TokenizerWrapper

 from exo.worker.engines.mlx.constants import (
@@ -175,7 +176,11 @@ def initialize_mlx(

 def load_mlx_items(
    bound_instance: BoundInstance, group: Group | None
-) -> tuple[Model, TokenizerWrapper]:
+) -> tuple[Model, TokenizerWrapper, Callable[[mx.array], mx.array]]:
+    # TODO: pass temperature
+    sampler: Callable[[mx.array], mx.array] = make_sampler(temp=0.7)
+    logger.info("Created a sampler")
+
    if group is None:
        logger.info(f"Single device used for {bound_instance.instance}")
        model_path = build_model_path(bound_instance.bound_shard.model_meta.model_id)
@@ -196,7 +201,7 @@ def load_mlx_items(

    set_wired_limit_for_model(get_weights_size(bound_instance.bound_shard))

-    return cast(Model, model), tokenizer
+    return cast(Model, model), tokenizer, sampler


 def shard_and_load(
@@ -284,15 +289,15 @@ def apply_chat_template(
    messages = chat_task_data.messages

    formatted_messages: list[dict[str, Any]] = []
-    for message in messages:
+    for _, message in enumerate(messages):
        if isinstance(message.content, ChatCompletionMessageText):
            message.content = message.content.text
        if isinstance(message.content, list):
-            if len(message.content) == 0:
-                logger.warning("Received prompt with no content, skipping")
+            if len(message.content) != 1:
+                logger.warning("Received malformed prompt")
                continue

-            message.content = "\n".join(c.text for c in message.content).strip()
+            message.content = message.content[0].text
        if message.content is None and message.thinking is None:
            continue

@@ -305,7 +310,6 @@ def apply_chat_template(
        formatted_messages,
        tokenize=False,
        add_generation_prompt=True,
-        tools=chat_task_data.tools,
    )

    return prompt  # type: ignore
@@ -391,15 +395,11 @@ def set_wired_limit_for_model(model_size: Memory):
            "MB. This can be slow. See the documentation for possible work-arounds: "
            "https://github.com/ml-explore/mlx-lm/tree/main#large-models"
        )
+    kv_bytes = int(0.02 * model_bytes)
+    target_cache = int(1.10 * (model_bytes + kv_bytes))
+    target_cache = min(target_cache, max_rec_size)
+    mx.set_cache_limit(target_cache)
    mx.set_wired_limit(max_rec_size)
-    logger.info(f"Wired limit set to {max_rec_size}.")
-
-
-def mlx_cleanup(
-    model: Model | None, tokenizer: TokenizerWrapper | None, group: Group | None
-) -> None:
-    del model, tokenizer, group
-    mx.clear_cache()
-    import gc
-
-    gc.collect()
+    logger.info(
+        f"Wired limit set to {max_rec_size}. Cache limit set to {target_cache}."
+    )
--- a/src/exo/worker/main.py
+++ b/src/exo/worker/main.py
@@ -23,7 +23,6 @@ from exo.shared.types.events import (
    TopologyEdgeCreated,
    TopologyEdgeDeleted,
 )
-from exo.shared.types.models import ModelId
 from exo.shared.types.multiaddr import Multiaddr
 from exo.shared.types.profiling import MemoryPerformanceProfile, NodePerformanceProfile
 from exo.shared.types.state import State
@@ -84,7 +83,7 @@ class Worker:
        self.out_for_delivery: dict[EventId, ForwarderEvent] = {}

        self.state: State = State()
-        self.download_status: dict[ModelId, DownloadProgress] = {}
+        self.download_status: dict[ShardMetadata, DownloadProgress] = {}
        self.runners: dict[RunnerId, RunnerSupervisor] = {}
        self._tg: TaskGroup | None = None

@@ -129,7 +128,6 @@ class Worker:
            tg.start_soon(start_polling_node_metrics, resource_monitor_callback)

            tg.start_soon(start_polling_memory_metrics, memory_monitor_callback)
-            tg.start_soon(self._emit_existing_download_progress)
            tg.start_soon(self._connection_message_event_writer)
            tg.start_soon(self._resend_out_for_delivery)
            tg.start_soon(self._event_applier)
@@ -202,11 +200,11 @@ class Worker:
                        )
                    )
                case DownloadModel(shard_metadata=shard):
-                    if shard.model_meta.model_id not in self.download_status:
+                    if shard not in self.download_status:
                        progress = DownloadPending(
                            shard_metadata=shard, node_id=self.node_id
                        )
-                        self.download_status[shard.model_meta.model_id] = progress
+                        self.download_status[shard] = progress
                        await self.event_sender.send(
                            NodeDownloadProgress(download_progress=progress)
                        )
@@ -219,7 +217,7 @@ class Worker:
                        progress = DownloadCompleted(
                            shard_metadata=shard, node_id=self.node_id
                        )
-                        self.download_status[shard.model_meta.model_id] = progress
+                        self.download_status[shard] = progress
                        await self.event_sender.send(
                            NodeDownloadProgress(download_progress=progress)
                        )
@@ -351,7 +349,7 @@ class Worker:
                initial_progress
            ),
        )
-        self.download_status[task.shard_metadata.model_meta.model_id] = status
+        self.download_status[task.shard_metadata] = status
        self.event_sender.send_nowait(NodeDownloadProgress(download_progress=status))

        last_progress_time = 0.0
@@ -365,7 +363,7 @@ class Worker:
            nonlocal last_progress_time
            if progress.status == "complete":
                status = DownloadCompleted(shard_metadata=shard, node_id=self.node_id)
-                self.download_status[shard.model_meta.model_id] = status
+                self.download_status[shard] = status
                # Footgun!
                self.event_sender.send_nowait(
                    NodeDownloadProgress(download_progress=status)
@@ -386,7 +384,7 @@ class Worker:
                        progress
                    ),
                )
-                self.download_status[shard.model_meta.model_id] = status
+                self.download_status[shard] = status
                self.event_sender.send_nowait(
                    NodeDownloadProgress(download_progress=status)
                )
@@ -446,40 +444,3 @@ class Worker:
                    await self.event_sender.send(TopologyEdgeDeleted(edge=conn))

            await anyio.sleep(10)
-
-    async def _emit_existing_download_progress(self) -> None:
-        try:
-            while True:
-                logger.info("Fetching and emitting existing download progress...")
-                async for (
-                    _,
-                    progress,
-                ) in self.shard_downloader.get_shard_download_status():
-                    if progress.status == "complete":
-                        status = DownloadCompleted(
-                            node_id=self.node_id, shard_metadata=progress.shard
-                        )
-                    elif progress.status in ["in_progress", "not_started"]:
-                        if progress.downloaded_bytes_this_session.in_bytes == 0:
-                            status = DownloadPending(
-                                node_id=self.node_id, shard_metadata=progress.shard
-                            )
-                        else:
-                            status = DownloadOngoing(
-                                node_id=self.node_id,
-                                shard_metadata=progress.shard,
-                                download_progress=map_repo_download_progress_to_download_progress_data(
-                                    progress
-                                ),
-                            )
-                    else:
-                        continue
-
-                    self.download_status[progress.shard.model_meta.model_id] = status
-                    await self.event_sender.send(
-                        NodeDownloadProgress(download_progress=status)
-                    )
-                logger.info("Done emitting existing download progress.")
-                await anyio.sleep(5 * 60)  # 5 minutes
-        except Exception as e:
-            logger.error(f"Error emitting existing download progress: {e}")
--- a/src/exo/worker/plan.py
+++ b/src/exo/worker/plan.py
@@ -3,7 +3,6 @@
 from collections.abc import Mapping, Sequence

 from exo.shared.types.common import NodeId
-from exo.shared.types.models import ModelId
 from exo.shared.types.tasks import (
    ChatCompletion,
    ConnectToGroup,
@@ -35,6 +34,7 @@ from exo.shared.types.worker.runners import (
    RunnerStatus,
    RunnerWarmingUp,
 )
+from exo.shared.types.worker.shards import ShardMetadata
 from exo.worker.runner.runner_supervisor import RunnerSupervisor


@@ -43,7 +43,7 @@ def plan(
    # Runners is expected to be FRESH and so should not come from state
    runners: Mapping[RunnerId, RunnerSupervisor],
    # DL_status is expected to be FRESH and so should not come from state
-    download_status: Mapping[ModelId, DownloadProgress],
+    download_status: Mapping[ShardMetadata, DownloadProgress],
    # gdls is not expected to be fresh
    global_download_status: Mapping[NodeId, Sequence[DownloadProgress]],
    instances: Mapping[InstanceId, Instance],
@@ -111,14 +111,13 @@ def _create_runner(

 def _model_needs_download(
    runners: Mapping[RunnerId, RunnerSupervisor],
-    download_status: Mapping[ModelId, DownloadProgress],
+    download_status: Mapping[ShardMetadata, DownloadProgress],
 ) -> DownloadModel | None:
    for runner in runners.values():
-        model_id = runner.bound_instance.bound_shard.model_meta.model_id
        if isinstance(runner.status, RunnerIdle) and (
-            model_id not in download_status
-            or not isinstance(
-                download_status[model_id], (DownloadOngoing, DownloadCompleted)
+            not isinstance(
+                download_status.get(runner.bound_instance.bound_shard, None),
+                (DownloadOngoing, DownloadCompleted),
            )
        ):
            # We don't invalidate download_status randomly in case a file gets deleted on disk
@@ -236,8 +235,9 @@ def _ready_to_warmup(
        assert device_rank < world_size
        assert device_rank >= 0

-        # Rank != 0
-        accepting_ranks_ready = device_rank > 0 and all(
+        # TODO: Ensure these align with MLX distributed's expectations.
+        # Rank < n-1
+        accepting_ranks_ready = device_rank < world_size - 1 and all(
            isinstance(
                all_runners.get(global_runner_id, None),
                (RunnerLoaded, RunnerWarmingUp),
@@ -245,8 +245,8 @@ def _ready_to_warmup(
            for global_runner_id in shard_assignments.runner_to_shard
        )

-        # Rank = 0
-        connecting_rank_ready = device_rank == 0 and all(
+        # Rank = n-1
+        connecting_rank_ready = device_rank == world_size - 1 and all(
            isinstance(all_runners.get(global_runner_id, None), RunnerWarmingUp)
            for global_runner_id in shard_assignments.runner_to_shard
            if global_runner_id != runner_id
@@ -274,12 +274,6 @@ def _pending_tasks(
            if task.instance_id != runner.bound_instance.instance.instance_id:
                continue

-            # I have a design point here; this is a state race in disguise as the task status doesn't get updated to completed fast enough
-            # however, realistically the task status should be set to completed by the LAST runner, so this is a true race
-            # the actual solution is somewhat deeper than this bypass - TODO!
-            if task.task_id in runner.completed:
-                continue
-
            # TODO: Check ordering aligns with MLX distributeds expectations.

            if isinstance(runner.status, RunnerReady) and all(
--- a/src/exo/worker/runner/bootstrap.py
+++ b/src/exo/worker/runner/bootstrap.py
@@ -6,7 +6,7 @@ from exo.shared.types.events import Event, RunnerStatusUpdated
 from exo.shared.types.tasks import Task
 from exo.shared.types.worker.instances import BoundInstance, MlxJacclInstance
 from exo.shared.types.worker.runners import RunnerFailed
-from exo.utils.channels import ClosedResourceError, MpReceiver, MpSender
+from exo.utils.channels import MpReceiver, MpSender

 logger: "loguru.Logger" = loguru.logger

@@ -31,8 +31,6 @@ def entrypoint(
        from exo.worker.runner.runner import main

        main(bound_instance, event_sender, task_receiver)
-    except ClosedResourceError:
-        logger.warning("Runner communication closed unexpectedly")
    except Exception as e:
        logger.opt(exception=e).warning(
            f"Runner {bound_instance.bound_runner_id} crashed with critical exception {e}"
@@ -44,10 +42,8 @@ def entrypoint(
            )
        )
    finally:
-        try:
-            event_sender.close()
-            task_receiver.close()
-        finally:
-            event_sender.join()
-            task_receiver.join()
-            logger.info("bye from the runner")
+        event_sender.close()
+        task_receiver.close()
+        event_sender.join()
+        task_receiver.join()
+        logger.info("bye from the runner")
--- a/src/exo/worker/runner/runner.py
+++ b/src/exo/worker/runner/runner.py
@@ -1,7 +1,5 @@
 import time

-import mlx.core as mx
-
 from exo.shared.types.api import ChatCompletionMessageText
 from exo.shared.types.chunks import TokenChunk
 from exo.shared.types.events import (
@@ -34,11 +32,10 @@ from exo.shared.types.worker.runners import (
    RunnerReady,
    RunnerRunning,
    RunnerShutdown,
-    RunnerShuttingDown,
    RunnerStatus,
    RunnerWarmingUp,
 )
-from exo.utils.channels import MpReceiver, MpSender
+from exo.utils.channels import ClosedResourceError, MpReceiver, MpSender
 from exo.worker.engines.mlx.generator.generate import mlx_generate, warmup_inference
 from exo.worker.engines.mlx.utils_mlx import (
    initialize_mlx,
@@ -58,153 +55,180 @@ def main(
        bound_instance.bound_runner_id,
        bound_instance.bound_shard,
    )
-    logger.info("hello from the runner")
-    if getattr(shard_metadata, "immediate_exception", False):
-        raise Exception("Fake exception - runner failed to spin up.")
-    if timeout := getattr(shard_metadata, "should_timeout", 0):
-        time.sleep(timeout)
+    try:
+        logger.info("hello from the runner")
+        if getattr(shard_metadata, "immediate_exception", False):
+            raise Exception("Fake exception - runner failed to spin up.")
+        if timeout := getattr(shard_metadata, "should_timeout", 0):
+            time.sleep(timeout)

-    setup_start_time = time.time()
+        setup_start_time = time.time()

-    model = None
-    tokenizer = None
-    group = None
+        model = None
+        tokenizer = None
+        sampler = None
+        group = None

-    current_status: RunnerStatus = RunnerIdle()
-    logger.info("runner created")
-    event_sender.send(
-        RunnerStatusUpdated(runner_id=runner_id, runner_status=current_status)
-    )
-    with task_receiver as tasks:
-        for task in tasks:
-            event_sender.send(
-                TaskStatusUpdated(task_id=task.task_id, task_status=TaskStatus.Running)
-            )
-            event_sender.send(TaskAcknowledged(task_id=task.task_id))
-            match task:
-                case ConnectToGroup() if isinstance(
-                    current_status, (RunnerIdle, RunnerFailed)
-                ):
-                    logger.info("runner connecting")
-                    current_status = RunnerConnecting()
-                    event_sender.send(
-                        RunnerStatusUpdated(
-                            runner_id=runner_id, runner_status=current_status
-                        )
+        current_status: RunnerStatus = RunnerIdle()
+        logger.info("runner created")
+        event_sender.send(
+            RunnerStatusUpdated(runner_id=runner_id, runner_status=current_status)
+        )
+        with task_receiver as tasks:
+            for task in tasks:
+                event_sender.send(
+                    TaskStatusUpdated(
+                        task_id=task.task_id, task_status=TaskStatus.Running
                    )
-                    group = initialize_mlx(bound_instance)
-
-                    logger.info("runner connected")
-                    current_status = RunnerConnected()
-
-                # we load the model if it's connected with a group, or idle without a group. we should never tell a model to connect if it doesn't need to
-                case LoadModel() if (
-                    isinstance(current_status, RunnerConnected) and group is not None
-                ) or (isinstance(current_status, RunnerIdle) and group is None):
-                    current_status = RunnerLoading()
-                    logger.info("runner loading")
-                    event_sender.send(
-                        RunnerStatusUpdated(
-                            runner_id=runner_id, runner_status=current_status
-                        )
-                    )
-
-                    model, tokenizer = load_mlx_items(bound_instance, group)
-
-                    current_status = RunnerLoaded()
-                    logger.info("runner loaded")
-                case StartWarmup() if isinstance(current_status, RunnerLoaded):
-                    assert model
-                    assert tokenizer
-                    current_status = RunnerWarmingUp()
-                    logger.info("runner warming up")
-                    event_sender.send(
-                        RunnerStatusUpdated(
-                            runner_id=runner_id, runner_status=current_status
-                        )
-                    )
-
-                    logger.info(f"warming up inference for instance: {instance}")
-                    toks = warmup_inference(
-                        model=model,
-                        tokenizer=tokenizer,
-                        # kv_prefix_cache=kv_prefix_cache,  # supply for warmup-time prefix caching
-                    )
-                    logger.info(f"warmed up by generating {toks} tokens")
-                    logger.info(
-                        f"runner initialized in {time.time() - setup_start_time} seconds"
-                    )
-                    current_status = RunnerReady()
-                    logger.info("runner ready")
-                case ChatCompletion(task_params=task_params, command_id=command_id) if (
-                    isinstance(current_status, RunnerReady)
-                ):
-                    assert model
-                    assert tokenizer
-                    logger.info(f"received chat request: {str(task)[:500]}")
-                    current_status = RunnerRunning()
-                    logger.info("runner running")
-                    event_sender.send(
-                        RunnerStatusUpdated(
-                            runner_id=runner_id, runner_status=current_status
-                        )
-                    )
-                    assert task_params.messages[0].content is not None
-                    _check_for_debug_prompts(task_params.messages[0].content)
-
-                    # Generate responses using the actual MLX generation
-                    for response in mlx_generate(
-                        model=model,
-                        tokenizer=tokenizer,
-                        task=task_params,
+                )
+                event_sender.send(TaskAcknowledged(task_id=task.task_id))
+                match task:
+                    case ConnectToGroup() if isinstance(
+                        current_status, (RunnerIdle, RunnerFailed)
                    ):
-                        match response:
-                            case GenerationResponse():
-                                if shard_metadata.device_rank == 0:
-                                    event_sender.send(
-                                        ChunkGenerated(
-                                            command_id=command_id,
-                                            chunk=TokenChunk(
-                                                idx=response.token,
-                                                model=shard_metadata.model_meta.model_id,
-                                                text=response.text,
-                                                token_id=response.token,
-                                                finish_reason=response.finish_reason,
-                                                stats=response.stats,
-                                            ),
-                                        )
-                                    )
-                                # case TokenizedResponse():
-                                # TODO: something here ig
-
-                    current_status = RunnerReady()
-                    logger.info("runner ready")
-                case Shutdown():
-                    current_status = RunnerShuttingDown()
-                    logger.info("runner shutting down")
-                    event_sender.send(
-                        RunnerStatusUpdated(
-                            runner_id=runner_id, runner_status=current_status
+                        logger.info("runner connecting")
+                        current_status = RunnerConnecting()
+                        event_sender.send(
+                            RunnerStatusUpdated(
+                                runner_id=runner_id, runner_status=current_status
+                            )
                        )
-                    )
-                    current_status = RunnerShutdown()
-                case _:
-                    raise ValueError(
-                        f"Received {task.__class__.__name__} outside of state machine in {current_status=}"
-                    )
-            event_sender.send(
-                TaskStatusUpdated(task_id=task.task_id, task_status=TaskStatus.Complete)
-            )
-            event_sender.send(
-                RunnerStatusUpdated(runner_id=runner_id, runner_status=current_status)
-            )
-            if isinstance(current_status, RunnerShutdown):
-                del model, tokenizer, group
-                mx.clear_cache()
-                import gc
+                        group = initialize_mlx(bound_instance)

-                gc.collect()
-                break
+                        logger.info("runner connected")
+                        current_status = RunnerConnected()
+
+                    # we load the model if it's connected with a group, or idle without a group. we should never tell a model to connect if it doesn't need to
+                    case LoadModel() if (
+                        isinstance(current_status, RunnerConnected)
+                        and group is not None
+                    ) or (isinstance(current_status, RunnerIdle) and group is None):
+                        current_status = RunnerLoading()
+                        logger.info("runner loading")
+                        event_sender.send(
+                            RunnerStatusUpdated(
+                                runner_id=runner_id, runner_status=current_status
+                            )
+                        )
+
+                        model, tokenizer, sampler = load_mlx_items(
+                            bound_instance, group
+                        )
+
+                        current_status = RunnerLoaded()
+                        logger.info("runner loaded")
+                    case StartWarmup() if isinstance(current_status, RunnerLoaded):
+                        assert model
+                        assert tokenizer
+                        assert sampler
+                        current_status = RunnerWarmingUp()
+                        logger.info("runner warming up")
+                        event_sender.send(
+                            RunnerStatusUpdated(
+                                runner_id=runner_id, runner_status=current_status
+                            )
+                        )
+
+                        logger.info(f"warming up inference for instance: {instance}")
+                        toks = warmup_inference(
+                            model=model,
+                            tokenizer=tokenizer,
+                            sampler=sampler,
+                            # kv_prefix_cache=kv_prefix_cache,  # supply for warmup-time prefix caching
+                        )
+                        logger.info(f"warmed up by generating {toks} tokens")
+                        logger.info(
+                            f"runner initialized in {time.time() - setup_start_time} seconds"
+                        )
+                        current_status = RunnerReady()
+                        logger.info("runner ready")
+                    case ChatCompletion(
+                        task_params=task_params, command_id=command_id
+                    ) if isinstance(current_status, RunnerReady):
+                        assert model
+                        assert tokenizer
+                        assert sampler
+                        logger.info(f"received chat request: {str(task)[:500]}")
+                        current_status = RunnerRunning()
+                        logger.info("runner running")
+                        event_sender.send(
+                            RunnerStatusUpdated(
+                                runner_id=runner_id, runner_status=current_status
+                            )
+                        )
+                        assert task_params.messages[0].content is not None
+                        _check_for_debug_prompts(task_params.messages[0].content)
+
+                        # Generate responses using the actual MLX generation
+                        for response in mlx_generate(
+                            model=model,
+                            tokenizer=tokenizer,
+                            sampler=sampler,
+                            task=task_params,
+                        ):
+                            match response:
+                                case GenerationResponse():
+                                    if shard_metadata.device_rank == 0:
+                                        event_sender.send(
+                                            ChunkGenerated(
+                                                command_id=command_id,
+                                                chunk=TokenChunk(
+                                                    idx=response.token,
+                                                    model=shard_metadata.model_meta.model_id,
+                                                    text=response.text,
+                                                    token_id=response.token,
+                                                    finish_reason=response.finish_reason,
+                                                ),
+                                            )
+                                        )
+                                    # case TokenizedResponse():
+                                    # TODO: something here ig
+
+                        current_status = RunnerReady()
+                        logger.info("runner ready")
+                    case Shutdown():
+                        logger.info("runner shutting down")
+                        event_sender.send(
+                            TaskStatusUpdated(
+                                task_id=task.task_id, task_status=TaskStatus.Complete
+                            )
+                        )
+                        break
+                    case _:
+                        raise ValueError(
+                            f"Received {task.__class__.__name__} outside of state machine in {current_status=}"
+                        )
+                event_sender.send(
+                    TaskStatusUpdated(
+                        task_id=task.task_id, task_status=TaskStatus.Complete
+                    )
+                )
+                event_sender.send(
+                    RunnerStatusUpdated(
+                        runner_id=runner_id, runner_status=current_status
+                    )
+                )
+        event_sender.send(
+            RunnerStatusUpdated(runner_id=runner_id, runner_status=RunnerShutdown())
+        )
+    except ClosedResourceError:
+        logger.warning("runner communication closed unexpectedly")
+    except Exception as e:
+        logger.opt(exception=e).warning(
+            f"Runner {runner_id} crashed with critical exception {e}"
+        )
+        event_sender.send(
+            RunnerStatusUpdated(
+                runner_id=runner_id,
+                runner_status=RunnerFailed(error_message=str(e)),
+            )
+        )
+    finally:
+        event_sender.close()
+        task_receiver.close()
+        event_sender.join()
+        task_receiver.join()
+        logger.info("bye from the runner")


 EXO_RUNNER_MUST_FAIL = "EXO RUNNER MUST FAIL"
--- a/src/exo/worker/runner/runner_supervisor.py
+++ b/src/exo/worker/runner/runner_supervisor.py
@@ -14,23 +14,13 @@ from anyio import (
 from anyio.abc import TaskGroup
 from loguru import logger

-from exo.shared.types.events import (
-    Event,
-    RunnerStatusUpdated,
-    TaskAcknowledged,
-    TaskStatusUpdated,
-)
-from exo.shared.types.tasks import Task, TaskId, TaskStatus
+from exo.shared.types.events import Event, RunnerStatusUpdated, TaskAcknowledged
+from exo.shared.types.tasks import Task, TaskId
 from exo.shared.types.worker.instances import BoundInstance
 from exo.shared.types.worker.runners import (
-    RunnerConnecting,
    RunnerFailed,
    RunnerIdle,
-    RunnerLoading,
-    RunnerRunning,
-    RunnerShuttingDown,
    RunnerStatus,
-    RunnerWarmingUp,
 )
 from exo.shared.types.worker.shards import ShardMetadata
 from exo.utils.channels import MpReceiver, MpSender, Sender, mp_channel
@@ -49,10 +39,10 @@ class RunnerSupervisor:
    _ev_recv: MpReceiver[Event]
    _task_sender: MpSender[Task]
    _event_sender: Sender[Event]
+    # err_path: str
    _tg: TaskGroup | None = field(default=None, init=False)
    status: RunnerStatus = field(default_factory=RunnerIdle, init=False)
    pending: dict[TaskId, anyio.Event] = field(default_factory=dict, init=False)
-    completed: set[TaskId] = field(default_factory=set, init=False)

    @classmethod
    def create(
@@ -87,6 +77,7 @@ class RunnerSupervisor:
            _ev_recv=ev_recv,
            _task_sender=task_sender,
            _event_sender=event_sender,
+            # err_path=err_path,
        )

        return self
@@ -127,10 +118,6 @@ class RunnerSupervisor:
        self._tg.cancel_scope.cancel()

    async def start_task(self, task: Task):
-        if task.task_id in self.completed:
-            logger.info(
-                f"Skipping invalid task {task} as it has already been completed"
-            )
        logger.info(f"Starting task {task}")
        event = anyio.Event()
        self.pending[task.task_id] = event
@@ -151,22 +138,6 @@ class RunnerSupervisor:
                    if isinstance(event, TaskAcknowledged):
                        self.pending.pop(event.task_id).set()
                        continue
-                    if (
-                        isinstance(event, TaskStatusUpdated)
-                        and event.task_status == TaskStatus.Complete
-                    ):
-                        # If a task has just been completed, we should be working on it.
-                        assert isinstance(
-                            self.status,
-                            (
-                                RunnerRunning,
-                                RunnerWarmingUp,
-                                RunnerLoading,
-                                RunnerConnecting,
-                                RunnerShuttingDown,
-                            ),
-                        )
-                        self.completed.add(event.task_id)
                    await self._event_sender.send(event)
            except (ClosedResourceError, BrokenResourceError) as e:
                await self._check_runner(e)
--- a/src/exo/worker/tests/constants.py
+++ b/src/exo/worker/tests/constants.py
@@ -9,11 +9,9 @@ MASTER_NODE_ID = NodeId("ffffffff-aaaa-4aaa-8aaa-aaaaaaaaaaaa")

 NODE_A: Final[NodeId] = NodeId("aaaaaaaa-aaaa-4aaa-8aaa-aaaaaaaaaaaa")
 NODE_B: Final[NodeId] = NodeId("bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb")
-NODE_C: Final[NodeId] = NodeId("cccccccc-cccc-4ccc-8ccc-cccccccccccc")

 RUNNER_1_ID: Final[RunnerId] = RunnerId("11111111-1111-4111-8111-111111111111")
 RUNNER_2_ID: Final[RunnerId] = RunnerId("33333333-3333-4333-8333-333333333333")
-RUNNER_3_ID: Final[RunnerId] = RunnerId("Runner3")

 INSTANCE_1_ID: Final[InstanceId] = InstanceId("22222222-2222-4222-8222-222222222222")
 INSTANCE_2_ID: Final[InstanceId] = InstanceId("44444444-4444-4444-8444-444444444444")
--- a/src/exo/worker/tests/unittests/conftest.py
+++ b/src/exo/worker/tests/unittests/conftest.py
@@ -1,9 +1,11 @@
-from dataclasses import dataclass, field
+from __future__ import annotations
+
+from dataclasses import dataclass

 from exo.shared.types.common import NodeId
 from exo.shared.types.memory import Memory
 from exo.shared.types.models import ModelId, ModelMetadata
-from exo.shared.types.tasks import BaseTask, TaskId
+from exo.shared.types.tasks import BaseTask
 from exo.shared.types.worker.instances import (
    BoundInstance,
    Instance,
@@ -19,7 +21,6 @@ from exo.shared.types.worker.shards import PipelineShardMetadata, ShardMetadata
 class FakeRunnerSupervisor:
    bound_instance: BoundInstance
    status: RunnerStatus
-    completed: set[TaskId] = field(default_factory=set)


 class OtherTask(BaseTask):
--- a/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
@@ -1,6 +1,5 @@
 import exo.worker.plan as plan_mod
 from exo.shared.types.common import NodeId
-from exo.shared.types.models import ModelId
 from exo.shared.types.tasks import LoadModel
 from exo.shared.types.worker.downloads import DownloadCompleted, DownloadProgress
 from exo.shared.types.worker.instances import BoundInstance
@@ -8,6 +7,7 @@ from exo.shared.types.worker.runners import (
    RunnerConnected,
    RunnerIdle,
 )
+from exo.shared.types.worker.shards import ShardMetadata
 from exo.worker.tests.constants import (
    INSTANCE_1_ID,
    MODEL_A_ID,
@@ -46,7 +46,7 @@ def test_plan_requests_download_when_waiting_and_shard_not_downloaded():
    all_runners = {RUNNER_1_ID: RunnerIdle()}

    # No entry for this shard -> should trigger DownloadModel
-    download_status: dict[ModelId, DownloadProgress] = {}
+    download_status: dict[ShardMetadata, DownloadProgress] = {}

    result = plan_mod.plan(
        node_id=NODE_A,
@@ -94,7 +94,7 @@ def test_plan_loads_model_when_all_shards_downloaded_and_waiting():

    # Local node has already marked its shard as downloaded (not actually used by _load_model)
    local_download_status = {
-        MODEL_A_ID: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)
+        shard1: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)  # type: ignore[reportUnhashable]
    }

    # Global view has completed downloads for both nodes
@@ -140,7 +140,7 @@ def test_plan_does_not_request_download_when_shard_already_downloaded():

    # Local status claims the shard is downloaded already
    local_download_status = {
-        MODEL_A_ID: DownloadCompleted(shard_metadata=shard, node_id=NODE_A)
+        shard: DownloadCompleted(shard_metadata=shard, node_id=NODE_A)  # type: ignore[reportUnhashable]
    }

    # Global view hasn't caught up yet (no completed shards recorded for NODE_A)
@@ -192,7 +192,7 @@ def test_plan_does_not_load_model_until_all_shards_downloaded_globally():

    # Only NODE_A's shard is recorded as downloaded globally
    local_download_status = {
-        MODEL_A_ID: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)
+        shard1: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)  # type: ignore[reportUnhashable]
    }
    global_download_status = {
        NODE_A: [DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)],
--- a/src/exo/worker/tests/unittests/test_plan/test_warmup.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_warmup.py
@@ -12,10 +12,8 @@ from exo.worker.tests.constants import (
    MODEL_A_ID,
    NODE_A,
    NODE_B,
-    NODE_C,
    RUNNER_1_ID,
    RUNNER_2_ID,
-    RUNNER_3_ID,
 )
 from exo.worker.tests.unittests.conftest import (
    FakeRunnerSupervisor,
@@ -26,39 +24,37 @@ from exo.worker.tests.unittests.conftest import (

 def test_plan_starts_warmup_for_accepting_rank_when_all_loaded_or_warming():
    """
-    For non-zero device_rank shards, StartWarmup should be emitted when all
+    For non-final device_rank shards, StartWarmup should be emitted when all
    shards in the instance are Loaded/WarmingUp.
    """
-    shard0 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=0, world_size=3)
-    shard1 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=1, world_size=3)
-    shard2 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=2, world_size=3)
+    shard0 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=0, world_size=2)
+    shard1 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=1, world_size=2)
    instance = get_mlx_ring_instance(
        instance_id=INSTANCE_1_ID,
        model_id=MODEL_A_ID,
-        node_to_runner={NODE_A: RUNNER_1_ID, NODE_B: RUNNER_2_ID, NODE_C: RUNNER_3_ID},
-        runner_to_shard={RUNNER_1_ID: shard0, RUNNER_2_ID: shard1, RUNNER_3_ID: shard2},
+        node_to_runner={NODE_A: RUNNER_1_ID, NODE_B: RUNNER_2_ID},
+        runner_to_shard={RUNNER_1_ID: shard0, RUNNER_2_ID: shard1},
    )

    bound_instance = BoundInstance(
-        instance=instance, bound_runner_id=RUNNER_2_ID, bound_node_id=NODE_B
+        instance=instance, bound_runner_id=RUNNER_1_ID, bound_node_id=NODE_A
    )
    local_runner = FakeRunnerSupervisor(
        bound_instance=bound_instance, status=RunnerLoaded()
    )

-    runners = {RUNNER_2_ID: local_runner}
+    runners = {RUNNER_1_ID: local_runner}
    instances = {INSTANCE_1_ID: instance}
    all_runners = {
        RUNNER_1_ID: RunnerLoaded(),
        RUNNER_2_ID: RunnerLoaded(),
-        RUNNER_3_ID: RunnerWarmingUp(),
    }

    result = plan_mod.plan(
-        node_id=NODE_B,
+        node_id=NODE_A,
        runners=runners,  # type: ignore
        download_status={},
-        global_download_status={NODE_A: []},
+        global_download_status={NODE_B: []},
        instances=instances,
        all_runners=all_runners,
        tasks={},
@@ -154,9 +150,9 @@ def test_plan_does_not_start_warmup_for_rank_zero_until_others_warming():
    """
    Rank-zero shard should not start warmup until all non-zero ranks are
    already WarmingUp.
-    For accepting ranks (device_rank != 0), StartWarmup should be
+    For accepting ranks (device_rank != world_size - 1), StartWarmup should be
    emitted when all shards in the instance are Loaded/WarmingUp.
-    In a 2-node setup, rank 1 is the accepting rank.
+    In a 2-node setup, rank 0 is the accepting rank.
    """
    shard0 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=0, world_size=2)
    shard1 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=1, world_size=2)
@@ -167,7 +163,7 @@ def test_plan_does_not_start_warmup_for_rank_zero_until_others_warming():
        runner_to_shard={RUNNER_1_ID: shard0, RUNNER_2_ID: shard1},
    )

-    # Rank 1 is the accepting rank
+    # Rank 0 is the accepting rank
    bound_instance = BoundInstance(
        instance=instance, bound_runner_id=RUNNER_1_ID, bound_node_id=NODE_A
    )
@@ -192,23 +188,6 @@ def test_plan_does_not_start_warmup_for_rank_zero_until_others_warming():
        tasks={},
    )

-    assert result is None
-
-    all_runners = {
-        RUNNER_1_ID: RunnerLoaded(),
-        RUNNER_2_ID: RunnerWarmingUp(),
-    }
-
-    result = plan_mod.plan(
-        node_id=NODE_A,
-        runners=runners,  # type: ignore
-        download_status={},
-        global_download_status={NODE_A: []},
-        instances=instances,
-        all_runners=all_runners,
-        tasks={},
-    )
-
    assert isinstance(result, StartWarmup)
    assert result.instance_id == INSTANCE_1_ID

@@ -301,8 +280,9 @@ def test_plan_does_not_start_warmup_for_accepting_rank_until_all_loaded_or_warmi

 def test_plan_does_not_start_warmup_for_connecting_rank_until_others_warming():
    """
-    Connecting rank (device_rank == 0) should not start warmup
+    Connecting rank (device_rank == world_size - 1) should not start warmup
    until all other ranks are already WarmingUp.
+    In a 2-node setup, rank 1 is the connecting rank.
    """
    shard0 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=0, world_size=2)
    shard1 = get_pipeline_shard_metadata(MODEL_A_ID, device_rank=1, world_size=2)
@@ -315,13 +295,13 @@ def test_plan_does_not_start_warmup_for_connecting_rank_until_others_warming():

    # Rank 1 is the connecting rank
    bound_instance = BoundInstance(
-        instance=instance, bound_runner_id=RUNNER_1_ID, bound_node_id=NODE_A
+        instance=instance, bound_runner_id=RUNNER_2_ID, bound_node_id=NODE_B
    )
    local_runner = FakeRunnerSupervisor(
        bound_instance=bound_instance, status=RunnerLoaded()
    )

-    runners = {RUNNER_1_ID: local_runner}
+    runners = {RUNNER_2_ID: local_runner}
    instances = {INSTANCE_1_ID: instance}
    all_runners = {
        RUNNER_1_ID: RunnerLoaded(),
@@ -329,7 +309,7 @@ def test_plan_does_not_start_warmup_for_connecting_rank_until_others_warming():
    }

    result = plan_mod.plan(
-        node_id=NODE_A,
+        node_id=NODE_B,
        runners=runners,  # type: ignore
        download_status={},
        global_download_status={NODE_A: [], NODE_B: []},
--- a/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py
@@ -34,7 +34,6 @@ from exo.shared.types.worker.runners import (
    RunnerReady,
    RunnerRunning,
    RunnerShutdown,
-    RunnerShuttingDown,
    RunnerWarmingUp,
 )
 from exo.utils.channels import mp_channel
@@ -111,7 +110,7 @@ def assert_events_equal(test_events: Iterable[Event], true_events: Iterable[Even
 def patch_out_mlx(monkeypatch: pytest.MonkeyPatch):
    # initialize_mlx returns a "group" equal to 1
    monkeypatch.setattr(mlx_runner, "initialize_mlx", make_nothin(1))
-    monkeypatch.setattr(mlx_runner, "load_mlx_items", make_nothin((1, 1)))
+    monkeypatch.setattr(mlx_runner, "load_mlx_items", make_nothin((1, 1, 1)))
    monkeypatch.setattr(mlx_runner, "warmup_inference", make_nothin(1))
    monkeypatch.setattr(mlx_runner, "_check_for_debug_prompts", nothin)

@@ -200,9 +199,6 @@ def test_events_processed_in_correct_order(patch_out_mlx: pytest.MonkeyPatch):
            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerReady()),
            TaskStatusUpdated(task_id=SHUTDOWN_TASK_ID, task_status=TaskStatus.Running),
            TaskAcknowledged(task_id=SHUTDOWN_TASK_ID),
-            RunnerStatusUpdated(
-                runner_id=RUNNER_1_ID, runner_status=RunnerShuttingDown()
-            ),
            TaskStatusUpdated(
                task_id=SHUTDOWN_TASK_ID, task_status=TaskStatus.Complete
            ),
--- a/src/exo/worker/utils/net_profile.py
+++ b/src/exo/worker/utils/net_profile.py
@@ -32,8 +32,6 @@ async def check_reachability(
            return NodeId(body) or None
        except OSError:
            return None
-        except http.client.HTTPException:
-            return None
        finally:
            connection.close()

--- a/tests/headless_runner.py
+++ b/tests/headless_runner.py
@@ -1,246 +0,0 @@
-import multiprocessing as mp
-import socket
-import time
-import typing
-
-import anyio
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
-from hypercorn import Config
-from hypercorn.asyncio import serve  # pyright: ignore[reportUnknownVariableType]
-from loguru import logger
-from pydantic import BaseModel
-
-from exo.shared.logging import InterceptLogger, logger_setup
-from exo.shared.models.model_cards import MODEL_CARDS, ModelId
-from exo.shared.types.api import ChatCompletionMessage, ChatCompletionTaskParams
-from exo.shared.types.commands import CommandId
-from exo.shared.types.common import Host, NodeId
-from exo.shared.types.events import Event
-from exo.shared.types.tasks import (
-    ChatCompletion,
-    ConnectToGroup,
-    LoadModel,
-    Shutdown,
-    StartWarmup,
-    Task,
-)
-from exo.shared.types.worker.instances import (
-    BoundInstance,
-    Instance,
-    InstanceId,
-    MlxJacclInstance,
-    MlxRingInstance,
-)
-from exo.shared.types.worker.runners import RunnerId, ShardAssignments
-from exo.shared.types.worker.shards import PipelineShardMetadata, TensorShardMetadata
-from exo.utils.channels import MpReceiver, MpSender, mp_channel
-from exo.worker.download.impl_shard_downloader import (
-    build_full_shard,
-    exo_shard_downloader,
-)
-from exo.worker.runner.bootstrap import entrypoint
-
-
-class Tests(BaseModel):
-    # list[hostname, ip addr]
-    devs: list[list[str]]
-    model_id: str
-    kind: typing.Literal["init", "warmup", "inference"]
-
-
-hn = socket.gethostname()
-mp.set_start_method("spawn", force=True)
-logger_setup(None)
-
-
-async def main():
-    logger.info("starting cool server majig")
-    logger.info(hn)
-    await assert_downloads()
-    cfg = Config()
-    cfg.bind = "0.0.0.0:52415"
-    # nb: shared.logging needs updating if any of this changes
-    cfg.accesslog = "-"
-    cfg.errorlog = "-"
-    cfg.logger_class = InterceptLogger
-    app = FastAPI()
-    app.post("/ring")(ring_backend)
-    app.post("/jaccl")(jaccl_backend)
-    shutdown = anyio.Event()
-    await serve(
-        app,  # type: ignore
-        cfg,
-        shutdown_trigger=lambda: shutdown.wait(),
-    )
-    await anyio.sleep_forever()
-    # gracefully shutdown the api
-    shutdown.set()
-
-
-async def assert_downloads():
-    sd = exo_shard_downloader()
-    # await sd.ensure_shard(await build_full_shard(MODEL_CARDS["qwen3-0.6b"].model_id))
-    await sd.ensure_shard(await build_full_shard(MODEL_CARDS["llama-3.2-1b"].model_id))
-
-
-async def ring_backend(test: Tests):
-    iid = InstanceId(str(hash(str(test.devs))))
-    return await execute_test(test, ring_instance(test, iid))
-
-
-def ring_instance(test: Tests, iid: InstanceId) -> Instance:
-    global hn
-    hbn = [Host(ip="i dont care", port=52416) for _ in test.devs]
-    world_size = len(test.devs)
-    for i in range(world_size):
-        if hn.startswith(test.devs[i][0]):
-            hn = test.devs[i][0]
-            if i - 1 >= 0:
-                hbn[i - 1] = Host(ip=test.devs[i - 1][1], port=52416)
-            if i + 1 < len(test.devs):
-                hbn[i + 1] = Host(ip=test.devs[i + 1][1], port=52416)
-            hbn[i] = Host(ip="0.0.0.0", port=52416)
-            break
-
-    meta = MODEL_CARDS[test.model_id].metadata
-    instance = MlxRingInstance(
-        instance_id=iid,
-        ephemeral_port=52416,
-        hosts_by_node={NodeId(hn): hbn},
-        shard_assignments=ShardAssignments(
-            model_id=ModelId(test.model_id),
-            node_to_runner={NodeId(host[0]): RunnerId(host[0]) for host in test.devs},
-            runner_to_shard={
-                RunnerId(test.devs[i][0]): PipelineShardMetadata(
-                    model_meta=meta,
-                    device_rank=i,
-                    world_size=world_size,
-                    start_layer=(meta.n_layers // world_size) * i,
-                    end_layer=min(
-                        meta.n_layers, (meta.n_layers // world_size) * (i + 1)
-                    ),
-                    n_layers=min(meta.n_layers, (meta.n_layers // world_size) * (i + 1))
-                    - (meta.n_layers // world_size) * i,
-                )
-                for i in range(world_size)
-            },
-        ),
-    )
-
-    return instance
-
-
-async def execute_test(test: Tests, instance: Instance):
-    world_size = len(test.devs)
-    iid = InstanceId(str(hash(str(test.devs))))
-    _handle, recv, send = new_runner(instance)
-    if world_size > 1:
-        send.send(ConnectToGroup(instance_id=iid))
-    send.send(LoadModel(instance_id=iid))
-
-    match test.kind:
-        case "init":
-            pass
-        case "warmup":
-            send.send(StartWarmup(instance_id=iid))
-        case "inference":
-            send.send(StartWarmup(instance_id=iid))
-            send.send(
-                ChatCompletion(
-                    task_params=ChatCompletionTaskParams(
-                        model=test.model_id,
-                        messages=[
-                            ChatCompletionMessage(
-                                role="system", content="You are a helpful assistant"
-                            ),
-                            ChatCompletionMessage(
-                                role="user", content="What is the capital of France?"
-                            ),
-                        ],
-                    ),
-                    command_id=CommandId("yo"),
-                    instance_id=iid,
-                )
-            )
-
-    send.send(Shutdown(runner_id=RunnerId(hn), instance_id=iid))
-
-    async def map_recv():
-        with recv:
-            try:
-                async for item in recv:
-                    yield item.model_dump_json() + "\n"
-            except anyio.ClosedResourceError:
-                pass
-
-    ret = StreamingResponse(map_recv())
-    ret._pls_dont_gc = _handle  # type: ignore
-    return ret
-
-
-async def jaccl_backend(test: Tests):
-    iid = InstanceId(str(hash(str(test.devs))))
-    return await execute_test(test, jaccl_instance(test, iid))
-
-
-def jaccl_instance(test: Tests, iid: InstanceId):
-    global hn
-    meta = MODEL_CARDS[test.model_id].metadata
-    world_size = len(test.devs)
-    for name, _ in test.devs:
-        if hn.startswith(name):
-            hn = name
-            break
-
-    return MlxJacclInstance(
-        instance_id=iid,
-        ibv_devices=[[None, "rdma_en3"], ["rdma_en3", None]],
-        # rank 0 is always coordinator
-        jaccl_coordinators={
-            NodeId(host[0]): test.devs[0][1] + ":52416" for host in test.devs
-        },
-        shard_assignments=ShardAssignments(
-            model_id=ModelId(test.model_id),
-            node_to_runner={NodeId(host[0]): RunnerId(host[0]) for host in test.devs},
-            runner_to_shard={
-                RunnerId(test.devs[i][0]): TensorShardMetadata(
-                    model_meta=meta,
-                    device_rank=i,
-                    world_size=world_size,
-                    start_layer=meta.n_layers,
-                    end_layer=meta.n_layers,
-                    n_layers=meta.n_layers,
-                )
-                for i in range(world_size)
-            },
-        ),
-    )
-
-
-def new_runner(
-    instance: Instance,
-) -> tuple[mp.Process, MpReceiver[Event], MpSender[Task]]:
-    bound_instance = BoundInstance(
-        instance=instance, bound_runner_id=RunnerId(hn), bound_node_id=NodeId(hn)
-    )
-    ev_send, ev_recv = mp_channel[Event]()
-    task_send, task_recv = mp_channel[Task]()
-
-    runner_process = mp.Process(
-        target=entrypoint,
-        args=(
-            bound_instance,
-            ev_send,
-            task_recv,
-            logger,
-        ),
-    )
-    runner_process._pls_dont_gc = (ev_send, task_recv)  # type: ignore
-    runner_process.start()
-    time.sleep(0.1)
-    return (runner_process, ev_recv, task_send)
-
-
-if __name__ == "__main__":
-    anyio.run(main)
--- a/tests/start_distributed_test.sh
+++ b/tests/start_distributed_test.sh
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-set -euo pipefail
-
-query() {
-  tailscale status | awk -v find="$1" '$2 == find { print $1 }'
-}
-
-if [[ $# -lt 2 ]]; then
-  echo "USAGE: $0 <test kind> [host1] [host2] ..."
-  exit 1
-fi
-
-
-kind=$1
-shift
-
-test_kinds="ring jaccl"
-
-if ! echo "$test_kinds" | grep -q "$kind"; then
-  printf "%s is not a known test kind.\nCurrent test kinds are %s" "$kind" "$test_kinds"
-  exit 1
-fi
-
-hostnames=("$@")
-weaved=()
-ips=()
-for name in "${hostnames[@]}"; do
-  ip=$(query "$name")
-  ips+=("$ip")
-  weaved+=("$name" "$ip")
-done
-
-devs_raw=$(printf "[\"%s\", \"%s\"], " "${weaved[@]}")
-devs="[${devs_raw%, }]"
-
-for i in "${!ips[@]}"; do  
-  { 
-    req="{
-      \"model_id\": \"llama-3.2-1b\",
-      \"devs\": ${devs},
-      \"kind\": \"inference\"
-     }"
-    echo "req $req"
-    curl -sN \
-      -X POST "http://${ips[$i]}:52415/${kind}" \
-      -H "Content-Type: application/json" -d "$req" \
-    2>&1 | sed "s/^/\n${hostnames[$i]}@${ips[$i]}: /" || echo "curl to ${hostnames[$i]} failed"
-  } &
-done
-
-wait
--- a/tmp/disable_bridge_enable_dhcp.sh
+++ b/tmp/disable_bridge_enable_dhcp.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Disable the "Thunderbolt Bridge" network service and the bridge0 interface, then switch en2..en7 to DHCP.
+networksetup -listallnetworkservices | grep -q '^Thunderbolt Bridge$' \
+  && echo "Disabling bridge in networksetup" \
+  && networksetup -setnetworkserviceenabled "Thunderbolt Bridge" off
+# A service listed with a leading '*' is treated as disabled here; report it when that is the case.
+networksetup -listallnetworkservices | grep -q '^\*Thunderbolt Bridge$' \
+  && echo "Bridge disabled in networksetup"
+# Only if bridge0 exists: delete each member interface, bring the bridge down if active, report once inactive.
+ifconfig bridge0 &>/dev/null && {
+  ifconfig bridge0 | grep -q 'member' && echo "Removing bridge members in ifconfig" && {
+    ifconfig bridge0 | \
+      awk '/member/ {print $2}' | \
+      xargs -n1 sudo ifconfig bridge0 deletem
+  }
+  ifconfig bridge0 | grep -q 'status: active' && sudo ifconfig bridge0 down
+  ifconfig bridge0 | grep -q 'status: inactive' && echo "Bridge disabled in ifconfig"
+}
+# Request DHCP on en2..en7; a failure is reported via the || branch and the loop continues to the next interface.
+for iface in $(seq 2 7); do
+  sudo ipconfig set "en$iface" dhcp && echo "enabled dhcp on en$iface" || echo "failed to enable dhcp on en$iface"
+done
+
--- a/uv.lock
+++ b/uv.lock
@@ -108,6 +108,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
 ]

+[[package]]
+name = "aiosqlite"
+version = "0.21.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/13/7d/8bca2bf9a247c2c5dfeec1d7a5f40db6518f88d314b8bca9da29670d2671/aiosqlite-0.21.0.tar.gz", hash = "sha256:131bb8056daa3bc875608c631c678cda73922a2d4ba8aec373b19f18c17e7aa3", size = 13454, upload-time = "2025-02-03T07:30:16.235Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" },
+]
+
 [[package]]
 name = "altgraph"
 version = "0.17.5"
@@ -157,6 +169,24 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" },
 ]

+[[package]]
+name = "base58"
+version = "2.1.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/45/8ae61209bb9015f516102fa559a2914178da1d5868428bd86a1b4421141d/base58-2.1.1.tar.gz", hash = "sha256:c5d0cb3f5b6e81e8e35da5754388ddcc6d0d14b6c6a132cb93d69ed580a7278c", size = 6528, upload-time = "2021-10-30T22:12:17.858Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/45/ec96b29162a402fc4c1c5512d114d7b3787b9d1c2ec241d9568b4816ee23/base58-2.1.1-py3-none-any.whl", hash = "sha256:11a36f4d3ce51dfc1043f3218591ac4eb1ceb172919cebe05b52a5bcc8d245c2", size = 5621, upload-time = "2021-10-30T22:12:16.658Z" },
+]
+
+[[package]]
+name = "bidict"
+version = "0.23.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093, upload-time = "2024-02-18T19:09:05.748Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764, upload-time = "2024-02-18T19:09:04.156Z" },
+]
+
 [[package]]
 name = "certifi"
 version = "2025.10.5"
@@ -166,6 +196,42 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" },
 ]

+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pycparser", marker = "(implementation_name != 'PyPy' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
+    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
+    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
+    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
+    { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" },
+    { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
+    { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" },
+    { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
+    { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.4"
@@ -202,12 +268,50 @@ wheels = [
 ]

 [[package]]
-name = "click"
-version = "8.3.1"
+name = "cryptography"
+version = "46.0.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
+dependencies = [
+    { name = "cffi", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'PyPy' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004, upload-time = "2025-10-15T23:16:52.239Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" },
+    { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" },
+    { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = "2025-10-15T23:17:04.078Z" },
+    { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" },
+    { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" },
+    { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/e2/a510aa736755bffa9d2f75029c229111a1d02f8ecd5de03078f4c18d91a3/cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217", size = 7158012, upload-time = "2025-10-15T23:17:19.982Z" },
+    { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728, upload-time = "2025-10-15T23:17:21.527Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" },
+    { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" },
+    { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" },
+    { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" },
+    { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" },
+    { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248, upload-time = "2025-10-15T23:17:46.294Z" },
+    { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" },
+    { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" },
+    { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" },
+    { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" },
 ]

 [[package]]
@@ -217,21 +321,33 @@ source = { editable = "." }
 dependencies = [
    { name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "aiosqlite", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "base58", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "bidict", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "cryptography", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "exo-pyo3-bindings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "fastapi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "greenlet", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "networkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "rustworkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sqlalchemy", extra = ["asyncio"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sqlmodel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "textual", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typeguard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]

@@ -248,21 +364,33 @@ dev = [
 requires-dist = [
    { name = "aiofiles", specifier = ">=24.1.0" },
    { name = "aiohttp", specifier = ">=3.12.14" },
+    { name = "aiosqlite", specifier = ">=0.21.0" },
    { name = "anyio", specifier = "==4.11.0" },
+    { name = "base58", specifier = ">=2.1.1" },
+    { name = "bidict", specifier = ">=0.23.1" },
+    { name = "cryptography", specifier = ">=45.0.5" },
    { name = "exo-pyo3-bindings", editable = "rust/exo_pyo3_bindings" },
    { name = "fastapi", specifier = ">=0.116.1" },
    { name = "filelock", specifier = ">=3.18.0" },
+    { name = "greenlet", specifier = ">=3.2.4" },
    { name = "huggingface-hub", specifier = ">=0.33.4" },
    { name = "hypercorn", specifier = ">=0.18.0" },
    { name = "loguru", specifier = ">=0.7.3" },
    { name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.30.1" },
    { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = ">=0.30.1" },
-    { name = "mlx-lm", git = "https://github.com/ml-explore/mlx-lm?branch=main" },
+    { name = "mlx-lm", specifier = ">=0.28.3" },
+    { name = "networkx", specifier = ">=3.5" },
    { name = "openai-harmony", specifier = ">=0.0.8" },
+    { name = "protobuf", specifier = ">=6.32.0" },
    { name = "psutil", specifier = ">=7.0.0" },
    { name = "pydantic", specifier = ">=2.11.7" },
+    { name = "rich", specifier = ">=14.1.0" },
    { name = "rustworkx", specifier = ">=0.17.1" },
+    { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.43" },
+    { name = "sqlmodel", specifier = ">=0.0.24" },
+    { name = "textual", specifier = ">=5.3.0" },
    { name = "tiktoken", specifier = ">=0.12.0" },
+    { name = "typeguard", specifier = ">=4.4.4" },
    { name = "types-aiofiles", specifier = ">=24.1.0.20250708" },
 ]

@@ -390,6 +518,32 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
 ]

+[[package]]
+name = "greenlet"
+version = "3.2.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/03/b8/704d753a5a45507a7aab61f18db9509302ed3d0a27ac7e0359ec2905b1a6/greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d", size = 188260, upload-time = "2025-08-07T13:24:33.51Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" },
+    { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
+    { url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
+    { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" },
+    { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" },
+    { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" },
+    { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/da/343cd760ab2f92bac1845ca07ee3faea9fe52bee65f7bcb19f16ad7de08b/greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681", size = 1680760, upload-time = "2025-11-04T12:42:25.341Z" },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"
@@ -447,53 +601,23 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" },
 ]

-[[package]]
-name = "httpcore"
-version = "1.0.9"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
-]
-
-[[package]]
-name = "httpx"
-version = "0.28.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "httpcore", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
-]
-
 [[package]]
 name = "huggingface-hub"
-version = "1.3.0"
+version = "0.36.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "fsspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "hf-xet", marker = "(platform_machine == 'AMD64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'darwin') or (platform_machine == 'amd64' and sys_platform == 'darwin') or (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'amd64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "hf-xet", marker = "(platform_machine == 'aarch64' and sys_platform == 'darwin') or (platform_machine == 'amd64' and sys_platform == 'darwin') or (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'amd64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "shellingham", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typer-slim", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c1/c9/d42b5cfa0a50b77cf9165e13edfaf2e3bd4e0def9cb67b6b8a07224a52ab/huggingface_hub-1.3.0.tar.gz", hash = "sha256:289e2a3586fdf01e35882944eaa06fbd57436de24b6e653d1fab248584acd66b", size = 622092, upload-time = "2026-01-09T09:54:44.663Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/98/63/4910c5fa9128fdadf6a9c5ac138e8b1b6cee4ca44bf7915bbfbce4e355ee/huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25", size = 463358, upload-time = "2025-10-23T12:12:01.413Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b1/5b/c5fde1f56b1f072b3028ec5413f3f5bf472c5891ebb34589cddb1689609f/huggingface_hub-1.3.0-py3-none-any.whl", hash = "sha256:763f450169bb05ea3867990e9d3ba9464eb617b874791301dc81be2c6ffb0bf5", size = 533092, upload-time = "2026-01-09T09:54:43.228Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload-time = "2025-10-23T12:11:59.557Z" },
 ]

 [[package]]
@@ -550,6 +674,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
 ]

+[[package]]
+name = "linkify-it-py"
+version = "2.0.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "uc-micro-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2a/ae/bb56c6828e4797ba5a4821eec7c43b8bf40f69cda4d4f5f8c8a2810ec96a/linkify-it-py-2.0.3.tar.gz", hash = "sha256:68cda27e162e9215c17d786649d1da0021a451bdc436ef9e0fa0ba5234b9b048", size = 27946, upload-time = "2024-02-04T14:48:04.179Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/1e/b832de447dee8b582cac175871d2f6c3d5077cc56d5575cadba1fd1cccfa/linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79", size = 19820, upload-time = "2024-02-04T14:48:02.496Z" },
+]
+
 [[package]]
 name = "loguru"
 version = "0.7.3"
@@ -571,6 +707,23 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c7/d1/a9f36f8ecdf0fb7c9b1e78c8d7af12b8c8754e74851ac7b94a8305540fc7/macholib-1.16.4-py2.py3-none-any.whl", hash = "sha256:da1a3fa8266e30f0ce7e97c6a54eefaae8edd1e5f86f3eb8b95457cae90265ea", size = 38117, upload-time = "2025-11-22T08:28:36.939Z" },
 ]

+[[package]]
+name = "markdown-it-py"
+version = "4.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mdurl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
+]
+
+[package.optional-dependencies]
+linkify = [
+    { name = "linkify-it-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
 [[package]]
 name = "markupsafe"
 version = "3.0.3"
@@ -611,6 +764,27 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" },
 ]

+[[package]]
+name = "mdit-py-plugins"
+version = "0.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markdown-it-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" },
+]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
+]
+
 [[package]]
 name = "mlx"
 version = "0.30.1"
@@ -647,17 +821,20 @@ wheels = [

 [[package]]
 name = "mlx-lm"
-version = "0.30.2"
-source = { git = "https://github.com/ml-explore/mlx-lm?branch=main#39a96ab18b77e933d031a8f5777b6e08c19cbe17" }
+version = "0.28.3"
+source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mlx", marker = "sys_platform == 'darwin'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/51/f6/15e002d52c28d8c544ec3aaf9053677468333e6ef0e76ea68579fd77b76d/mlx_lm-0.28.3.tar.gz", hash = "sha256:75df2b925d343ebaf50b63008dede4fe98cd3b02b1b24b7da71ebeb198d674f0", size = 214455, upload-time = "2025-10-17T21:44:33.921Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/a6/db3b44a5ac1a1174605628b0a477fbe4632d4fad1f94cf08647e27cc79ad/mlx_lm-0.28.3-py3-none-any.whl", hash = "sha256:ec103e2c9a06bd2cbafd41aafc975e40262176f7360d4f53ec342cebb9e0e6ea", size = 294506, upload-time = "2025-10-17T21:44:32.447Z" },
+]

 [[package]]
 name = "mlx-metal"
@@ -738,6 +915,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
 ]

+[[package]]
+name = "networkx"
+version = "3.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" },
+]
+
 [[package]]
 name = "numpy"
 version = "2.3.4"
@@ -808,6 +994,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
 ]

+[[package]]
+name = "platformdirs"
+version = "4.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632, upload-time = "2025-10-08T17:44:48.791Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" },
+]
+
 [[package]]
 name = "pluggy"
 version = "1.6.0"
@@ -916,6 +1111,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e0/95/992c8816a74016eb095e73585d747e0a8ea21a061ed3689474fabb29a395/psutil-7.1.3-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56d974e02ca2c8eb4812c3f76c30e28836fffc311d55d979f1465c1feeb2b68b", size = 264635, upload-time = "2025-11-02T12:26:31.74Z" },
 ]

+[[package]]
+name = "pycparser"
+version = "2.23"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" },
+]
+
 [[package]]
 name = "pydantic"
 version = "2.12.3"
@@ -1150,6 +1354,19 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
 ]

+[[package]]
+name = "rich"
+version = "14.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markdown-it-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" },
+]
+
 [[package]]
 name = "ruff"
 version = "0.14.3"
@@ -1213,34 +1430,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ad/fe/cad1d9762868c7c5dc70c8620074df28ebb1a8e4c17d4c0cb031889c457e/safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac", size = 655957, upload-time = "2025-08-08T13:13:57.029Z" },
 ]

-[[package]]
-name = "sentencepiece"
-version = "0.2.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/15/15/2e7a025fc62d764b151ae6d0f2a92f8081755ebe8d4a64099accc6f77ba6/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad", size = 3228515, upload-time = "2025-08-12T07:00:51.718Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/ba/4a/85fbe1706d4d04a7e826b53f327c4b80f849cf1c7b7c5e31a20a97d8f28b/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dcd8161eee7b41aae57ded06272905dbd680a0a04b91edd0f64790c796b2f706", size = 1943150, upload-time = "2025-08-12T06:59:53.588Z" },
-    { url = "https://files.pythonhosted.org/packages/c2/83/4cfb393e287509fc2155480b9d184706ef8d9fa8cbf5505d02a5792bf220/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c6c8f42949f419ff8c7e9960dbadcfbc982d7b5efc2f6748210d3dd53a7de062", size = 1325651, upload-time = "2025-08-12T06:59:55.073Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/de/5a007fb53b1ab0aafc69d11a5a3dd72a289d5a3e78dcf2c3a3d9b14ffe93/sentencepiece-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:097f3394e99456e9e4efba1737c3749d7e23563dd1588ce71a3d007f25475fff", size = 1253641, upload-time = "2025-08-12T06:59:56.562Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/d2/f552be5928105588f4f4d66ee37dd4c61460d8097e62d0e2e0eec41bc61d/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b670879c370d350557edabadbad1f6561a9e6968126e6debca4029e5547820", size = 1316271, upload-time = "2025-08-12T06:59:58.109Z" },
-    { url = "https://files.pythonhosted.org/packages/96/df/0cfe748ace5485be740fed9476dee7877f109da32ed0d280312c94ec259f/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7f0fd2f2693309e6628aeeb2e2faf6edd221134dfccac3308ca0de01f8dab47", size = 1387882, upload-time = "2025-08-12T07:00:00.701Z" },
-    { url = "https://files.pythonhosted.org/packages/4a/b6/08fe2ce819e02ccb0296f4843e3f195764ce9829cbda61b7513f29b95718/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8dd4b477a7b069648d19363aad0cab9bad2f4e83b2d179be668efa672500dc94", size = 1946052, upload-time = "2025-08-12T07:00:08.136Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/d9/1ea0e740591ff4c6fc2b6eb1d7510d02f3fb885093f19b2f3abd1363b402/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c0f672da370cc490e4c59d89e12289778310a0e71d176c541e4834759e1ae07", size = 1327408, upload-time = "2025-08-12T07:00:09.572Z" },
-    { url = "https://files.pythonhosted.org/packages/99/7e/1fb26e8a21613f6200e1ab88824d5d203714162cf2883248b517deb500b7/sentencepiece-0.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad8493bea8432dae8d6830365352350f3b4144415a1d09c4c8cb8d30cf3b6c3c", size = 1254857, upload-time = "2025-08-12T07:00:11.021Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/85/c72fd1f3c7a6010544d6ae07f8ddb38b5e2a7e33bd4318f87266c0bbafbf/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b81a24733726e3678d2db63619acc5a8dccd074f7aa7a54ecd5ca33ca6d2d596", size = 1315722, upload-time = "2025-08-12T07:00:12.989Z" },
-    { url = "https://files.pythonhosted.org/packages/4a/e8/661e5bd82a8aa641fd6c1020bd0e890ef73230a2b7215ddf9c8cd8e941c2/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a81799d0a68d618e89063fb423c3001a034c893069135ffe51fee439ae474d6", size = 1387452, upload-time = "2025-08-12T07:00:15.088Z" },
-    { url = "https://files.pythonhosted.org/packages/24/9c/89eb8b2052f720a612478baf11c8227dcf1dc28cd4ea4c0c19506b5af2a2/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5d0350b686c320068702116276cfb26c066dc7e65cfef173980b11bb4d606719", size = 1943147, upload-time = "2025-08-12T07:00:21.809Z" },
-    { url = "https://files.pythonhosted.org/packages/82/0b/a1432bc87f97c2ace36386ca23e8bd3b91fb40581b5e6148d24b24186419/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c7f54a31cde6fa5cb030370566f68152a742f433f8d2be458463d06c208aef33", size = 1325624, upload-time = "2025-08-12T07:00:23.289Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/99/bbe054ebb5a5039457c590e0a4156ed073fb0fe9ce4f7523404dd5b37463/sentencepiece-0.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c83b85ab2d6576607f31df77ff86f28182be4a8de6d175d2c33ca609925f5da1", size = 1253670, upload-time = "2025-08-12T07:00:24.69Z" },
-    { url = "https://files.pythonhosted.org/packages/19/ad/d5c7075f701bd97971d7c2ac2904f227566f51ef0838dfbdfdccb58cd212/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1855f57db07b51fb51ed6c9c452f570624d2b169b36f0f79ef71a6e6c618cd8b", size = 1316247, upload-time = "2025-08-12T07:00:26.435Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/03/35fbe5f3d9a7435eebd0b473e09584bd3cc354ce118b960445b060d33781/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01e6912125cb45d3792f530a4d38f8e21bf884d6b4d4ade1b2de5cf7a8d2a52b", size = 1387894, upload-time = "2025-08-12T07:00:28.339Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/11/5b414b9fae6255b5fb1e22e2ed3dc3a72d3a694e5703910e640ac78346bb/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a19adcec27c524cb7069a1c741060add95f942d1cbf7ad0d104dffa0a7d28a2b", size = 1946081, upload-time = "2025-08-12T07:00:36.97Z" },
-    { url = "https://files.pythonhosted.org/packages/77/eb/7a5682bb25824db8545f8e5662e7f3e32d72a508fdce086029d89695106b/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e37e4b4c4a11662b5db521def4e44d4d30ae69a1743241412a93ae40fdcab4bb", size = 1327406, upload-time = "2025-08-12T07:00:38.669Z" },
-    { url = "https://files.pythonhosted.org/packages/03/b0/811dae8fb9f2784e138785d481469788f2e0d0c109c5737372454415f55f/sentencepiece-0.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:477c81505db072b3ab627e7eab972ea1025331bd3a92bacbf798df2b75ea86ec", size = 1254846, upload-time = "2025-08-12T07:00:40.611Z" },
-    { url = "https://files.pythonhosted.org/packages/ef/23/195b2e7ec85ebb6a547969f60b723c7aca5a75800ece6cc3f41da872d14e/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:010f025a544ef770bb395091d57cb94deb9652d8972e0d09f71d85d5a0816c8c", size = 1315721, upload-time = "2025-08-12T07:00:42.914Z" },
-    { url = "https://files.pythonhosted.org/packages/7e/aa/553dbe4178b5f23eb28e59393dddd64186178b56b81d9b8d5c3ff1c28395/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:733e59ff1794d26db706cd41fc2d7ca5f6c64a820709cb801dc0ea31780d64ab", size = 1387458, upload-time = "2025-08-12T07:00:44.56Z" },
-]
-
 [[package]]
 name = "setuptools"
 version = "80.9.0"
@@ -1250,15 +1439,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
 ]

-[[package]]
-name = "shellingham"
-version = "1.5.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
-]
-
 [[package]]
 name = "sniffio"
 version = "1.3.1"
@@ -1268,6 +1448,43 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
 ]

+[[package]]
+name = "sqlalchemy"
+version = "2.0.44"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "greenlet", marker = "(platform_machine == 'AMD64' and sys_platform == 'darwin') or (platform_machine == 'WIN32' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'darwin') or (platform_machine == 'amd64' and sys_platform == 'darwin') or (platform_machine == 'ppc64le' and sys_platform == 'darwin') or (platform_machine == 'win32' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'WIN32' and sys_platform == 'linux') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'amd64' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'linux') or (platform_machine == 'win32' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f0/f2/840d7b9496825333f532d2e3976b8eadbf52034178aac53630d09fe6e1ef/sqlalchemy-2.0.44.tar.gz", hash = "sha256:0ae7454e1ab1d780aee69fd2aae7d6b8670a581d8847f2d1e0f7ddfbf47e5a22", size = 9819830, upload-time = "2025-10-10T14:39:12.935Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/45/d3/c67077a2249fdb455246e6853166360054c331db4613cda3e31ab1cadbef/sqlalchemy-2.0.44-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ff486e183d151e51b1d694c7aa1695747599bb00b9f5f604092b54b74c64a8e1", size = 2135479, upload-time = "2025-10-10T16:03:37.671Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/91/eabd0688330d6fd114f5f12c4f89b0d02929f525e6bf7ff80aa17ca802af/sqlalchemy-2.0.44-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b1af8392eb27b372ddb783b317dea0f650241cea5bd29199b22235299ca2e45", size = 2123212, upload-time = "2025-10-10T16:03:41.755Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/bb/43e246cfe0e81c018076a16036d9b548c4cc649de241fa27d8d9ca6f85ab/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b61188657e3a2b9ac4e8f04d6cf8e51046e28175f79464c67f2fd35bceb0976", size = 3255353, upload-time = "2025-10-10T15:35:31.221Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/96/c6105ed9a880abe346b64d3b6ddef269ddfcab04f7f3d90a0bf3c5a88e82/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b87e7b91a5d5973dda5f00cd61ef72ad75a1db73a386b62877d4875a8840959c", size = 3260222, upload-time = "2025-10-10T15:43:50.124Z" },
+    { url = "https://files.pythonhosted.org/packages/44/16/1857e35a47155b5ad927272fee81ae49d398959cb749edca6eaa399b582f/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:15f3326f7f0b2bfe406ee562e17f43f36e16167af99c4c0df61db668de20002d", size = 3189614, upload-time = "2025-10-10T15:35:32.578Z" },
+    { url = "https://files.pythonhosted.org/packages/88/ee/4afb39a8ee4fc786e2d716c20ab87b5b1fb33d4ac4129a1aaa574ae8a585/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e77faf6ff919aa8cd63f1c4e561cac1d9a454a191bb864d5dd5e545935e5a40", size = 3226248, upload-time = "2025-10-10T15:43:51.862Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" },
+]
+
+[package.optional-dependencies]
+asyncio = [
+    { name = "greenlet", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
+[[package]]
+name = "sqlmodel"
+version = "0.0.27"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "sqlalchemy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/90/5a/693d90866233e837d182da76082a6d4c2303f54d3aaaa5c78e1238c5d863/sqlmodel-0.0.27.tar.gz", hash = "sha256:ad1227f2014a03905aef32e21428640848ac09ff793047744a73dfdd077ff620", size = 118053, upload-time = "2025-10-08T16:39:11.938Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8c/92/c35e036151fe53822893979f8a13e6f235ae8191f4164a79ae60a95d66aa/sqlmodel-0.0.27-py3-none-any.whl", hash = "sha256:667fe10aa8ff5438134668228dc7d7a08306f4c5c4c7e6ad3ad68defa0e7aa49", size = 29131, upload-time = "2025-10-08T16:39:10.917Z" },
+]
+
 [[package]]
 name = "starlette"
 version = "0.49.3"
@@ -1280,6 +1497,23 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/a3/e0/021c772d6a662f43b63044ab481dc6ac7592447605b5b35a957785363122/starlette-0.49.3-py3-none-any.whl", hash = "sha256:b579b99715fdc2980cf88c8ec96d3bf1ce16f5a8051a7c2b84ef9b1cdecaea2f", size = 74340, upload-time = "2025-11-01T15:12:24.387Z" },
 ]

+[[package]]
+name = "textual"
+version = "6.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markdown-it-py", extra = ["linkify"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mdit-py-plugins", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/af/90/59757aa887ddcea61428820274f1a2d1f986feb7880374a5420ab5d37132/textual-6.5.0.tar.gz", hash = "sha256:e5f152cdd47db48a635d23b839721bae4d0e8b6d855e3fede7285218289294e3", size = 1574116, upload-time = "2025-10-31T17:21:53.4Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/42/37/1deba011782a49ea249c73adcf703a39b0249ac9b0e17d1a2e4074df8d57/textual-6.5.0-py3-none-any.whl", hash = "sha256:c5505be7fe606b8054fb88431279885f88352bddca64832f6acd293ef7d9b54f", size = 711848, upload-time = "2025-10-31T17:21:51.134Z" },
+]
+
 [[package]]
 name = "tiktoken"
 version = "0.12.0"
@@ -1350,7 +1584,7 @@ wheels = [

 [[package]]
 name = "transformers"
-version = "5.0.0rc1"
+version = "4.57.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1363,24 +1597,22 @@ dependencies = [
    { name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typer-slim", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/2f/33/c4d7a86f5a60fda56e72f90911ce859044ecdac1dcea4cf904c1eb20ecf2/transformers-5.0.0rc1.tar.gz", hash = "sha256:1fdde557b96ef8ea277c45b8e0d558f1e167fe28a98593f4c4aec0277e335821", size = 8208085, upload-time = "2025-12-11T17:21:23.486Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/68/a39307bcc4116a30b2106f2e689130a48de8bd8a1e635b5e1030e46fcd9e/transformers-4.57.1.tar.gz", hash = "sha256:f06c837959196c75039809636cd964b959f6604b75b8eeec6fdfc0440b89cc55", size = 10142511, upload-time = "2025-10-14T15:39:26.18Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fb/74/fd8aef40d2bf2a15c0e02a0d867ebbf488ccca79fcf45efa51ec8e40c004/transformers-5.0.0rc1-py3-none-any.whl", hash = "sha256:8b9604700769872cab4280dbcde201f557e93f72ee5a85c4592275ab4f15d330", size = 9873024, upload-time = "2025-12-11T17:21:20.348Z" },
+    { url = "https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267", size = 11990925, upload-time = "2025-10-14T15:39:23.085Z" },
 ]

 [[package]]
-name = "typer-slim"
-version = "0.21.1"
+name = "typeguard"
+version = "4.4.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478, upload-time = "2026-01-06T11:21:11.176Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/68/71c1a15b5f65f40e91b65da23b8224dad41349894535a97f63a52e462196/typeguard-4.4.4.tar.gz", hash = "sha256:3a7fd2dffb705d4d0efaed4306a704c89b9dee850b688f060a8b1615a79e5f74", size = 75203, upload-time = "2025-06-18T09:56:07.624Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload-time = "2026-01-06T11:21:12.441Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/a9/e3aee762739c1d7528da1c3e06d518503f8b6c439c35549b53735ba52ead/typeguard-4.4.4-py3-none-any.whl", hash = "sha256:b5f562281b6bfa1f5492470464730ef001646128b180769880468bd84b68b09e", size = 34874, upload-time = "2025-06-18T09:56:05.999Z" },
 ]

 [[package]]
@@ -1413,6 +1645,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
 ]

+[[package]]
+name = "uc-micro-py"
+version = "1.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/7a/146a99696aee0609e3712f2b44c6274566bc368dfe8375191278045186b8/uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a", size = 6043, upload-time = "2024-02-09T16:52:01.654Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/37/87/1f677586e8ac487e29672e4b17455758fce261de06a0d086167bb760361a/uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5", size = 6229, upload-time = "2024-02-09T16:52:00.371Z" },
+]
+
 [[package]]
 name = "urllib3"
 version = "2.5.0"