wuff

started
exo: enable multiprocessing support in PyInstaller bundles
2025-12-23 22:27:50 -05:00 · 2025-12-23 16:54:02 +00:00 · 2025-12-23 14:38:07 +00:00 · 2025-12-23 14:34:21 +00:00 · 2025-12-23 12:53:30 +00:00 · 2025-12-22 19:51:48 +00:00
24 changed files with 737 additions and 167 deletions
--- a/.github/workflows/build-app.yml
+++ b/.github/workflows/build-app.yml
@@ -4,10 +4,12 @@ on:
  push:
    tags:
      - "v*"
+    branches:
+      - "test-app"

 jobs:
  build-macos-app:
-    runs-on: [self-hosted, XCode262_Beta]
+    runs-on: "macos-26"
    env:
      SPARKLE_VERSION: 2.8.1
      SPARKLE_DOWNLOAD_PREFIX: ${{ secrets.SPARKLE_DOWNLOAD_PREFIX }}
@@ -21,6 +23,10 @@ jobs:
      EXO_LIBP2P_NAMESPACE: ${{ github.ref_name }}

    steps:
+      # ============================================================
+      # Checkout and tag validation
+      # ============================================================
+
      - name: Checkout
        uses: actions/checkout@v4
        with:
@@ -28,19 +34,24 @@ jobs:

      - name: Derive release version from tag
        run: |
-          VERSION="${GITHUB_REF_NAME#v}"
-          # Detect alpha tags
-          if [[ "$VERSION" == *-alpha* ]]; then
+          if [[ "$GITHUB_REF_NAME" == "test-app" ]]; then
+            VERSION="0.0.0-alpha.0"
            echo "IS_ALPHA=true" >> $GITHUB_ENV
          else
-            echo "IS_ALPHA=false" >> $GITHUB_ENV
+            VERSION="${GITHUB_REF_NAME#v}"
+            if [[ "$VERSION" == *-alpha* ]]; then
+              echo "IS_ALPHA=true" >> $GITHUB_ENV
+            else
+              echo "IS_ALPHA=false" >> $GITHUB_ENV
+            fi
          fi
          echo "RELEASE_VERSION=$VERSION" >> $GITHUB_ENV

      - name: Ensure tag commit is on main
+        if: github.ref_type == 'tag'
        run: |
          git fetch origin main
-          # Allow alpha tags on any branch, but require production tags to be on main
+          # Alpha tags can be on any branch, production tags must be on main
          if [[ "$IS_ALPHA" == "true" ]]; then
            echo "Alpha tag detected, skipping main branch check"
          elif ! git merge-base --is-ancestor origin/main HEAD; then
@@ -48,27 +59,20 @@ jobs:
            exit 1
          fi

-      - name: Add Homebrew to PATH      
-        run: |      
-          if [ -f /opt/homebrew/bin/brew ]; then      
-            echo "/opt/homebrew/bin" >> $GITHUB_PATH      
-          elif [ -f /usr/local/bin/brew ]; then      
-            echo "/usr/local/bin" >> $GITHUB_PATH      
-          fi
+      # ============================================================
+      # Install dependencies
+      # ============================================================

-      - name: Check Metal toolchain is installed
+      - name: Select Xcode 26.2
        run: |
+          sudo xcode-select -s /Applications/Xcode_26.2.app
          if ! xcrun -f metal >/dev/null 2>&1; then
-            echo "Metal toolchain is not installed. Run 'xcodebuild -downloadComponent MetalToolchain' on the runner host."
+            echo "Metal toolchain is not installed."
            exit 1
          fi
-          echo "Metal toolchain is installed."

-      - name: Install Just
-        run: brew install just
-
-      - name: Install AWS CLI
-        run: brew install awscli
+      - name: Install Homebrew packages
+        run: brew install just awscli macmon

      - name: Install UV
        uses: astral-sh/setup-uv@v6
@@ -76,17 +80,25 @@ jobs:
          enable-cache: true
          cache-dependency-glob: uv.lock

-      - name: Setup Python (UV)
+      - name: Setup Python
        run: |
          uv python install
          uv sync --locked

-      - name: Install macmon
-        run: brew install macmon
-
-      - name: Build PyInstaller bundle
+      - name: Build dashboard
        run: |
-          uv run pyinstaller packaging/pyinstaller/exo.spec
+          cd dashboard
+          npm ci
+          npm run build
+
+      - name: Install Sparkle CLI
+        run: |
+          CLI_URL="${SPARKLE_CLI_URL:-https://github.com/sparkle-project/Sparkle/releases/download/${SPARKLE_VERSION}/Sparkle-${SPARKLE_VERSION}.tar.xz}"
+          echo "Downloading Sparkle CLI from: $CLI_URL"
+          mkdir -p /tmp/sparkle
+          curl --fail --location --output /tmp/sparkle.tar.xz "$CLI_URL"
+          tar -xJf /tmp/sparkle.tar.xz -C /tmp/sparkle --strip-components=1
+          echo "SPARKLE_BIN=/tmp/sparkle/bin" >> $GITHUB_ENV

      - name: Prepare code-signing keychain
        env:
@@ -95,43 +107,47 @@ jobs:
          PROVISIONING_PROFILE: ${{ secrets.PROVISIONING_PROFILE }}
        run: |
          KEYCHAIN_PATH="$HOME/Library/Keychains/build.keychain-db"
-          
-          # Remove stale keychain from previous failed runs
-          security delete-keychain "$KEYCHAIN_PATH" 2>/dev/null || true
-          
+
          # Create fresh keychain
          security create-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$KEYCHAIN_PATH"
-          
+
          # Disable auto-lock (no timeout, no lock-on-sleep)
          security set-keychain-settings "$KEYCHAIN_PATH"
-          
+
          # Add to search list while preserving existing keychains
          security list-keychains -d user -s "$KEYCHAIN_PATH" $(security list-keychains -d user | tr -d '"')
-          
+
          # Set as default and unlock
          security default-keychain -s "$KEYCHAIN_PATH"
          security unlock-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$KEYCHAIN_PATH"
-          
+
          # Import certificate with full access for codesign
          echo "$MACOS_CERTIFICATE" | base64 --decode > /tmp/cert.p12
          security import /tmp/cert.p12 -k "$KEYCHAIN_PATH" -P "$MACOS_CERTIFICATE_PASSWORD" \
            -T /usr/bin/codesign -T /usr/bin/security -T /usr/bin/productbuild
          rm /tmp/cert.p12
-          
+
          # Allow codesign to access the key without prompting
          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$MACOS_CERTIFICATE_PASSWORD" "$KEYCHAIN_PATH"
-          
+
          # Verify keychain is unlocked and identity is available
          echo "Verifying signing identity..."
          security find-identity -v -p codesigning "$KEYCHAIN_PATH"
-          
+
          # Setup provisioning profile
          mkdir -p "$HOME/Library/Developer/Xcode/UserData/Provisioning Profiles"
          echo "$PROVISIONING_PROFILE" | base64 --decode > "$HOME/Library/Developer/Xcode/UserData/Provisioning Profiles/EXO.provisionprofile"
-          
+
          # Export keychain path for other steps
          echo "BUILD_KEYCHAIN_PATH=$KEYCHAIN_PATH" >> $GITHUB_ENV

+      # ============================================================
+      # Build the bundle
+      # ============================================================
+
+      - name: Build PyInstaller bundle
+        run: uv run pyinstaller packaging/pyinstaller/exo.spec
+
      - name: Build Swift app
        env:
          MACOS_CERTIFICATE_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }}
@@ -162,7 +178,7 @@ jobs:
          mkdir -p output/EXO.app/Contents/Resources
          cp -R dist/exo output/EXO.app/Contents/Resources/exo

-      - name: Codesign PyInstaller runtime payload
+      - name: Codesign PyInstaller runtime
        env:
          MACOS_CERTIFICATE_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }}
        run: |
@@ -226,41 +242,40 @@ jobs:

      - name: Generate Sparkle appcast
        env:
-          SPARKLE_VERSION: ${{ env.SPARKLE_VERSION }}
          SPARKLE_DOWNLOAD_PREFIX: ${{ env.SPARKLE_DOWNLOAD_PREFIX }}
          SPARKLE_ED25519_PRIVATE: ${{ secrets.SPARKLE_ED25519_PRIVATE }}
-          SPARKLE_CLI_URL: ${{ secrets.SPARKLE_CLI_URL }}
          IS_ALPHA: ${{ env.IS_ALPHA }}
        run: |
          set -euo pipefail
          cd output
          DOWNLOAD_PREFIX="${SPARKLE_DOWNLOAD_PREFIX:-https://assets.exolabs.net}"
-          mkdir -p sparkle
-          CLI_URL="${SPARKLE_CLI_URL:-}"
-          if [[ -z "$CLI_URL" ]]; then
-            CLI_URL="https://github.com/sparkle-project/Sparkle/releases/download/${SPARKLE_VERSION}/Sparkle-${SPARKLE_VERSION}.tar.xz"
-          fi
-          echo "Downloading Sparkle CLI from: $CLI_URL"
-          curl --fail --location --output sparkle.tar.xz "$CLI_URL"
-          tar -xJf sparkle.tar.xz -C sparkle --strip-components=1
          echo "$SPARKLE_ED25519_PRIVATE" > sparkle_ed25519.key
          chmod 600 sparkle_ed25519.key

-          # Add --channel alpha flag for alpha builds
          CHANNEL_FLAG=""
          if [[ "$IS_ALPHA" == "true" ]]; then
            CHANNEL_FLAG="--channel alpha"
            echo "Generating appcast for alpha channel"
          fi

-          ./sparkle/bin/generate_appcast \
+          $SPARKLE_BIN/generate_appcast \
            --ed-key-file sparkle_ed25519.key \
            --download-url-prefix "$DOWNLOAD_PREFIX" \
            $CHANNEL_FLAG \
            .

-      - name: Upload Sparkle assets to S3
-        if: env.SPARKLE_S3_BUCKET != ''
+      # ============================================================
+      # Upload artifacts
+      # ============================================================
+
+      - name: Upload DMG
+        uses: actions/upload-artifact@v4
+        with:
+          name: EXO-dmg-${{ env.RELEASE_VERSION }}
+          path: output/EXO-${{ env.RELEASE_VERSION }}.dmg
+
+      - name: Upload to S3
+        if: env.SPARKLE_S3_BUCKET != '' && github.ref_type == 'tag'
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -281,22 +296,3 @@ jobs:
            aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-latest.dmg"
          fi
          aws s3 cp appcast.xml "s3://${SPARKLE_S3_BUCKET}/${PREFIX}appcast.xml" --content-type application/xml --cache-control no-cache
-
-      - name: Cleanup keychain
-        if: always()
-        run: |
-          KEYCHAIN_PATH="$HOME/Library/Keychains/build.keychain-db"
-          security default-keychain -s login.keychain || true
-          security delete-keychain "$KEYCHAIN_PATH" 2>/dev/null || true
-
-      - name: Upload app bundle
-        uses: actions/upload-artifact@v4
-        with:
-          name: EXO-app-${{ env.RELEASE_VERSION }}
-          path: output/EXO.app
-
-      - name: Upload DMG
-        uses: actions/upload-artifact@v4
-        with:
-          name: EXO-dmg-${{ env.RELEASE_VERSION }}
-          path: output/EXO-${{ env.RELEASE_VERSION }}.dmg
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 <div align="center">

 <picture>
-  <source media="(prefers-color-scheme: light)" srcset="/docs/exo-logo-black-bg.jpg">
-  <img alt="exo logo" src="/docs/exo-logo-transparent.png" width="50%" height="50%">
+  <source media="(prefers-color-scheme: light)" srcset="/docs/imgs/exo-logo-black-bg.jpg">
+  <img alt="exo logo" src="/docs/imgs/exo-logo-transparent.png" width="50%" height="50%">
 </picture>

 exo: Run your own AI cluster at home with everyday devices. Maintained by [exo labs](https://x.com/exolabs).
@@ -64,13 +64,23 @@ There are two ways to run exo:
 ### Run from Source (Mac & Linux)

 **Prerequisites:**
- [uv](https://github.com/astral-sh/uv) (for Python dependency management)
+- [brew](https://github.com/Homebrew/brew) (for simple package management on macOS)
+  
  ```bash
-  brew install uv
+  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  ```
+- [uv](https://github.com/astral-sh/uv) (for Python dependency management)
 - [macmon](https://github.com/vladkens/macmon) (for hardware monitoring on Apple Silicon)
+- [node](https://github.com/nodejs/node) (for building the dashboard)
+  
  ```bash
-  brew install macmon
+  brew install uv macmon node
+  ```
+- [rust](https://github.com/rust-lang/rustup) (to build Rust bindings, nightly for now)
+
+  ```bash
+  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+  rustup toolchain install nightly
  ```

 Clone the repo, build the dashboard, and run exo:
@@ -92,7 +102,7 @@ This starts the exo dashboard and API at http://localhost:52415/

 exo ships a macOS app that runs in the background on your Mac.

-<img src="docs/macos-app-one-macbook.png" alt="exo macOS App - running on a MacBook" width="35%" />
+<img src="docs/imgs/macos-app-one-macbook.png" alt="exo macOS App - running on a MacBook" width="35%" />

 The macOS app requires macOS Tahoe 26.2 or later.

--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -0,0 +1,64 @@
+# EXO Architecture overview
+
+EXO uses an _Event Sourcing_ architecture, and Erlang-style _message passing_. To facilitate this, we've written a channel library extending anyio channels with inspiration from tokio::sync::mpsc. 
+
+Each logical module - designed to be functional independently of the others - communicates with the rest of the system by sending messages on topics.
+
+## Systems
+
+There are currently 5 major systems:
+
+- Master
+    
+    Executes placement and orders events through a single writer
+
+- Worker
+    
+    Schedules work on a node, gathers system information, etc.
+
+- Runner
+    
+    Executes inference jobs (for now) in an isolated process from the worker for fault-tolerance.
+
+- API
+    
+    Runs a Python web server for exposing state and commands to client applications
+
+- Election
+    
+    Implements a distributed algorithm for master election in unstable networking conditions
+
+## Topics
+
+There are currently 5 topics:
+
+- Commands
+
+    The API and Worker instruct the master when the event log isn't sufficient. Namely, placement and catch-up requests currently go through Commands.
+
+- Local Events
+
+    All nodes write events here, the master reads those events and orders them
+
+- Global Events
+
+    The master writes events here, all nodes read from this topic and fold the produced events into their `State`
+
+- Election Messages
+
+    Before establishing a cluster, nodes communicate here to negotiate a master node.
+
+- Connection Messages
+
+    The networking system writes mDNS-discovered hardware connections here.
+
+
+## Event Sourcing
+
+Lots has been written about event sourcing; for our purposes, it lets us centralize the handling of faulty connections and message ACKing with the following model.
+
+Whenever a device produces side effects, it captures those side effects in an `Event`. `Event`s are then "applied" to their model of `State`, which is globally distributed across the cluster. Whenever a command is received, it is combined with state to produce side effects, captured in yet more events. The rule of thumb is "`Event`s are past tense, `Command`s are imperative". Telling a node to perform some action like "place this model" or "give me a copy of the event log" is represented by a command (the worker's `Task`s are also commands), while "this node is using 300GB of RAM" is an event. Notably, `Event`s SHOULD never cause side effects on their own. There are a few exceptions to this; we're working out the specifics of generalizing the distributed event sourcing model to make it better suit our needs.
+
+## Purity
+
+A significant goal of the current design is to make data flow explicit. Classes should either represent simple data (`CamelCaseModel`s typically, and `TaggedModel`s for unions) or active `System`s (Erlang `Actor`s), with all transformations of that data being "referentially transparent" - destructure and construct new data, don't mutate in place. We have had varying degrees of success with this, and are still exploring where purity makes sense.
--- a/docs/imgs/exo-logo-black-bg.jpg
+++ b/docs/imgs/exo-logo-black-bg.jpg
--- a/docs/imgs/exo-logo-transparent-black-text.png
+++ b/docs/imgs/exo-logo-transparent-black-text.png
--- a/docs/imgs/exo-logo-transparent.png
+++ b/docs/imgs/exo-logo-transparent.png
--- a/docs/imgs/exo-rounded.png
+++ b/docs/imgs/exo-rounded.png
--- a/docs/imgs/exo-screenshot.jpg
+++ b/docs/imgs/exo-screenshot.jpg
--- a/docs/imgs/four-mac-studio-topology.png
+++ b/docs/imgs/four-mac-studio-topology.png
--- a/docs/imgs/macos-app-one-macbook.png
+++ b/docs/imgs/macos-app-one-macbook.png
--- a/packaging/pyinstaller/exo.spec
+++ b/packaging/pyinstaller/exo.spec
@@ -0,0 +1,118 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+import importlib.util
+import shutil
+from pathlib import Path
+
+from PyInstaller.utils.hooks import collect_submodules
+
+PROJECT_ROOT = Path.cwd()
+SOURCE_ROOT = PROJECT_ROOT / "src"
+ENTRYPOINT = SOURCE_ROOT / "exo" / "__main__.py"
+DASHBOARD_DIR = PROJECT_ROOT / "dashboard" / "build"
+EXO_SHARED_MODELS_DIR = SOURCE_ROOT / "exo" / "shared" / "models"
+
+if not ENTRYPOINT.is_file():
+    raise SystemExit(f"Unable to locate Exo entrypoint: {ENTRYPOINT}")
+
+if not DASHBOARD_DIR.is_dir():
+    raise SystemExit(f"Dashboard assets are missing: {DASHBOARD_DIR}")
+
+if not EXO_SHARED_MODELS_DIR.is_dir():
+    raise SystemExit(f"Shared model assets are missing: {EXO_SHARED_MODELS_DIR}")
+
+block_cipher = None
+
+
+def _module_directory(module_name: str) -> Path:
+    spec = importlib.util.find_spec(module_name)
+    if spec is None:
+        raise SystemExit(f"Module '{module_name}' is not available in the current environment.")
+    if spec.submodule_search_locations:
+        return Path(next(iter(spec.submodule_search_locations))).resolve()
+    if spec.origin:
+        return Path(spec.origin).resolve().parent
+    raise SystemExit(f"Unable to determine installation directory for '{module_name}'.")
+
+
+MLX_PACKAGE_DIR = _module_directory("mlx")
+MLX_LIB_DIR = MLX_PACKAGE_DIR / "lib"
+if not MLX_LIB_DIR.is_dir():
+    raise SystemExit(f"mlx Metal libraries are missing: {MLX_LIB_DIR}")
+
+
+def _safe_collect(package_name: str) -> list[str]:
+    try:
+        return collect_submodules(package_name)
+    except ImportError:
+        return []
+
+
+HIDDEN_IMPORTS = sorted(
+    set(
+        collect_submodules("mlx")
+        + _safe_collect("mlx_lm")
+        + _safe_collect("transformers")
+    )
+)
+
+DATAS: list[tuple[str, str]] = [
+    (str(DASHBOARD_DIR), "dashboard"),
+    (str(MLX_LIB_DIR), "mlx/lib"),
+    (str(EXO_SHARED_MODELS_DIR), "exo/shared/models"),
+]
+
+MACMON_PATH = shutil.which("macmon")
+if MACMON_PATH is None:
+    raise SystemExit(
+        "macmon binary not found in PATH. "
+        "Install it via: brew install macmon"
+    )
+
+BINARIES: list[tuple[str, str]] = [
+    (MACMON_PATH, "."),
+]
+
+a = Analysis(
+    [str(ENTRYPOINT)],
+    pathex=[str(SOURCE_ROOT)],
+    binaries=BINARIES,
+    datas=DATAS,
+    hiddenimports=HIDDEN_IMPORTS,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    noarchive=False,
+)
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+exe = EXE(
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,
+    name="exo",
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=False,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    strip=False,
+    upx=False,
+    upx_exclude=[],
+    name="exo",
+)
+
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,6 @@ dependencies = [
    "filelock>=3.18.0",
    "aiosqlite>=0.21.0",
    "networkx>=3.5",
-    "pathlib>=1.0.1",
    "protobuf>=6.32.0",
    "rich>=14.1.0",
    "rustworkx>=0.17.1",
@@ -31,6 +30,7 @@ dependencies = [
    "anyio==4.11.0",
    "bidict>=0.23.1",
    "mlx>=0.29.3",
+    "mlx[cpu]>=0.29.3; sys_platform == 'linux'",
    "mlx-lm>=0.28.3",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
    "hypercorn>=0.18.0",
@@ -44,6 +44,7 @@ exo = "exo.main:main"
 # dependencies only required for development
 [dependency-groups]
 dev = [
+    "pyinstaller>=6.17.0",
    "pytest>=8.4.0",
    "pytest-asyncio>=1.0.0",
    "pytest-env",
--- a/src/exo/main.py
+++ b/src/exo/main.py
@@ -1,4 +1,7 @@
+from multiprocessing import freeze_support
+
 from exo.main import main

 if __name__ == "__main__":
+    freeze_support()
    main()
--- a/src/exo/shared/types/tasks.py
+++ b/src/exo/shared/types/tasks.py
@@ -40,6 +40,10 @@ class LoadModel(BaseTask):  # emitted by Worker
    pass


+class ConnectToGroup(BaseTask):  # emitted by Worker
+    pass
+
+
 class StartWarmup(BaseTask):  # emitted by Worker
    pass

@@ -57,5 +61,11 @@ class Shutdown(BaseTask):  # emitted by Worker


 Task = (
-    CreateRunner | DownloadModel | LoadModel | StartWarmup | ChatCompletion | Shutdown
+    CreateRunner
+    | DownloadModel
+    | LoadModel
+    | StartWarmup
+    | ChatCompletion
+    | Shutdown
+    | ConnectToGroup
 )
--- a/src/exo/shared/types/worker/runners.py
+++ b/src/exo/shared/types/worker/runners.py
@@ -21,7 +21,15 @@ class BaseRunnerStatus(TaggedModel):
        return isinstance(self, RunnerRunning)


-class RunnerWaitingForModel(BaseRunnerStatus):
+class RunnerIdle(BaseRunnerStatus):
+    pass
+
+
+class RunnerConnecting(BaseRunnerStatus):
+    pass
+
+
+class RunnerConnected(BaseRunnerStatus):
    pass


@@ -54,7 +62,9 @@ class RunnerFailed(BaseRunnerStatus):


 RunnerStatus = (
-    RunnerWaitingForModel
+    RunnerIdle
+    | RunnerConnecting
+    | RunnerConnected
    | RunnerLoading
    | RunnerLoaded
    | RunnerWarmingUp
--- a/src/exo/worker/engines/mlx/constants.py
+++ b/src/exo/worker/engines/mlx/constants.py
@@ -10,7 +10,6 @@ KEEP_KV_SIZE: int | None = 1600
 QUANTIZE_MODEL_MODE: str | None = "affine"
 CACHE_GROUP_SIZE: int = 64
 KV_CACHE_BITS: int | None = 8
-TEMPERATURE: float = 1.0

 # TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
 TRUST_REMOTE_CODE: bool = True
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -5,6 +5,7 @@ import time
 from pathlib import Path
 from typing import Any, Callable, cast

+from mlx.core.distributed import Group
 from mlx_lm.models.cache import KVCache, QuantizedKVCache, RotatingKVCache
 from mlx_lm.models.deepseek_v3 import DeepseekV3Model
 from mlx_lm.sample_utils import make_sampler
@@ -13,7 +14,6 @@ from mlx_lm.tokenizer_utils import TokenizerWrapper
 from exo.worker.engines.mlx.constants import (
    CACHE_GROUP_SIZE,
    KV_CACHE_BITS,
-    TEMPERATURE,
    TRUST_REMOTE_CODE,
 )

@@ -67,7 +67,7 @@ def get_weights_size(model_shard_meta: ShardMetadata) -> Memory:
    )


-def mx_barrier(group: mx.distributed.Group | None = None):
+def mx_barrier(group: Group | None = None):
    mx.eval(
        mx.distributed.all_sum(
            mx.array(1.0),
@@ -77,7 +77,7 @@ def mx_barrier(group: mx.distributed.Group | None = None):
    )


-def broadcast_from_zero(value: int, group: mx.distributed.Group | None = None):
+def broadcast_from_zero(value: int, group: Group | None = None):
    if group is None:
        return value

@@ -99,15 +99,9 @@ class HostList(RootModel[list[str]]):

 def mlx_distributed_init(
    bound_instance: BoundInstance,
-) -> mx.distributed.Group:
+) -> Group:
    """
-    Initialize the MLX distributed (runs in thread pool).
-
-    Either hosts or mlx_ibv_devices must be provided:
-    - hosts: traditional host-based connectivity using MLX_HOSTFILE
-    - mlx_ibv_devices: RDMA connectivity matrix using MLX_IBV_DEVICES
-    - mlx_ibv_coordinator: coordinator address (IP:PORT) for RDMA setup
-    - strict: if True, raise an error if the distributed backend is not available
+    Initialize MLX distributed.
    """
    rank = bound_instance.bound_shard.device_rank
    logger.info(f"Starting initialization for rank {rank}")
@@ -154,36 +148,34 @@ def mlx_distributed_init(

 def initialize_mlx(
    bound_instance: BoundInstance,
-) -> tuple[Model, TokenizerWrapper, Callable[[mx.array], mx.array]]:
-    """
-    Initialize the MLX model, tokenizer, and sampler. Runs in the MLX thread.
-    """
+) -> Group | None:
+    # should we unseed it?
+    # TODO: pass in seed from params
    mx.random.seed(42)

-    set_wired_limit_for_model(get_weights_size(bound_instance.bound_shard))
+    if len(bound_instance.instance.shard_assignments.node_to_runner) <= 1:
+        return None
+    return mlx_distributed_init(bound_instance)

-    sampler: Callable[[mx.array], mx.array] = make_sampler(temp=TEMPERATURE)
+
+def load_mlx_items(
+    bound_instance: BoundInstance, group: Group | None
+) -> tuple[Model, TokenizerWrapper, Callable[[mx.array], mx.array]]:
+    # TODO: pass temperature
+    sampler: Callable[[mx.array], mx.array] = make_sampler(temp=0.7)
    logger.info("Created a sampler")

-    if len(bound_instance.instance.shard_assignments.node_to_runner) <= 1:
+    if group is None:
        logger.info(f"Single device used for {bound_instance.instance}")
        model_path = build_model_path(bound_instance.bound_shard.model_meta.model_id)
        start_time = time.perf_counter()
        model, _ = load_model(model_path, strict=True)
        end_time = time.perf_counter()
        logger.info(f"Time taken to load model: {(end_time - start_time):.2f}s")
-        if hasattr(model, "model") and isinstance(model.model, DeepseekV3Model):  # type: ignore
-            pass
-            # model, config = quantize_model(
-            #    model, config, group_size=KV_GROUP_SIZE, bits=ATTENTION_KV_BITS, quant_predicate=quant_predicate, mode=QUANTIZE_MODEL_MODE
-            # )
-
        tokenizer = get_tokenizer(model_path, bound_instance.bound_shard)

    else:
        logger.info("Starting distributed init")
-        group = mlx_distributed_init(bound_instance)
-
        start_time = time.perf_counter()
        model, tokenizer = shard_and_load(bound_instance.bound_shard, group=group)
        end_time = time.perf_counter()
@@ -193,8 +185,6 @@ def initialize_mlx(

    set_wired_limit_for_model(get_weights_size(bound_instance.bound_shard))

-    logger.debug(model)
-
    return cast(Model, model), tokenizer, sampler


--- a/src/exo/worker/plan.py
+++ b/src/exo/worker/plan.py
@@ -3,29 +3,38 @@
 from collections.abc import Mapping, Sequence

 from exo.shared.types.common import NodeId
+from exo.shared.types.models import ModelId
 from exo.shared.types.tasks import (
    ChatCompletion,
    CreateRunner,
    DownloadModel,
    LoadModel,
+    ConnectToGroup,
    Shutdown,
    StartWarmup,
    Task,
    TaskId,
    TaskStatus,
 )
-from exo.shared.types.worker.downloads import DownloadCompleted, DownloadProgress
+from exo.shared.types.worker.downloads import (
+    DownloadCompleted,
+    DownloadOngoing,
+    DownloadProgress,
+)
 from exo.shared.types.worker.instances import BoundInstance, Instance, InstanceId
 from exo.shared.types.worker.runners import (
    RunnerFailed,
    RunnerId,
+    RunnerIdle,
+    RunnerConnecting,
+    RunnerConnected,
    RunnerLoaded,
    RunnerLoading,
    RunnerReady,
    RunnerRunning,
    RunnerStatus,
-    RunnerWaitingForModel,
    RunnerWarmingUp,
+    ShardAssignments,
 )
 from exo.shared.types.worker.shards import ShardMetadata
 from exo.worker.runner.runner_supervisor import RunnerSupervisor
@@ -36,7 +45,7 @@ def plan(
    # Runners is expected to be FRESH and so should not come from state
    runners: Mapping[RunnerId, RunnerSupervisor],
    # DL_status is expected to be FRESH and so should not come from state
-    download_status: Mapping[ShardMetadata, DownloadProgress],
+    download_status: Mapping[ModelId, DownloadProgress],
    # gdls is not expected to be fresh
    global_download_status: Mapping[NodeId, Sequence[DownloadProgress]],
    instances: Mapping[InstanceId, Instance],
@@ -48,7 +57,8 @@ def plan(
        _kill_runner(runners, all_runners, instances)
        or _create_runner(node_id, runners, instances)
        or _model_needs_download(runners, download_status)
-        or _load_model(runners, all_runners, global_download_status)
+        or _init_distributed_backend(runners, all_runners, global_download_status)
+        or _load_model(runners, all_runners)
        or _ready_to_warmup(runners, all_runners)
        or _pending_tasks(runners, tasks, all_runners)
    )
@@ -103,12 +113,15 @@ def _create_runner(

 def _model_needs_download(
    runners: Mapping[RunnerId, RunnerSupervisor],
-    download_status: Mapping[ShardMetadata, DownloadProgress],
+    download_status: Mapping[ModelId, DownloadProgress],
 ) -> DownloadModel | None:
    for runner in runners.values():
-        if (
-            isinstance(runner.status, RunnerWaitingForModel)
-            and runner.bound_instance.bound_shard not in download_status
+        if isinstance(runner.status, RunnerIdle) and (
+            runner.bound_instance.bound_shard.model_meta.model_id not in download_status
+            or not isinstance(
+                download_status[runner.bound_instance.bound_shard.model_meta.model_id],
+                (DownloadOngoing, DownloadCompleted),
+            )
        ):
            # We don't invalidate download_status randomly in case a file gets deleted on disk
            return DownloadModel(
@@ -117,50 +130,88 @@ def _model_needs_download(
            )


-""" --- TODO!
-def _init_backend(
+def _init_distributed_backend(
    runners: Mapping[RunnerId, RunnerSupervisor],
    all_runners: Mapping[RunnerId, RunnerStatus],
-) -> LoadModel | None:
-    for runner in runner.values()
-    pass
-"""
+    global_download_status: Mapping[NodeId, Sequence[DownloadProgress]],
+):
+    for runner in runners.values():
+        instance = runner.bound_instance.instance
+        shard_assignments = instance.shard_assignments
+
+        is_single_node_instance = len(shard_assignments.runner_to_shard) == 1
+        if is_single_node_instance:
+            continue
+
+        all_local_downloads_complete = all(
+            nid in global_download_status
+            and any(
+                isinstance(dp, DownloadCompleted)
+                and dp.shard_metadata.model_meta.model_id == shard_assignments.model_id
+                for dp in global_download_status[nid]
+            )
+            for nid in shard_assignments.node_to_runner.keys()
+        )
+
+        runner_is_idle = isinstance(runner.status, RunnerIdle)
+        all_runners_connecting = all(
+            isinstance(
+                all_runners.get(global_runner_id),
+                (RunnerConnecting, RunnerIdle),
+            )
+            for global_runner_id in shard_assignments.runner_to_shard
+        )
+
+        if not (
+            all_local_downloads_complete and runner_is_idle and all_runners_connecting
+        ):
+            continue
+
+        runner_id = runner.bound_instance.bound_runner_id
+
+        shard = runner.bound_instance.bound_shard
+        device_rank = shard.device_rank
+        world_size = shard.world_size
+
+        assert device_rank < world_size
+        assert device_rank >= 0
+
+        accepting_ranks = device_rank < world_size - 1
+
+        # Rank = n-1
+        connecting_rank_ready = device_rank == world_size - 1 and all(
+            isinstance(all_runners.get(global_runner_id, None), RunnerConnecting)
+            for global_runner_id in shard_assignments.runner_to_shard
+            if global_runner_id != runner_id
+        )
+
+        if not (accepting_ranks or connecting_rank_ready):
+            continue
+
+        return ConnectToGroup(instance_id=instance.instance_id)
+
+    return None


 def _load_model(
    runners: Mapping[RunnerId, RunnerSupervisor],
    all_runners: Mapping[RunnerId, RunnerStatus],
-    global_download_status: Mapping[NodeId, Sequence[DownloadProgress]],
 ) -> LoadModel | None:
    for runner in runners.values():
        instance = runner.bound_instance.instance
        shard_assignments = instance.shard_assignments

-        all_downloads_complete_local = all(
-            nid in global_download_status
-            and any(
-                isinstance(dp, DownloadCompleted)
-                and dp.shard_metadata == shard_assignments.runner_to_shard[rid]
-                for dp in global_download_status[nid]
-            )
-            for nid, rid in shard_assignments.node_to_runner.items()
-        )
+        is_runner_waiting = isinstance(runner.status, RunnerConnected)

-        runner_is_waiting = isinstance(runner.status, RunnerWaitingForModel)
-
-        all_runners_expecting_model = all(
+        all_ready_for_model = all(
            isinstance(
-                all_runners.get(global_runner_id),
-                (RunnerWaitingForModel, RunnerLoading, RunnerLoaded),
+                all_runners.get(global_runner_id, None),
+                (RunnerConnected, RunnerLoading, RunnerLoaded),
            )
            for global_runner_id in shard_assignments.runner_to_shard
        )

-        if (
-            all_downloads_complete_local
-            and runner_is_waiting
-            and all_runners_expecting_model
-        ):
+        if is_runner_waiting and all_ready_for_model:
            return LoadModel(instance_id=instance.instance_id)

    return None
@@ -183,8 +234,8 @@ def _ready_to_warmup(
        assert device_rank < world_size
        assert device_rank >= 0

-        # Rank != n-1
-        accepting_ranks_ready = device_rank != world_size - 1 and all(
+        # Rank != 0
+        accepting_ranks_ready = device_rank > 0 and all(
            isinstance(
                all_runners.get(global_runner_id, None),
                (RunnerLoaded, RunnerWarmingUp),
@@ -192,8 +243,8 @@ def _ready_to_warmup(
            for global_runner_id in shard_assignments.runner_to_shard
        )

-        # Rank = n-1
-        connecting_rank_ready = device_rank == world_size - 1 and all(
+        # Rank = 0
+        connecting_rank_ready = device_rank == 0 and all(
            isinstance(all_runners.get(global_runner_id, None), RunnerWarmingUp)
            for global_runner_id in shard_assignments.runner_to_shard
            if global_runner_id != runner_id
--- a/src/exo/worker/runner/runner.py
+++ b/src/exo/worker/runner/runner.py
@@ -16,6 +16,7 @@ from exo.shared.types.tasks import (
    StartWarmup,
    Task,
    TaskStatus,
+    ConnectToGroup,
 )
 from exo.shared.types.worker.instances import BoundInstance
 from exo.shared.types.worker.runner_response import (
@@ -29,13 +30,16 @@ from exo.shared.types.worker.runners import (
    RunnerRunning,
    RunnerShutdown,
    RunnerStatus,
-    RunnerWaitingForModel,
+    RunnerConnecting,
+    RunnerConnected,
+    RunnerIdle,
    RunnerWarmingUp,
 )
 from exo.utils.channels import ClosedResourceError, MpReceiver, MpSender
 from exo.worker.engines.mlx.generator.generate import mlx_generate, warmup_inference
 from exo.worker.engines.mlx.utils_mlx import (
    initialize_mlx,
+    load_mlx_items,
    mlx_force_oom,
 )
 from exo.worker.runner.bootstrap import logger
@@ -63,9 +67,10 @@ def main(
        model = None
        tokenizer = None
        sampler = None
+        group = None

-        current_status: RunnerStatus = RunnerWaitingForModel()
-        logger.info("runner waiting for model")
+        current_status: RunnerStatus = RunnerIdle()
+        logger.info("runner created")
        event_sender.send(
            RunnerStatusUpdated(runner_id=runner_id, runner_status=current_status)
        )
@@ -78,9 +83,22 @@ def main(
                )
                event_sender.send(TaskAcknowledged(task_id=task.task_id))
                match task:
-                    case LoadModel() if isinstance(
-                        current_status, (RunnerWaitingForModel, RunnerFailed)
+                    case ConnectToGroup() if isinstance(
+                        current_status, (RunnerIdle, RunnerFailed)
                    ):
+                        logger.info("runner connecting")
+                        current_status = RunnerConnecting()
+                        event_sender.send(
+                            RunnerStatusUpdated(
+                                runner_id=runner_id, runner_status=current_status
+                            )
+                        )
+                        group = initialize_mlx(bound_instance)
+
+                        logger.info("runner connected")
+                        current_status = RunnerConnected()
+
+                    case LoadModel() if isinstance(current_status, RunnerConnected):
                        current_status = RunnerLoading()
                        logger.info("runner loading")
                        event_sender.send(
@@ -89,7 +107,9 @@ def main(
                            )
                        )

-                        model, tokenizer, sampler = initialize_mlx(bound_instance)
+                        model, tokenizer, sampler = load_mlx_items(
+                            bound_instance, group
+                        )

                        current_status = RunnerLoaded()
                        logger.info("runner loaded")
--- a/src/exo/worker/tests/constants.py
+++ b/src/exo/worker/tests/constants.py
@@ -24,3 +24,9 @@ TASK_2_ID: Final[TaskId] = TaskId("66666666-6666-4666-8666-666666666666")

 COMMAND_1_ID: Final[CommandId] = CommandId("77777777-7777-4777-8777-777777777777")
 COMMAND_2_ID: Final[CommandId] = CommandId("88888888-8888-4888-8888-888888888888")
+
+SHUTDOWN_TASK_ID = TaskId("shutdown")
+CHAT_COMPLETION_TASK_ID = TaskId("chat-completion")
+INITIALIZATION_TASK_ID = TaskId("initialisation")
+LOAD_TASK_ID = TaskId("load")
+WARMUP_TASK_ID = TaskId("warmup")
--- a/src/exo/worker/tests/unittests/conftest.py
+++ b/src/exo/worker/tests/unittests/conftest.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from dataclasses import dataclass

 from exo.shared.types.common import NodeId
@@ -14,6 +16,7 @@ from exo.shared.types.worker.runners import RunnerId, RunnerStatus, ShardAssignm
 from exo.shared.types.worker.shards import PipelineShardMetadata, ShardMetadata


+# Runner supervisor without multiprocessing logic.
 @dataclass(frozen=True)
 class FakeRunnerSupervisor:
    bound_instance: BoundInstance
@@ -35,6 +38,8 @@ def get_pipeline_shard_metadata(
            pretty_name=str(model_id),
            storage_size=Memory.from_mb(100000),
            n_layers=32,
+            # hidden_size=2048,
+            # supports_tensor=False,
        ),
        device_rank=device_rank,
        world_size=world_size,
@@ -69,3 +74,18 @@ def get_mlx_ring_instance(
        ),
        hosts=[],
    )
+
+
+def get_bound_mlx_ring_instance(
+    instance_id: InstanceId, model_id: ModelId, runner_id: RunnerId, node_id: NodeId
+) -> BoundInstance:
+    shard = get_pipeline_shard_metadata(model_id=model_id, device_rank=0, world_size=1)
+    instance = get_mlx_ring_instance(
+        instance_id=instance_id,
+        model_id=model_id,
+        node_to_runner={node_id: runner_id},
+        runner_to_shard={runner_id: shard},
+    )
+    return BoundInstance(
+        instance=instance, bound_runner_id=runner_id, bound_node_id=node_id
+    )
--- a/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py
@@ -0,0 +1,199 @@
+# Check tasks are complete before runner is ever ready.
+from collections.abc import Iterable
+import pytest
+
+import exo.worker.runner.runner as mlx_runner
+from exo.shared.types.api import ChatCompletionMessage
+from exo.shared.types.chunks import TokenChunk
+from exo.shared.types.events import (
+    Event,
+    ChunkGenerated,
+    RunnerStatusUpdated,
+    TaskAcknowledged,
+    TaskStatusUpdated,
+)
+from exo.shared.types.tasks import (
+    ChatCompletion,
+    ChatCompletionTaskParams,
+    ConnectToGroup,
+    LoadModel,
+    Shutdown,
+    StartWarmup,
+    TaskStatus,
+    Task,
+)
+from exo.shared.types.worker.runner_response import GenerationResponse
+from exo.shared.types.worker.runners import (
+    RunnerIdle,
+    RunnerLoaded,
+    RunnerLoading,
+    RunnerReady,
+    RunnerRunning,
+    RunnerShutdown,
+    RunnerConnecting,
+    RunnerConnected,
+    RunnerWarmingUp,
+)
+from exo.utils.channels import mp_channel
+
+from ...constants import (
+    CHAT_COMPLETION_TASK_ID,
+    COMMAND_1_ID,
+    INITIALIZATION_TASK_ID,
+    INSTANCE_1_ID,
+    LOAD_TASK_ID,
+    MODEL_A_ID,
+    NODE_A,
+    RUNNER_1_ID,
+    SHUTDOWN_TASK_ID,
+    WARMUP_TASK_ID,
+)
+from ..conftest import get_bound_mlx_ring_instance
+
+
+INIT_TASK = ConnectToGroup(
+    task_id=INITIALIZATION_TASK_ID,
+    instance_id=INSTANCE_1_ID,
+)
+
+LOAD_TASK = LoadModel(
+    task_id=LOAD_TASK_ID,
+    instance_id=INSTANCE_1_ID,
+)
+
+WARMUP_TASK = StartWarmup(
+    task_id=WARMUP_TASK_ID,
+    instance_id=INSTANCE_1_ID,
+)
+
+SHUTDOWN_TASK = Shutdown(
+    task_id=SHUTDOWN_TASK_ID,
+    instance_id=INSTANCE_1_ID,
+    runner_id=RUNNER_1_ID,
+)
+
+CHAT_PARAMS = ChatCompletionTaskParams(
+    model=str(MODEL_A_ID),
+    messages=[ChatCompletionMessage(role="user", content="hello")],
+    stream=True,
+    max_tokens=4,
+    temperature=0.0,
+)
+
+CHAT_TASK = ChatCompletion(
+    task_id=CHAT_COMPLETION_TASK_ID,
+    command_id=COMMAND_1_ID,
+    task_params=CHAT_PARAMS,
+    instance_id=INSTANCE_1_ID,
+)
+
+
+def assert_events_equal(test_events: Iterable[Event], true_events: Iterable[Event]):
+    for test_event, true_event in zip(test_events, true_events, strict=True):
+        test_event.event_id = true_event.event_id
+        assert test_event == true_event, f"{test_event} != {true_event}"
+
+
+@pytest.fixture
+def patch_out_mlx(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setattr(mlx_runner, "initialize_mlx", lambda bound_instance: object())
+    monkeypatch.setattr(
+        mlx_runner,
+        "load_mlx_items",
+        lambda bound_instance, group: (object(), object(), object()),
+    )
+    monkeypatch.setattr(mlx_runner, "warmup_inference", lambda **kwargs: 1)
+    monkeypatch.setattr(mlx_runner, "_check_for_debug_prompts", lambda *_: None)
+
+    def fake_generate(model, tokenizer, sampler, task):
+        yield GenerationResponse(token=0, text="hi", finish_reason="stop")
+
+    monkeypatch.setattr(mlx_runner, "mlx_generate", fake_generate)
+
+
+def _run(tasks: Iterable[Task]):
+    bound_instance = get_bound_mlx_ring_instance(
+        instance_id=INSTANCE_1_ID,
+        model_id=MODEL_A_ID,
+        runner_id=RUNNER_1_ID,
+        node_id=NODE_A,
+    )
+
+    task_sender, task_receiver = mp_channel[Task]()
+    event_sender, event_receiver = mp_channel[Event]()
+
+    with task_sender, event_receiver:
+        for t in tasks:
+            task_sender.send(t)
+
+        # Replace close()/join() with no-ops before calling main() - presumably so the channels stay usable for collect()
+        def nothin() -> None: pass
+        event_sender.close = nothin
+        event_sender.join =  nothin
+        task_receiver.close = nothin 
+        task_receiver.join = nothin 
+
+        mlx_runner.main(bound_instance, event_sender, task_receiver)
+
+        return event_receiver.collect()
+
+
+def test_events_processed_in_correct_order(patch_out_mlx: pytest.MonkeyPatch):
+    events = _run([INIT_TASK, LOAD_TASK, WARMUP_TASK, CHAT_TASK, SHUTDOWN_TASK])
+
+    expected_chunk = ChunkGenerated(
+        command_id=COMMAND_1_ID,
+        chunk=TokenChunk(
+            idx=0,
+            model=MODEL_A_ID,
+            text="hi",
+            token_id=0,
+            finish_reason="stop",
+        ),
+    )
+
+    assert_events_equal(
+        events,
+        [
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerIdle()),
+            TaskStatusUpdated(
+                task_id=INITIALIZATION_TASK_ID, task_status=TaskStatus.Running
+            ),
+            TaskAcknowledged(task_id=INITIALIZATION_TASK_ID),
+            RunnerStatusUpdated(
+                runner_id=RUNNER_1_ID, runner_status=RunnerConnecting()
+            ),
+            TaskStatusUpdated(
+                task_id=INITIALIZATION_TASK_ID, task_status=TaskStatus.Complete
+            ),
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerConnected()),
+            TaskStatusUpdated(task_id=LOAD_TASK_ID, task_status=TaskStatus.Running),
+            TaskAcknowledged(task_id=LOAD_TASK_ID),
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerLoading()),
+            TaskStatusUpdated(task_id=LOAD_TASK_ID, task_status=TaskStatus.Complete),
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerLoaded()),
+            TaskStatusUpdated(task_id=WARMUP_TASK_ID, task_status=TaskStatus.Running),
+            TaskAcknowledged(task_id=WARMUP_TASK_ID),
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerWarmingUp()),
+            TaskStatusUpdated(task_id=WARMUP_TASK_ID, task_status=TaskStatus.Complete),
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerReady()),
+            TaskStatusUpdated(
+                task_id=CHAT_COMPLETION_TASK_ID, task_status=TaskStatus.Running
+            ),
+            TaskAcknowledged(task_id=CHAT_COMPLETION_TASK_ID),
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerRunning()),
+            expected_chunk,
+            TaskStatusUpdated(
+                task_id=CHAT_COMPLETION_TASK_ID, task_status=TaskStatus.Complete
+            ),
+            # CHAT COMPLETION TASK SHOULD COMPLETE BEFORE RUNNER READY
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerReady()),
+            TaskStatusUpdated(task_id=SHUTDOWN_TASK_ID, task_status=TaskStatus.Running),
+            TaskAcknowledged(task_id=SHUTDOWN_TASK_ID),
+            TaskStatusUpdated(
+                task_id=SHUTDOWN_TASK_ID, task_status=TaskStatus.Complete
+            ),
+            # SPECIAL EXCEPTION FOR RUNNER SHUTDOWN
+            RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerShutdown()),
+        ],
+    )
--- a/src/exo/worker/tests/unittests/test_runner/test_runner_supervisor.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_runner_supervisor.py
@@ -0,0 +1 @@
+# TODO: add unit tests for the runner supervisor.
--- a/uv.lock
+++ b/uv.lock
@@ -120,6 +120,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" },
 ]

+[[package]]
+name = "altgraph"
+version = "0.17.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/f8/97fdf103f38fed6792a1601dbc16cc8aac56e7459a9fff08c812d8ae177a/altgraph-0.17.5.tar.gz", hash = "sha256:c87b395dd12fabde9c99573a9749d67da8d29ef9de0125c7f536699b4a9bc9e7", size = 48428, upload-time = "2025-11-21T20:35:50.583Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a9/ba/000a1996d4308bc65120167c21241a3b205464a2e0b58deda26ae8ac21d1/altgraph-0.17.5-py2.py3-none-any.whl", hash = "sha256:f3a22400bce1b0c701683820ac4f3b159cd301acab067c51c653e06961600597", size = 21228, upload-time = "2025-11-21T20:35:49.444Z" },
+]
+
 [[package]]
 name = "annotated-doc"
 version = "0.0.3"
@@ -325,9 +334,9 @@ dependencies = [
    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "networkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pathlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -343,6 +352,7 @@ dependencies = [

 [package.dev-dependencies]
 dev = [
+    { name = "pyinstaller", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest-asyncio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest-env", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -366,9 +376,9 @@ requires-dist = [
    { name = "hypercorn", specifier = ">=0.18.0" },
    { name = "loguru", specifier = ">=0.7.3" },
    { name = "mlx", specifier = ">=0.29.3" },
+    { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = ">=0.29.3" },
    { name = "mlx-lm", specifier = ">=0.28.3" },
    { name = "networkx", specifier = ">=3.5" },
-    { name = "pathlib", specifier = ">=1.0.1" },
    { name = "protobuf", specifier = ">=6.32.0" },
    { name = "psutil", specifier = ">=7.0.0" },
    { name = "pydantic", specifier = ">=2.11.7" },
@@ -384,6 +394,7 @@ requires-dist = [

 [package.metadata.requires-dev]
 dev = [
+    { name = "pyinstaller", specifier = ">=6.17.0" },
    { name = "pytest", specifier = ">=8.4.0" },
    { name = "pytest-asyncio", specifier = ">=1.0.0" },
    { name = "pytest-env" },
@@ -682,6 +693,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
 ]

+[[package]]
+name = "macholib"
+version = "1.16.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "altgraph", marker = "sys_platform == 'darwin'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/10/2f/97589876ea967487978071c9042518d28b958d87b17dceb7cdc1d881f963/macholib-1.16.4.tar.gz", hash = "sha256:f408c93ab2e995cd2c46e34fe328b130404be143469e41bc366c807448979362", size = 59427, upload-time = "2025-11-22T08:28:38.373Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c7/d1/a9f36f8ecdf0fb7c9b1e78c8d7af12b8c8754e74851ac7b94a8305540fc7/macholib-1.16.4-py2.py3-none-any.whl", hash = "sha256:da1a3fa8266e30f0ce7e97c6a54eefaae8edd1e5f86f3eb8b95457cae90265ea", size = 38117, upload-time = "2025-11-22T08:28:36.939Z" },
+]
+
 [[package]]
 name = "markdown-it-py"
 version = "4.0.0"
@@ -774,6 +797,19 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/f2/90/d481dd70b351e28718cfc9a0deb229a75e140abda3ed59284cf635f93f12/mlx-0.29.3-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:e217a99ece66832a2e631131df32e9feb047276b68ac59ca0ad63735842f6dd0", size = 649781, upload-time = "2025-10-17T19:21:26.075Z" },
 ]

+[package.optional-dependencies]
+cpu = [
+    { name = "mlx-cpu", marker = "sys_platform == 'linux'" },
+]
+
+[[package]]
+name = "mlx-cpu"
+version = "0.29.3"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6d/ff/474abb13000ca641985084055c145a70c1214973d867979ebfe7420c2df2/mlx_cpu-0.29.3-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:e76763434a9d1d878bb0d6dd965ad319a0a63b0b1d69314e4c97d8332f5e7170", size = 10225301, upload-time = "2025-10-17T19:24:03.544Z" },
+]
+
 [[package]]
 name = "mlx-lm"
 version = "0.28.3"
@@ -928,15 +964,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
 ]

-[[package]]
-name = "pathlib"
-version = "1.0.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ac/aa/9b065a76b9af472437a0059f77e8f962fe350438b927cb80184c32f075eb/pathlib-1.0.1.tar.gz", hash = "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f", size = 49298, upload-time = "2014-09-03T15:41:57.18Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" },
-]
-
 [[package]]
 name = "platformdirs"
 version = "4.5.0"
@@ -1126,6 +1153,42 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
 ]

+[[package]]
+name = "pyinstaller"
+version = "6.17.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "altgraph", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "macholib", marker = "sys_platform == 'darwin'" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pyinstaller-hooks-contrib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/01/80/9e0dad9c69a7cfd4b5aaede8c6225d762bab7247a2a6b7651e1995522001/pyinstaller-6.17.0.tar.gz", hash = "sha256:be372bd911392b88277e510940ac32a5c2a6ce4b8d00a311c78fa443f4f27313", size = 4014147, upload-time = "2025-11-24T19:43:32.109Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/35/f5/37e419d84d5284ecab11ef8b61306a3b978fe6f0fd69a9541e16bfd72e65/pyinstaller-6.17.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:4e446b8030c6e5a2f712e3f82011ecf6c7ead86008357b0d23a0ec4bcde31dac", size = 1031880, upload-time = "2025-11-24T19:42:30.862Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/b6/2e184879ab9cf90a1d2867fdd34d507c4d246b3cc52ca05aad00bfc70ee7/pyinstaller-6.17.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:aa9fd87aaa28239c6f0d0210114029bd03f8cac316a90bab071a5092d7c85ad7", size = 731968, upload-time = "2025-11-24T19:42:35.421Z" },
+    { url = "https://files.pythonhosted.org/packages/40/76/f529de98f7e5cce7904c19b224990003fc2267eda2ee5fdd8452acb420a9/pyinstaller-6.17.0-py3-none-manylinux2014_i686.whl", hash = "sha256:060b122e43e7c0b23e759a4153be34bd70914135ab955bb18a67181e0dca85a2", size = 743217, upload-time = "2025-11-24T19:42:39.286Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/10/c02bfbb050cafc4c353cf69baf95407e211e1372bd286ab5ce5cbc13a30a/pyinstaller-6.17.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:cd213d1a545c97dfe4a3c40e8213ff7c5127fc115c49229f27a3fa541503444b", size = 741119, upload-time = "2025-11-24T19:42:43.12Z" },
+    { url = "https://files.pythonhosted.org/packages/11/9d/69fdacfd9335695f5900a376cfe3e4aed28f0720ffc15fee81fdb9d920bc/pyinstaller-6.17.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:89c0d18ba8b62c6607abd8cf2299ae5ffa5c36d8c47f39608ce8c3f357f6099f", size = 738111, upload-time = "2025-11-24T19:42:46.97Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/1e/e8e36e1568f6865ac706c6e1f875c1a346ddaa9f9a8f923d66545d2240ed/pyinstaller-6.17.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2a147b83cdebb07855bd5a663600891550062373a2ca375c58eacead33741a27", size = 737795, upload-time = "2025-11-24T19:42:50.675Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/15/9dc0f81ccb746c27bfa6ee53164422fe47ee079c7a717d9c4791aba78797/pyinstaller-6.17.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:f8cfbbfa6708e54fb936df6dd6eafaf133e84efb0d2fe25b91cfeefa793c4ca4", size = 736891, upload-time = "2025-11-24T19:42:54.458Z" },
+    { url = "https://files.pythonhosted.org/packages/97/e6/bed54821c1ebe1275c559661d3e7bfa23c406673b515252dfbf89db56c65/pyinstaller-6.17.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:97f4c1942f7b4cd73f9e38b49cc8f5f8a6fbb44922cb60dd3073a189b77ee1ae", size = 736752, upload-time = "2025-11-24T19:42:58.144Z" },
+]
+
+[[package]]
+name = "pyinstaller-hooks-contrib"
+version = "2025.10"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/26/4f/e33132acdb8f732978e577b8a0130a412cbfe7a3414605e3fd380a975522/pyinstaller_hooks_contrib-2025.10.tar.gz", hash = "sha256:a1a737e5c0dccf1cf6f19a25e2efd109b9fec9ddd625f97f553dac16ee884881", size = 168155, upload-time = "2025-11-22T09:34:36.138Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/86/de/a7688eed49a1d3df337cdaa4c0d64e231309a52f269850a72051975e3c4a/pyinstaller_hooks_contrib-2025.10-py3-none-any.whl", hash = "sha256:aa7a378518772846221f63a84d6306d9827299323243db890851474dfd1231a9", size = 447760, upload-time = "2025-11-22T09:34:34.753Z" },
+]
+
 [[package]]
 name = "pytest"
 version = "8.4.2"
@@ -1337,6 +1400,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ad/fe/cad1d9762868c7c5dc70c8620074df28ebb1a8e4c17d4c0cb031889c457e/safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac", size = 655957, upload-time = "2025-08-08T13:13:57.029Z" },
 ]

+[[package]]
+name = "setuptools"
+version = "80.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
+]
+
 [[package]]
 name = "sniffio"
 version = "1.3.1"
Author	SHA1	Message	Date
Evan	5fd080a246	wuff	2025-12-23 16:54:02 +00:00
Evan	201c61f9cd	started	2025-12-23 14:38:07 +00:00
Jake Hillion	a24bdf7680	exo: enable multiprocessing support in PyInstaller bundles Model loading fails silently when running from the DMG-packaged app, despite working correctly with `uv run exo`. The bundled app spawns child processes for model inference via multiprocessing, but these processes fail to start in a frozen (PyInstaller) environment. Add `freeze_support()` which is required for multiprocessing to work in frozen applications. Test plan: Hardware setup: 3x Mac Studio M3 Ultra connected all-to-all with TB5 - Built a DMG using a modified .github/workflows/build-app.yml[0] to avoid publishing it. - Installed on all 3 Macs, replacing the existing Exo. - Downloaded Llama 3.3 70B (FP16). - Downloaded Qwen3 Coder 235B A22B (8-bit). Things that work now but didn't on the previous app: - Topology looks good, previously there was no discovery. What didn't work: - Started an instance with Pipeline + MLX Ring + 3 Nodes. Failed. - Started an instance with Tensor + MLX RDMA + 2 Nodes. Failed. Will continue debugging the instance starting issues separately. [0] https://github.com/exo-explore/exo/actions/runs/20461320368	2025-12-23 14:34:21 +00:00
Jake Hillion	e8855959c1	build-app: add branch trigger from named branch As I've been working on the .dmg, it's become clear we need a way to test changes to the app. It's too hard to reproduce the full DMG locally to be reasonable and much more convenient to test if it's signed. Add a feature to the build-app workflow where if you push specifically to the `test-app` branch it'll perform a build. The version is stubbed to `0.0.0-alpha.0`, which is about as low as it gets in semver so you'll always update away from it automatically with Sparkle. The resulting DMG won't be pushed to S3 but will be uploaded as a GitHub Actions artifact. I've been using similar commits to this for a while for testing. It's worked well and not interfered with auto updating at all. Test plan: - Pushed this change to `test-app`. - Generated action at https://github.com/exo-explore/exo/actions/runs/20447213358/job/58752909332 - Installed the DMG on a Mac. It worked as intended.	2025-12-23 12:53:30 +00:00
Jake Hillion	0a7fe5d943	ci: migrate build-app to github hosted runners	2025-12-22 19:51:48 +00:00
rltakashige	51a5191ff3	format readme (#978) ## Motivation README looks weird after last update. <!-- Why is this change needed? What problem does it solve? --> <!-- If it fixes an open issue, please link to the issue here --> ## Changes <!-- Describe what you changed in detail --> ## Why It Works <!-- Explain why your approach solves the problem --> ## Test Plan ### Manual Testing <!-- Hardware: (e.g., MacBook Pro M1 Max 32GB, Mac Mini M2 16GB, connected via Thunderbolt 4) --> <!-- What you did: --> <!-- - --> I actually checked the file on GitHub this time. ### Automated Testing <!-- Describe changes to automated tests, or how existing tests cover this change --> <!-- - -->	2025-12-22 18:06:27 +00:00
Evan Quiney	1efbd26388	add architecture.md, move images to docs/imgs (#968) ## Motivation Documentation will make contribution easier and communicate our development philosophy and decision process. Closes #967 ## Changes Added `architecture.md` to docs/ and moved the images out of docs and into their own docs/imgs/ folder	2025-12-22 17:57:43 +00:00
Jake Hillion	02c915a88d	pyproject: drop pathlib dependency	2025-12-22 17:52:44 +00:00
rltakashige	fc41bfa1f1	Add all prerequisites to README (#975) ## Motivation Addresses #974 ``` INFO: pip is looking at multiple versions of exo to determine which version is compatible with other requirements. This could take a while. ERROR: Could not find a version that satisfies the requirement exo-pyo3-bindings (from exo) (from versions: none) ERROR: No matching distribution found for exo-pyo3-bindings ``` ## Changes Describes Rust dependency for building from source ## Why It Works <!-- Explain why your approach solves the problem --> ## Test Plan ### Manual Testing <!-- Hardware: (e.g., MacBook Pro M1 Max 32GB, Mac Mini M2 16GB, connected via Thunderbolt 4) --> <!-- What you did: --> <!-- - --> Tested locally and runs after this setup without exo-pyo3-bindings error ### Automated Testing <!-- Describe changes to automated tests, or how existing tests cover this change --> <!-- - -->	2025-12-22 17:38:51 +00:00
Jake Hillion	dd0638b74d	pyproject: add pyinstaller to dev-dependencies	2025-12-22 15:49:27 +00:00