test: verify instance deletion cancels ongoing tasks (#1215 )

The cancellation logic already exists in get_transition_events() but had no test coverage. Add tests confirming that deleting an instance emits TaskStatusUpdated(Cancelled) events for all Pending/Running tasks on that instance while leaving tasks on other instances untouched. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
dashboard: show failure reasons for runner and download errors (#1350 )
2026-02-18 23:06:23 -05:00 · 2026-02-17 10:05:08 -08:00 · 2026-02-17 09:49:31 -08:00
14 changed files with 280 additions and 188 deletions
--- a/README.md
+++ b/README.md
@@ -72,23 +72,16 @@ There are two ways to run exo:

 ### Run from Source (macOS)

-If you have [Nix](https://nixos.org/) installed, you can skip most of the steps below and run exo directly (after accepting the Cachix cache):
-
-```bash
-nix run .#exo
-```
-
 **Prerequisites:**
- [Xcode](https://developer.apple.com/xcode/) (provides the Metal ToolChain required for MLX compilation)
 - [brew](https://github.com/Homebrew/brew) (for simple package management on macOS)
-
+  
  ```bash
  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  ```
 - [uv](https://github.com/astral-sh/uv) (for Python dependency management)
 - [macmon](https://github.com/vladkens/macmon) (for hardware monitoring on Apple Silicon)
 - [node](https://github.com/nodejs/node) (for building the dashboard)
-
+  
  ```bash
  brew install uv macmon node
  ```
--- a/app/EXO/EXO/ExoProcessController.swift
+++ b/app/EXO/EXO/ExoProcessController.swift
@@ -126,37 +126,11 @@ final class ExoProcessController: ObservableObject {
            return
        }
        process.terminationHandler = nil
-        status = .stopped
-
-        guard process.isRunning else {
-            self.process = nil
-            return
+        if process.isRunning {
+            process.terminate()
        }
-
-        let proc = process
        self.process = nil
-
-        Task.detached {
-            proc.interrupt()
-
-            for _ in 0..<50 {
-                if !proc.isRunning { return }
-                try? await Task.sleep(nanoseconds: 100_000_000)
-            }
-
-            if proc.isRunning {
-                proc.terminate()
-            }
-
-            for _ in 0..<30 {
-                if !proc.isRunning { return }
-                try? await Task.sleep(nanoseconds: 100_000_000)
-            }
-
-            if proc.isRunning {
-                kill(proc.processIdentifier, SIGKILL)
-            }
-        }
+        status = .stopped
    }

    func restart() {
--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -1084,7 +1084,7 @@
      return {
        isDownloading: false,
        isFailed: statusInfo.statusText === "FAILED",
-        errorMessage: null,
+        errorMessage: statusInfo.errorMessage,
        progress: null,
        statusText: statusInfo.statusText,
        perNode: [],
@@ -1142,10 +1142,15 @@
  function deriveInstanceStatus(instanceWrapped: unknown): {
    statusText: string;
    statusClass: string;
+    errorMessage: string | null;
  } {
    const [, instance] = getTagged(instanceWrapped);
    if (!instance || typeof instance !== "object") {
-      return { statusText: "PREPARING", statusClass: "inactive" };
+      return {
+        statusText: "PREPARING",
+        statusClass: "inactive",
+        errorMessage: null,
+      };
    }

    const inst = instance as {
@@ -1153,50 +1158,106 @@
    };
    const runnerIds = Object.keys(inst.shardAssignments?.runnerToShard || {});

+    const statusMap: Record<string, string> = {
+      RunnerWaitingForInitialization: "WaitingForInitialization",
+      RunnerInitializingBackend: "InitializingBackend",
+      RunnerWaitingForModel: "WaitingForModel",
+      RunnerLoading: "Loading",
+      RunnerLoaded: "Loaded",
+      RunnerWarmingUp: "WarmingUp",
+      RunnerReady: "Ready",
+      RunnerRunning: "Running",
+      RunnerShutdown: "Shutdown",
+      RunnerFailed: "Failed",
+    };
+
    const statuses = runnerIds
      .map((rid) => {
        const r = runnersData[rid];
        if (!r) return null;
-        const [kind] = getTagged(r);
-        const statusMap: Record<string, string> = {
-          RunnerWaitingForInitialization: "WaitingForInitialization",
-          RunnerInitializingBackend: "InitializingBackend",
-          RunnerWaitingForModel: "WaitingForModel",
-          RunnerLoading: "Loading",
-          RunnerLoaded: "Loaded",
-          RunnerWarmingUp: "WarmingUp",
-          RunnerReady: "Ready",
-          RunnerRunning: "Running",
-          RunnerShutdown: "Shutdown",
-          RunnerFailed: "Failed",
-        };
-        return kind ? statusMap[kind] || null : null;
+        const [kind, payload] = getTagged(r);
+        if (!kind || !statusMap[kind]) return null;
+        const errorMessage =
+          kind === "RunnerFailed" && payload && typeof payload === "object"
+            ? (((payload as Record<string, unknown>).errorMessage as string) ??
+              null)
+            : null;
+        return { status: statusMap[kind], errorMessage };
      })
-      .filter((s): s is string => s !== null);
+      .filter(
+        (s): s is { status: string; errorMessage: string | null } => s !== null,
+      );

-    const has = (s: string) => statuses.includes(s);
+    const has = (s: string) => statuses.some((e) => e.status === s);

    if (statuses.length === 0)
-      return { statusText: "PREPARING", statusClass: "inactive" };
-    if (has("Failed")) return { statusText: "FAILED", statusClass: "failed" };
+      return {
+        statusText: "PREPARING",
+        statusClass: "inactive",
+        errorMessage: null,
+      };
+    if (has("Failed")) {
+      const failedEntry = statuses.find(
+        (e) => e.status === "Failed" && e.errorMessage,
+      );
+      return {
+        statusText: "FAILED",
+        statusClass: "failed",
+        errorMessage: failedEntry?.errorMessage ?? null,
+      };
+    }
    if (has("Shutdown"))
-      return { statusText: "SHUTDOWN", statusClass: "inactive" };
+      return {
+        statusText: "SHUTDOWN",
+        statusClass: "inactive",
+        errorMessage: null,
+      };
    if (has("Loading"))
-      return { statusText: "LOADING", statusClass: "starting" };
+      return {
+        statusText: "LOADING",
+        statusClass: "starting",
+        errorMessage: null,
+      };
    if (has("WarmingUp"))
-      return { statusText: "WARMING UP", statusClass: "starting" };
+      return {
+        statusText: "WARMING UP",
+        statusClass: "starting",
+        errorMessage: null,
+      };
    if (has("Running"))
-      return { statusText: "RUNNING", statusClass: "running" };
-    if (has("Ready")) return { statusText: "READY", statusClass: "loaded" };
-    if (has("Loaded")) return { statusText: "LOADED", statusClass: "loaded" };
+      return {
+        statusText: "RUNNING",
+        statusClass: "running",
+        errorMessage: null,
+      };
+    if (has("Ready"))
+      return { statusText: "READY", statusClass: "loaded", errorMessage: null };
+    if (has("Loaded"))
+      return {
+        statusText: "LOADED",
+        statusClass: "loaded",
+        errorMessage: null,
+      };
    if (has("WaitingForModel"))
-      return { statusText: "WAITING", statusClass: "starting" };
+      return {
+        statusText: "WAITING",
+        statusClass: "starting",
+        errorMessage: null,
+      };
    if (has("InitializingBackend"))
-      return { statusText: "INITIALIZING", statusClass: "starting" };
+      return {
+        statusText: "INITIALIZING",
+        statusClass: "starting",
+        errorMessage: null,
+      };
    if (has("WaitingForInitialization"))
-      return { statusText: "INITIALIZING", statusClass: "starting" };
+      return {
+        statusText: "INITIALIZING",
+        statusClass: "starting",
+        errorMessage: null,
+      };

-    return { statusText: "RUNNING", statusClass: "active" };
+    return { statusText: "RUNNING", statusClass: "active", errorMessage: null };
  }

  function getBytes(value: unknown): number {
--- a/dashboard/src/routes/downloads/+page.svelte
+++ b/dashboard/src/routes/downloads/+page.svelte
@@ -30,7 +30,7 @@
        modelDirectory?: string;
      }
    | { kind: "pending"; modelDirectory?: string }
-    | { kind: "failed"; modelDirectory?: string }
+    | { kind: "failed"; modelDirectory?: string; errorMessage?: string }
    | { kind: "not_present" };

  type ModelCardInfo = {
@@ -263,7 +263,10 @@
              modelDirectory,
            };
          } else if (tag === "DownloadFailed") {
-            cell = { kind: "failed", modelDirectory };
+            const errorMsg =
+              ((payload.errorMessage ?? payload.error_message) as string) ||
+              undefined;
+            cell = { kind: "failed", modelDirectory, errorMessage: errorMsg };
          } else {
            cell = { kind: "pending", modelDirectory };
          }
@@ -499,7 +502,7 @@
                    {:else if cell.kind === "failed"}
                      <div
                        class="flex flex-col items-center gap-0.5"
-                        title="Download failed"
+                        title={cell.errorMessage ?? "Download failed"}
                      >
                        <svg
                          class="w-5 h-5 text-red-400"
@@ -512,6 +515,14 @@
                            clip-rule="evenodd"
                          ></path>
                        </svg>
+                        {#if cell.errorMessage}
+                          <span
+                            class="text-[9px] text-red-400/70 max-w-[120px] truncate"
+                            title={cell.errorMessage}
+                          >
+                            {cell.errorMessage}
+                          </span>
+                        {/if}
                        {#if row.shardMetadata}
                          <button
                            type="button"
@@ -707,6 +718,11 @@
                      ({clampPercent(cellStatus.percentage).toFixed(0)}%)
                    {/if}
                  </span>
+                  {#if cellStatus.kind === "failed" && "errorMessage" in cellStatus && cellStatus.errorMessage}
+                    <span class="text-[9px] text-red-400/70 break-all pl-1">
+                      {cellStatus.errorMessage}
+                    </span>
+                  {/if}
                  {#if "modelDirectory" in cellStatus && cellStatus.modelDirectory}
                    <span
                      class="text-[9px] text-white/30 break-all pl-1"
--- a/flake.nix
+++ b/flake.nix
@@ -115,7 +115,7 @@
          packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin (
            let
              uvLock = builtins.fromTOML (builtins.readFile ./uv.lock);
-              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx" && p.source ? git) uvLock.package);
+              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx") uvLock.package);
              uvLockMlxVersion = mlxPackage.version;
            in
            {
--- a/nix/mlx.nix
+++ b/nix/mlx.nix
@@ -41,16 +41,16 @@ let

  mlx = stdenv.mkDerivation rec {
    pname = "mlx";
-    version = let v = "0.30.7.dev20260217+50487b41"; in
+    version = let v = "0.30.6"; in
      assert v == uvLockMlxVersion || throw "MLX version mismatch: nix/mlx.nix has ${v} but uv.lock has ${uvLockMlxVersion}. Update both the version and hash in nix/mlx.nix.";
      v;
    pyproject = true;

    src = fetchFromGitHub {
-      owner = "rltakashige";
-      repo = "mlx-jaccl-fix-small-recv";
-      rev = "50487b4141f3c951122655db3b83df5146c1fbeb";
-      hash = "sha256-IL4a9vMX5nocgJU1WG4zE8hArHkHJtnh4sdYh3od5zU=";
+      owner = "ml-explore";
+      repo = "mlx";
+      tag = "v${version}";
+      hash = "sha256-avD5EGhwgmPdXLAyQSqTO6AXk/W3ziH+f6AetjK3Sdo=";
    };

    patches = [
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ dependencies = [
    "loguru>=0.7.3",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
-    "mlx; sys_platform == 'darwin'",
+    "mlx==0.30.6; sys_platform == 'darwin'",
    "mlx[cpu]==0.30.6; sys_platform == 'linux'",
    "mlx-lm==0.30.6",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
@@ -64,7 +64,6 @@ members = [

 [tool.uv.sources]
 exo_pyo3_bindings = { workspace = true }
-mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" }
 #mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm", branch = "stable" }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -58,21 +58,6 @@
        lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux (
          (lib.mapAttrs (_: ignoreMissing) nvidiaPackages) // {
            mlx = ignoreMissing prev.mlx;
-            mlx-cuda-13 = prev.mlx-cuda-13.overrideAttrs (old: {
-              buildInputs = (old.buildInputs or [ ]) ++ [
-                final.nvidia-cublas
-                final.nvidia-cuda-nvrtc
-                final.nvidia-cudnn-cu13
-                final.nvidia-nccl-cu13
-              ];
-              preFixup = ''
-                addAutoPatchelfSearchPath ${final.nvidia-cublas}
-                addAutoPatchelfSearchPath ${final.nvidia-cuda-nvrtc}
-                addAutoPatchelfSearchPath ${final.nvidia-cudnn-cu13}
-                addAutoPatchelfSearchPath ${final.nvidia-nccl-cu13}
-              '';
-              autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
-            });
            torch = ignoreMissing prev.torch;
            triton = ignoreMissing prev.triton;
          }
@@ -89,25 +74,14 @@
          linuxOverlay
        ]
      );
-      # mlx-cpu and mlx-cuda-13 both ship mlx/ site-packages files; keep first.
-      # mlx-cpu/mlx-cuda-13 and nvidia-cudnn-cu12/cu13 ship overlapping files.
-      venvCollisionPaths = lib.optionals pkgs.stdenv.hostPlatform.isLinux [
-        "lib/python3.13/site-packages/mlx*"
-        "lib/python3.13/site-packages/nvidia*"
-      ];
-
-      exoVenv = (pythonSet.mkVirtualEnv "exo-env" workspace.deps.default).overrideAttrs {
-        venvIgnoreCollisions = venvCollisionPaths;
-      };
+      exoVenv = pythonSet.mkVirtualEnv "exo-env" workspace.deps.default;

      # Virtual environment with dev dependencies for testing
-      testVenv = (pythonSet.mkVirtualEnv "exo-test-env" (
+      testVenv = pythonSet.mkVirtualEnv "exo-test-env" (
        workspace.deps.default // {
          exo = [ "dev" ]; # Include pytest, pytest-asyncio, pytest-env
        }
-      )).overrideAttrs {
-        venvIgnoreCollisions = venvCollisionPaths;
-      };
+      );

      mkPythonScript = name: path: pkgs.writeShellApplication {
        inherit name;
--- a/src/exo/main.py
+++ b/src/exo/main.py
@@ -94,7 +94,6 @@ class Node:
                command_sender=router.sender(topics.COMMANDS),
                download_command_sender=router.sender(topics.DOWNLOAD_COMMANDS),
                event_index_counter=event_index_counter,
-                no_downloads=args.no_downloads,
            )
        else:
            worker = None
@@ -137,8 +136,6 @@ class Node:

    async def run(self):
        async with self._tg as tg:
-            signal.signal(signal.SIGINT, lambda _, __: self.shutdown())
-            signal.signal(signal.SIGTERM, lambda _, __: self.shutdown())
            tg.start_soon(self.router.run)
            tg.start_soon(self.election.run)
            if self.download_coordinator:
@@ -150,6 +147,8 @@ class Node:
            if self.api:
                tg.start_soon(self.api.run)
            tg.start_soon(self._elect_loop)
+            signal.signal(signal.SIGINT, lambda _, __: self.shutdown())
+            signal.signal(signal.SIGTERM, lambda _, __: self.shutdown())

    def shutdown(self):
        # if this is our second call to shutdown, just sys.exit
@@ -226,7 +225,6 @@ class Node:
                        )
                        self._tg.start_soon(self.download_coordinator.run)
                    if self.worker:
-                        no_downloads = self.worker.no_downloads
                        self.worker.shutdown()
                        # TODO: add profiling etc to resource monitor
                        self.worker = Worker(
@@ -241,7 +239,6 @@ class Node:
                                topics.DOWNLOAD_COMMANDS
                            ),
                            event_index_counter=self.event_index_counter,
-                            no_downloads=no_downloads,
                        )
                        self._tg.start_soon(self.worker.run)
                    if self.api:
--- a/src/exo/master/tests/test_placement.py
+++ b/src/exo/master/tests/test_placement.py
@@ -14,10 +14,12 @@ from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
 from exo.shared.topology import Topology
 from exo.shared.types.commands import PlaceInstance
 from exo.shared.types.common import CommandId, NodeId
-from exo.shared.types.events import InstanceCreated, InstanceDeleted
+from exo.shared.types.events import InstanceCreated, InstanceDeleted, TaskStatusUpdated
 from exo.shared.types.memory import Memory
 from exo.shared.types.multiaddr import Multiaddr
 from exo.shared.types.profiling import NetworkInterfaceInfo, NodeNetworkInfo
+from exo.shared.types.tasks import TaskId, TaskStatus, TextGeneration
+from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
 from exo.shared.types.topology import Connection, SocketConnection
 from exo.shared.types.worker.instances import (
    Instance,
@@ -456,3 +458,117 @@ def test_tensor_rdma_backend_connectivity_matrix(
        else:
            ip_part = coordinator.split(":")[0]
            assert len(ip_part.split(".")) == 4
+
+
+def _make_task(
+    instance_id: InstanceId,
+    status: TaskStatus = TaskStatus.Running,
+) -> TextGeneration:
+    return TextGeneration(
+        task_id=TaskId(),
+        task_status=status,
+        instance_id=instance_id,
+        command_id=CommandId(),
+        task_params=TextGenerationTaskParams(
+            model=ModelId("test-model"),
+            input=[InputMessage(role="user", content="hello")],
+        ),
+    )
+
+
+def test_get_transition_events_delete_instance_cancels_running_tasks(
+    instance: Instance,
+):
+    # arrange
+    instance_id = InstanceId()
+    current_instances: dict[InstanceId, Instance] = {instance_id: instance}
+    target_instances: dict[InstanceId, Instance] = {}
+    task = _make_task(instance_id, TaskStatus.Running)
+    tasks = {task.task_id: task}
+
+    # act
+    events = get_transition_events(current_instances, target_instances, tasks)
+
+    # assert – cancellation event should come before the deletion event
+    assert len(events) == 2
+    assert isinstance(events[0], TaskStatusUpdated)
+    assert events[0].task_id == task.task_id
+    assert events[0].task_status == TaskStatus.Cancelled
+    assert isinstance(events[1], InstanceDeleted)
+    assert events[1].instance_id == instance_id
+
+
+def test_get_transition_events_delete_instance_cancels_pending_tasks(
+    instance: Instance,
+):
+    # arrange
+    instance_id = InstanceId()
+    current_instances: dict[InstanceId, Instance] = {instance_id: instance}
+    target_instances: dict[InstanceId, Instance] = {}
+    task = _make_task(instance_id, TaskStatus.Pending)
+    tasks = {task.task_id: task}
+
+    # act
+    events = get_transition_events(current_instances, target_instances, tasks)
+
+    # assert
+    assert len(events) == 2
+    assert isinstance(events[0], TaskStatusUpdated)
+    assert events[0].task_id == task.task_id
+    assert events[0].task_status == TaskStatus.Cancelled
+    assert isinstance(events[1], InstanceDeleted)
+
+
+def test_get_transition_events_delete_instance_ignores_completed_tasks(
+    instance: Instance,
+):
+    # arrange
+    instance_id = InstanceId()
+    current_instances: dict[InstanceId, Instance] = {instance_id: instance}
+    target_instances: dict[InstanceId, Instance] = {}
+    tasks = {
+        t.task_id: t
+        for t in [
+            _make_task(instance_id, TaskStatus.Complete),
+            _make_task(instance_id, TaskStatus.Failed),
+            _make_task(instance_id, TaskStatus.TimedOut),
+            _make_task(instance_id, TaskStatus.Cancelled),
+        ]
+    }
+
+    # act
+    events = get_transition_events(current_instances, target_instances, tasks)
+
+    # assert – only the InstanceDeleted event, no cancellations
+    assert len(events) == 1
+    assert isinstance(events[0], InstanceDeleted)
+
+
+def test_get_transition_events_delete_instance_cancels_only_matching_tasks(
+    instance: Instance,
+):
+    # arrange
+    instance_id_a = InstanceId()
+    instance_id_b = InstanceId()
+    current_instances: dict[InstanceId, Instance] = {
+        instance_id_a: instance,
+        instance_id_b: instance,
+    }
+    # only delete instance A, keep instance B
+    target_instances: dict[InstanceId, Instance] = {instance_id_b: instance}
+
+    task_a = _make_task(instance_id_a, TaskStatus.Running)
+    task_b = _make_task(instance_id_b, TaskStatus.Running)
+    tasks = {task_a.task_id: task_a, task_b.task_id: task_b}
+
+    # act
+    events = get_transition_events(current_instances, target_instances, tasks)
+
+    # assert – only task_a should be cancelled
+    cancel_events = [e for e in events if isinstance(e, TaskStatusUpdated)]
+    delete_events = [e for e in events if isinstance(e, InstanceDeleted)]
+    assert len(cancel_events) == 1
+    assert cancel_events[0].task_id == task_a.task_id
+    assert cancel_events[0].task_status == TaskStatus.Cancelled
+    assert len(delete_events) == 1
+    assert delete_events[0].instance_id == instance_id_a
--- a/src/exo/worker/main.py
+++ b/src/exo/worker/main.py
@@ -65,7 +65,6 @@ class Worker:
        command_sender: Sender[ForwarderCommand],
        download_command_sender: Sender[ForwarderDownloadCommand],
        event_index_counter: Iterator[int],
-        no_downloads: bool = False,
    ):
        self.node_id: NodeId = node_id
        self.session_id: SessionId = session_id
@@ -75,7 +74,6 @@ class Worker:
        self.event_index_counter = event_index_counter
        self.command_sender = command_sender
        self.download_command_sender = download_command_sender
-        self.no_downloads = no_downloads
        self.event_buffer = OrderedBuffer[Event]()
        self.out_for_delivery: dict[EventId, ForwarderEvent] = {}

@@ -184,7 +182,6 @@ class Worker:
                self.state.tasks,
                self.input_chunk_buffer,
                self.input_chunk_counts,
-                no_downloads=self.no_downloads,
            )
            if task is None:
                continue
--- a/src/exo/worker/plan.py
+++ b/src/exo/worker/plan.py
@@ -51,20 +51,15 @@ def plan(
    tasks: Mapping[TaskId, Task],
    input_chunk_buffer: Mapping[CommandId, dict[int, str]] | None = None,
    input_chunk_counts: Mapping[CommandId, int] | None = None,
-    no_downloads: bool = False,
 ) -> Task | None:
    # Python short circuiting OR logic should evaluate these sequentially.
    return (
        _cancel_tasks(runners, tasks)
        or _kill_runner(runners, all_runners, instances)
        or _create_runner(node_id, runners, instances)
-        or (
-            None
-            if no_downloads
-            else _model_needs_download(node_id, runners, global_download_status)
-        )
+        or _model_needs_download(node_id, runners, global_download_status)
        or _init_distributed_backend(runners, all_runners)
-        or _load_model(runners, all_runners, global_download_status, no_downloads)
+        or _load_model(runners, all_runners, global_download_status)
        or _ready_to_warmup(runners, all_runners)
        or _pending_tasks(runners, tasks, all_runners, input_chunk_buffer or {})
    )
@@ -197,25 +192,22 @@ def _load_model(
    runners: Mapping[RunnerId, RunnerSupervisor],
    all_runners: Mapping[RunnerId, RunnerStatus],
    global_download_status: Mapping[NodeId, Sequence[DownloadProgress]],
-    no_downloads: bool = False,
 ) -> LoadModel | None:
    for runner in runners.values():
        instance = runner.bound_instance.instance
        shard_assignments = instance.shard_assignments

-        if not no_downloads:
-            all_local_downloads_complete = all(
-                nid in global_download_status
-                and any(
-                    isinstance(dp, DownloadCompleted)
-                    and dp.shard_metadata.model_card.model_id
-                    == shard_assignments.model_id
-                    for dp in global_download_status[nid]
-                )
-                for nid in shard_assignments.node_to_runner
+        all_local_downloads_complete = all(
+            nid in global_download_status
+            and any(
+                isinstance(dp, DownloadCompleted)
+                and dp.shard_metadata.model_card.model_id == shard_assignments.model_id
+                for dp in global_download_status[nid]
            )
-            if not all_local_downloads_complete:
-                continue
+            for nid in shard_assignments.node_to_runner
+        )
+        if not all_local_downloads_complete:
+            continue

        is_single_node_instance = len(instance.shard_assignments.runner_to_shard) == 1
        if is_single_node_instance and isinstance(runner.status, RunnerIdle):
--- a/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
@@ -157,41 +157,6 @@ def test_plan_does_not_request_download_when_shard_already_downloaded():
    assert not isinstance(result, plan_mod.DownloadModel)


-def test_plan_loads_model_with_no_downloads_flag():
-    """
-    When no_downloads=True, plan() should skip download checks and proceed
-    to load the model even with empty global_download_status.
-    """
-    shard = get_pipeline_shard_metadata(model_id=MODEL_A_ID, device_rank=0)
-    instance = get_mlx_ring_instance(
-        instance_id=INSTANCE_1_ID,
-        model_id=MODEL_A_ID,
-        node_to_runner={NODE_A: RUNNER_1_ID},
-        runner_to_shard={RUNNER_1_ID: shard},
-    )
-    bound_instance = BoundInstance(
-        instance=instance, bound_runner_id=RUNNER_1_ID, bound_node_id=NODE_A
-    )
-    runner = FakeRunnerSupervisor(bound_instance=bound_instance, status=RunnerIdle())
-
-    runners = {RUNNER_1_ID: runner}
-    instances = {INSTANCE_1_ID: instance}
-    all_runners = {RUNNER_1_ID: RunnerIdle()}
-
-    result = plan_mod.plan(
-        node_id=NODE_A,
-        runners=runners,  # type: ignore
-        global_download_status={},
-        instances=instances,
-        all_runners=all_runners,
-        tasks={},
-        no_downloads=True,
-    )
-
-    assert isinstance(result, LoadModel)
-    assert result.instance_id == INSTANCE_1_ID
-
-
 def test_plan_does_not_load_model_until_all_shards_downloaded_globally():
    """
    LoadModel should not be emitted while some shards are still missing from
--- a/uv.lock
+++ b/uv.lock
@@ -377,8 +377,8 @@ dependencies = [
    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cpu"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260217+50487b41", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -416,7 +416,7 @@ requires-dist = [
    { name = "hypercorn", specifier = ">=0.18.0" },
    { name = "loguru", specifier = ">=0.7.3" },
    { name = "mflux", specifier = "==0.15.5" },
-    { name = "mlx", marker = "sys_platform == 'darwin'", git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks" },
+    { name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.6" },
    { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.6" },
    { name = "mlx-lm", specifier = "==0.30.6" },
    { name = "msgspec", specifier = ">=0.19.0" },
@@ -1020,8 +1020,8 @@ dependencies = [
    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "matplotlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cuda13"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260217+50487b41", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", extra = ["cuda13"], marker = "sys_platform == 'linux'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "piexif", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1048,12 +1048,18 @@ wheels = [
 name = "mlx"
 version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "sys_platform == 'linux'",
+dependencies = [
+    { name = "mlx-metal", marker = "sys_platform == 'darwin'" },
 ]
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/ae/5b/e460e144a34d5529e010056cccf50b538d56ed001473bc6b246018fd58cb/mlx-0.30.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ed86f8bffc174c2f259ca589ea25464c96cf69d1bb457074a2bf2ef53737e54f", size = 573515, upload-time = "2026-02-06T03:45:23.405Z" },
+    { url = "https://files.pythonhosted.org/packages/60/25/69833fefb9a3fef30b56792b1bcd022496c4fea83e45411d289b77ef7546/mlx-0.30.6-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:c52294958269e20f300639a17c1900ca8fc737d859ddda737f9811e94bd040e5", size = 573516, upload-time = "2026-02-06T03:45:24.618Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/6a/7e7fbeebc5cb51b6a5eba96b263a6298707bcbdc059f4b0b73e088bc3dea/mlx-0.30.6-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:b5b6636f7c49a4d86d8ec82643b972f45a144a7a9f3a967b27b2e6e22cf71e6a", size = 573592, upload-time = "2026-02-06T03:45:25.928Z" },
    { url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" },
    { url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" },
+    { url = "https://files.pythonhosted.org/packages/60/23/361dc7a5797634e4d7e9bdd6564c6b28f9b1246672632def2f91bf066b18/mlx-0.30.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:78804a89dcff4a838f7c2da72392fe87a523e95122a3c840e53df019122aad45", size = 575028, upload-time = "2026-02-06T03:45:31.549Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/69/1854484d414171586814dfbe8def95f75c4ea2c7341ba13ba8ee675f7c62/mlx-0.30.6-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ec13584ab069665cc7ad34a05494d9291cd623aef6ae96be48875fc87cfc25d6", size = 575026, upload-time = "2026-02-06T03:45:33.072Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/b8/3adbc441924209a7e4c568308b2a0b54bd09aee6a68db5bae85304791e54/mlx-0.30.6-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:b2c5e8a090a753ef99a1380a4d059c983083f36198864f6df9faaf1223d083df", size = 575041, upload-time = "2026-02-06T03:45:34.814Z" },
    { url = "https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" },
    { url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" },
 ]
@@ -1066,14 +1072,6 @@ cuda13 = [
    { name = "mlx-cuda-13", marker = "sys_platform == 'linux'" },
 ]

-[[package]]
-name = "mlx"
-version = "0.30.7.dev20260217+50487b41"
-source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }
-resolution-markers = [
-    "sys_platform == 'darwin'",
-]
-
 [[package]]
 name = "mlx-cpu"
 version = "0.30.6"
@@ -1104,7 +1102,7 @@ version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260217+50487b41", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1116,6 +1114,16 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/20/5f/01d281f1fa8a1521d5936659beb4f5ab1f32b463d059263cf9d4cef969d9/mlx_lm-0.30.6-py3-none-any.whl", hash = "sha256:a7405bd581eacc4bf8209d7a6b7f23629585a0d7c6740c2a97e51fee35b3b0e1", size = 379451, upload-time = "2026-02-04T21:27:43.222Z" },
 ]

+[[package]]
+name = "mlx-metal"
+version = "0.30.6"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f3/85/44406b521f920248fad621334d4dc15e77660a494edf890e7cbee33bf38d/mlx_metal-0.30.6-py3-none-macosx_14_0_arm64.whl", hash = "sha256:ea6d0c973def9a5b4f652cc77036237db3f88c9d0af63701d76b5fddde99b820", size = 38437818, upload-time = "2026-02-06T03:44:56.19Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/cb/10a516995f7d0c154b0d7e633c54b51e96977a86a355105b6474cfcbe0d0/mlx_metal-0.30.6-py3-none-macosx_15_0_arm64.whl", hash = "sha256:0f8cb94634d07e06a372d6ad9a090f38a18bab1ff19a140aede60eacf707bb94", size = 38433701, upload-time = "2026-02-06T03:44:59.678Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/7d/70cb272f7373c334709f210ed8420511fc9d64d05a7a646c0b3b94c29c04/mlx_metal-0.30.6-py3-none-macosx_26_0_arm64.whl", hash = "sha256:d761ae26304f2c4b454eeea7f612a56919d9e5e57dbb1dc0788f8e34aa6f41c2", size = 47718448, upload-time = "2026-02-06T03:45:03.133Z" },
+]
+
 [[package]]
 name = "more-itertools"
 version = "10.8.0"