Merge branch 'main' into leo/address-rdma-gpu-locks

exit cleanly
Address GPU timeouts with custom fork
2026-02-12 15:11:30 -05:00 · 2026-02-12 00:00:44 +00:00 · 2026-02-11 23:55:46 +00:00 · 2026-02-11 22:53:17 +00:00
16 changed files with 35 additions and 554 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,15 +0,0 @@
-.venv/
-.direnv/
-target/
-.git/
-.idea/
-.pytest_cache/
-.ruff_cache/
-dashboard/node_modules/
-dashboard/.svelte-kit/
-dashboard/build/
-dist/
-*.pdb
-**/__pycache__
-**/.DS_Store
-.mlx_typings/
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -1,29 +0,0 @@
-name: e2e-tests
-
-on:
-  push:
-  pull_request:
-    branches:
-      - staging
-      - main
-
-jobs:
-  e2e:
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    steps:
-      - name: Free up disk space
-        run: |
-          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \
-            /opt/hostedtoolcache /usr/local/share/boost /usr/share/swift \
-            /opt/microsoft /opt/az
-          docker system prune -af
-          df -h /
-
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          lfs: false
-
-      - name: Run E2E tests
-        run: python3 e2e/run_all.py
--- a/e2e/Dockerfile
+++ b/e2e/Dockerfile
@@ -1,53 +0,0 @@
-# Stage 1: Build the dashboard
-FROM node:22-slim AS dashboard
-WORKDIR /app/dashboard
-COPY dashboard/package.json dashboard/package-lock.json ./
-RUN npm ci
-COPY dashboard/ .
-RUN npm run build
-
-# Stage 2: Build and run exo
-FROM python:3.13-slim
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    pkg-config \
-    libssl-dev \
-    curl \
-    protobuf-compiler \
-    iptables \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install Rust nightly
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly
-ENV PATH="/root/.cargo/bin:${PATH}"
-
-# Install uv
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
-
-WORKDIR /app
-
-# Copy dependency files first for better layer caching
-COPY pyproject.toml Cargo.toml uv.lock README.md ./
-COPY rust/ ./rust/
-COPY bench/pyproject.toml ./bench/pyproject.toml
-
-# Copy source and resources
-COPY src/ ./src/
-COPY resources/ ./resources/
-
-# Copy built dashboard from stage 1
-COPY --from=dashboard /app/dashboard/build ./dashboard/build/
-
-# Install Python deps and build Rust bindings, then clean up build artifacts
-# to keep the layer small (Rust target/ and cargo registry can be 1-2 GB)
-RUN uv sync && rm -rf /app/rust/target /root/.cargo/registry /root/.cargo/git
-
-# Wrap g++ with -fpermissive to fix MLX CPU JIT compilation with GCC 14
-# (GCC 14 treats _Float128/_Float32/_Float64 as built-in types, conflicting with MLX-generated code)
-RUN mv /usr/bin/g++ /usr/bin/g++.real && \
-    printf '#!/bin/sh\nexec /usr/bin/g++.real -fpermissive "$@"\n' > /usr/bin/g++ && \
-    chmod +x /usr/bin/g++
-
-CMD [".venv/bin/exo", "-v"]
--- a/e2e/conftest.py
+++ b/e2e/conftest.py
@@ -1,159 +0,0 @@
-"""Shared E2E test infrastructure for exo cluster tests."""
-
-import asyncio
-import json
-import os
-import sys
-from pathlib import Path
-from urllib.request import urlopen, Request
-from urllib.error import URLError
-
-E2E_DIR = Path(__file__).parent.resolve()
-TIMEOUT = int(os.environ.get("E2E_TIMEOUT", "120"))
-
-
-class Cluster:
-    """Async wrapper around a docker compose exo cluster."""
-
-    def __init__(self, name: str, overrides: list[str] | None = None):
-        self.name = name
-        self.project = f"e2e-{name}"
-        compose_files = [str(E2E_DIR / "docker-compose.yml")]
-        for path in overrides or []:
-            compose_files.append(str(E2E_DIR / path))
-        self._compose_base = [
-            "docker", "compose",
-            "-p", self.project,
-            *[arg for f in compose_files for arg in ("-f", f)],
-        ]
-
-    async def __aenter__(self):
-        return self
-
-    async def __aexit__(self, *exc):
-        await self.stop()
-
-    async def _run(self, *args: str, check: bool = True) -> str:
-        proc = await asyncio.create_subprocess_exec(
-            *self._compose_base, *args,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.STDOUT,
-        )
-        stdout, _ = await proc.communicate()
-        output = stdout.decode()
-        if check and proc.returncode != 0:
-            print(output, file=sys.stderr)
-            raise RuntimeError(f"docker compose {' '.join(args)} failed (rc={proc.returncode})")
-        return output
-
-    async def build(self):
-        print("  Building images...")
-        await self._run("build", "--quiet")
-
-    async def start(self):
-        print("  Starting cluster...")
-        await self._run("up", "-d")
-
-    async def stop(self):
-        print("  Cleaning up...")
-        await self._run("down", "--timeout", "5", check=False)
-
-    async def logs(self) -> str:
-        return await self._run("logs", check=False)
-
-    async def exec(self, service: str, *cmd: str, check: bool = True) -> tuple[int, str]:
-        """Run a command inside a running container. Returns (returncode, output)."""
-        proc = await asyncio.create_subprocess_exec(
-            *self._compose_base, "exec", "-T", service, *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.STDOUT,
-        )
-        stdout, _ = await proc.communicate()
-        output = stdout.decode()
-        if check and proc.returncode != 0:
-            raise RuntimeError(f"exec {' '.join(cmd)} in {service} failed (rc={proc.returncode})")
-        return proc.returncode, output
-
-    async def wait_for(self, description: str, check_fn, timeout: int = TIMEOUT):
-        """Poll check_fn every 2s until it returns True or timeout expires."""
-        print(f"  Waiting for {description}...")
-        deadline = asyncio.get_event_loop().time() + timeout
-        while asyncio.get_event_loop().time() < deadline:
-            if await check_fn():
-                print(f"  {description}")
-                return
-            await asyncio.sleep(2)
-        output = await self.logs()
-        print(f"--- cluster logs ---\n{output}\n---", file=sys.stderr)
-        raise TimeoutError(f"Timed out waiting for {description}")
-
-    async def assert_healthy(self):
-        """Verify the cluster formed correctly: nodes started, discovered each other, elected a master, API responds."""
-
-        async def both_nodes_started():
-            log = await self.logs()
-            return log.count("Starting node") >= 2
-
-        async def nodes_discovered():
-            log = await self.logs()
-            return log.count("ConnectionMessageType.Connected") >= 2
-
-        async def master_elected():
-            log = await self.logs()
-            return "demoting self" in log
-
-        async def api_responding():
-            try:
-                with urlopen("http://localhost:52415/v1/models", timeout=3) as resp:
-                    return resp.status == 200
-            except (URLError, OSError):
-                return False
-
-        await self.wait_for("Both nodes started", both_nodes_started)
-        await self.wait_for("Nodes discovered each other", nodes_discovered)
-        await self.wait_for("Master election resolved", master_elected)
-        await self.wait_for("API responding", api_responding)
-
-    async def _api(self, method: str, path: str, body: dict | None = None, timeout: int = 30) -> dict:
-        """Make an API request to the cluster. Returns parsed JSON."""
-        url = f"http://localhost:52415{path}"
-        data = json.dumps(body).encode() if body else None
-        req = Request(url, data=data, headers={"Content-Type": "application/json"}, method=method)
-        loop = asyncio.get_event_loop()
-        resp_bytes = await loop.run_in_executor(None, lambda: urlopen(req, timeout=timeout).read())
-        return json.loads(resp_bytes)
-
-    async def place_model(self, model: str, timeout: int = 600):
-        """Place a model instance on the cluster (triggers download) and wait until it's ready."""
-        await self._api("POST", "/place_instance", {"model_id": model})
-
-        async def model_ready():
-            try:
-                resp = await self._api("GET", "/v1/models")
-                return any(m.get("id") == model for m in resp.get("data", []))
-            except Exception:
-                return False
-
-        await self.wait_for(f"Model {model} ready", model_ready, timeout=timeout)
-
-    async def chat(self, model: str, messages: list[dict], timeout: int = 600, **kwargs) -> dict:
-        """Send a chat completion request. Retries until model is downloaded and inference completes."""
-        body = json.dumps({"model": model, "messages": messages, **kwargs}).encode()
-        deadline = asyncio.get_event_loop().time() + timeout
-        last_error = None
-
-        while asyncio.get_event_loop().time() < deadline:
-            try:
-                req = Request(
-                    "http://localhost:52415/v1/chat/completions",
-                    data=body,
-                    headers={"Content-Type": "application/json"},
-                )
-                loop = asyncio.get_event_loop()
-                resp_bytes = await loop.run_in_executor(None, lambda: urlopen(req, timeout=300).read())
-                return json.loads(resp_bytes)
-            except Exception as e:
-                last_error = e
-                await asyncio.sleep(5)
-
-        raise TimeoutError(f"Chat request failed after {timeout}s: {last_error}")
--- a/e2e/docker-compose.yml
+++ b/e2e/docker-compose.yml
@@ -1,18 +0,0 @@
-services:
-  exo-node-1:
-    build:
-      context: ..
-      dockerfile: e2e/Dockerfile
-    environment:
-      - EXO_LIBP2P_NAMESPACE=docker-e2e
-    command: [".venv/bin/exo", "-v"]
-    ports:
-      - "52415:52415"
-
-  exo-node-2:
-    build:
-      context: ..
-      dockerfile: e2e/Dockerfile
-    environment:
-      - EXO_LIBP2P_NAMESPACE=docker-e2e
-    command: [".venv/bin/exo", "-v"]
--- a/e2e/run_all.py
+++ b/e2e/run_all.py
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-"""Discovers and runs all E2E tests in e2e/test_*.py.
-
-Tests with '# slow' on the first line of their docstring are skipped
-unless --slow is passed or E2E_SLOW=1 is set.
-"""
-
-import os
-import subprocess
-import sys
-from pathlib import Path
-
-E2E_DIR = Path(__file__).parent.resolve()
-
-
-def is_slow(test_file: Path) -> bool:
-    """Check if the test file is marked as slow (has '# slow' in first 3 lines)."""
-    with open(test_file) as f:
-        for line in f:
-            if line.strip().startswith("#"):
-                continue
-            if line.strip().startswith('"""') or line.strip().startswith("'''"):
-                # Read into the docstring
-                for doc_line in f:
-                    if "slow" in doc_line.lower() and doc_line.strip().startswith("slow"):
-                        return True
-                    if '"""' in doc_line or "'''" in doc_line:
-                        break
-            break
-    return False
-
-
-def main():
-    run_slow = "--slow" in sys.argv or os.environ.get("E2E_SLOW") == "1"
-    test_files = sorted(E2E_DIR.glob("test_*.py"))
-    if not test_files:
-        print("No test files found")
-        sys.exit(1)
-
-    passed = 0
-    failed = 0
-    skipped = 0
-    failures = []
-
-    for test_file in test_files:
-        name = test_file.stem
-        if is_slow(test_file) and not run_slow:
-            print(f"=== {name} === SKIPPED (slow, use --slow to run)")
-            skipped += 1
-            continue
-
-        print(f"=== {name} ===")
-        result = subprocess.run([sys.executable, str(test_file)])
-        if result.returncode == 0:
-            passed += 1
-        else:
-            failed += 1
-            failures.append(name)
-        print()
-
-    total = passed + failed + skipped
-    print("================================")
-    print(f"{passed}/{total} tests passed" + (f", {skipped} skipped" if skipped else ""))
-
-    if failed:
-        print(f"Failed: {' '.join(failures)}")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
--- a/e2e/snapshots/inference.json
+++ b/e2e/snapshots/inference.json
@@ -1,8 +0,0 @@
-{
-  "model": "mlx-community/Qwen3-0.6B-4bit",
-  "seed": 42,
-  "temperature": 0,
-  "prompt": "What is 2+2? Reply with just the number.",
-  "max_tokens": 32,
-  "content": "<think>\nOkay, so I need to figure out what 2+2 is. Let me think. Well, if you add 2 and 2 together"
-}
--- a/e2e/test_cluster_formation.py
+++ b/e2e/test_cluster_formation.py
@@ -1,21 +0,0 @@
-"""Test: Basic cluster formation.
-
-Verifies two nodes discover each other, elect a master, and the API responds.
-"""
-
-import asyncio
-import sys
-sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent))
-from conftest import Cluster
-
-
-async def main():
-    async with Cluster("cluster_formation") as cluster:
-        await cluster.build()
-        await cluster.start()
-        await cluster.assert_healthy()
-        print("PASSED: cluster_formation")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
--- a/e2e/test_inference_snapshot.py
+++ b/e2e/test_inference_snapshot.py
@@ -1,75 +0,0 @@
-"""Test: Deterministic inference output (snapshot test).
-slow
-
-Sends a chat completion request with a fixed seed and temperature=0,
-then verifies the output matches a known-good snapshot. This ensures
-inference produces consistent results across runs.
-
-Requires a machine that can run MLX inference at reasonable speed (Apple Silicon).
-Run with: python3 e2e/run_all.py --slow  or  E2E_SLOW=1 python3 e2e/run_all.py
-"""
-
-import asyncio
-import json
-import sys
-from pathlib import Path
-sys.path.insert(0, str(Path(__file__).parent))
-from conftest import Cluster
-
-MODEL = "mlx-community/Qwen3-0.6B-4bit"
-SEED = 42
-PROMPT = "What is 2+2? Reply with just the number."
-MAX_TOKENS = 32
-SNAPSHOT_FILE = Path(__file__).parent / "snapshots" / "inference.json"
-
-
-async def main():
-    async with Cluster("inference_snapshot") as cluster:
-        await cluster.build()
-        await cluster.start()
-        await cluster.assert_healthy()
-
-        # Launch the model instance (triggers download + placement)
-        print(f"  Launching model {MODEL}...")
-        await cluster.place_model(MODEL)
-
-        print(f"  Sending chat completion (seed={SEED}, temperature=0)...")
-        resp = await cluster.chat(
-            model=MODEL,
-            messages=[{"role": "user", "content": PROMPT}],
-            seed=SEED,
-            temperature=0,
-            max_tokens=MAX_TOKENS,
-        )
-
-        content = resp["choices"][0]["message"]["content"]
-        print(f"  Response: {content!r}")
-
-        # Load or create snapshot
-        if SNAPSHOT_FILE.exists():
-            snapshot = json.loads(SNAPSHOT_FILE.read_text())
-            expected = snapshot["content"]
-            assert content == expected, (
-                f"Snapshot mismatch!\n"
-                f"  Expected: {expected!r}\n"
-                f"  Got:      {content!r}\n"
-                f"  Delete {SNAPSHOT_FILE} to regenerate."
-            )
-            print(f"  Output matches snapshot")
-        else:
-            SNAPSHOT_FILE.parent.mkdir(parents=True, exist_ok=True)
-            SNAPSHOT_FILE.write_text(json.dumps({
-                "model": MODEL,
-                "seed": SEED,
-                "temperature": 0,
-                "prompt": PROMPT,
-                "max_tokens": MAX_TOKENS,
-                "content": content,
-            }, indent=2) + "\n")
-            print(f"  Snapshot created: {SNAPSHOT_FILE}")
-
-        print("PASSED: inference_snapshot")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
--- a/e2e/test_no_internet.py
+++ b/e2e/test_no_internet.py
@@ -1,38 +0,0 @@
-"""Test: Cluster works without internet access.
-
-Verifies exo functions correctly when containers can talk to each other
-but cannot reach the internet. Uses iptables to block all outbound traffic
-except private subnets and multicast (for mDNS discovery).
-"""
-
-import asyncio
-import sys
-sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent))
-from conftest import Cluster
-
-
-async def main():
-    async with Cluster(
-        "no_internet",
-        overrides=["tests/no_internet/docker-compose.override.yml"],
-    ) as cluster:
-        await cluster.build()
-        await cluster.start()
-        await cluster.assert_healthy()
-
-        # Verify internet is actually blocked from inside the containers
-        for node in ["exo-node-1", "exo-node-2"]:
-            rc, _ = await cluster.exec(node, "curl", "-sf", "--max-time", "3", "https://huggingface.co", check=False)
-            assert rc != 0, f"{node} should not be able to reach the internet"
-            print(f"  {node}: internet correctly blocked")
-
-        # Verify exo detected no internet connectivity
-        log = await cluster.logs()
-        assert "Internet connectivity: False" in log, "exo should detect no internet"
-        print("  exo correctly detected no internet connectivity")
-
-        print("PASSED: no_internet")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
--- a/e2e/tests/no_internet/docker-compose.override.yml
+++ b/e2e/tests/no_internet/docker-compose.override.yml
@@ -1,32 +0,0 @@
-# Block all outbound internet traffic using iptables while preserving:
-#   - Multicast (224.0.0.0/4) for mDNS peer discovery
-#   - Private subnets (10/8, 172.16/12, 192.168/16) for inter-container communication
-#   - Loopback (127/8)
-# Requires NET_ADMIN capability for iptables.
-services:
-  exo-node-1:
-    cap_add:
-      - NET_ADMIN
-    entrypoint: ["/bin/sh", "-c"]
-    command:
-      - |
-        iptables -A OUTPUT -d 127.0.0.0/8 -j ACCEPT
-        iptables -A OUTPUT -d 10.0.0.0/8 -j ACCEPT
-        iptables -A OUTPUT -d 172.16.0.0/12 -j ACCEPT
-        iptables -A OUTPUT -d 192.168.0.0/16 -j ACCEPT
-        iptables -A OUTPUT -d 224.0.0.0/4 -j ACCEPT
-        iptables -A OUTPUT -j REJECT
-        exec .venv/bin/exo -v
-  exo-node-2:
-    cap_add:
-      - NET_ADMIN
-    entrypoint: ["/bin/sh", "-c"]
-    command:
-      - |
-        iptables -A OUTPUT -d 127.0.0.0/8 -j ACCEPT
-        iptables -A OUTPUT -d 10.0.0.0/8 -j ACCEPT
-        iptables -A OUTPUT -d 172.16.0.0/12 -j ACCEPT
-        iptables -A OUTPUT -d 192.168.0.0/16 -j ACCEPT
-        iptables -A OUTPUT -d 224.0.0.0/4 -j ACCEPT
-        iptables -A OUTPUT -j REJECT
-        exec .venv/bin/exo -v
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ dependencies = [
    "loguru>=0.7.3",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
-    "mlx==0.30.6; sys_platform == 'darwin'",
+    "mlx; sys_platform == 'darwin'",
    "mlx[cpu]==0.30.6; sys_platform == 'linux'",
    "mlx-lm==0.30.6",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
@@ -64,6 +64,7 @@ members = [

 [tool.uv.sources]
 exo_pyo3_bindings = { workspace = true }
+mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", marker = "sys_platform == 'darwin'" }
 #mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm", branch = "stable" }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
--- a/src/exo/utils/channels.py
+++ b/src/exo/utils/channels.py
@@ -141,6 +141,9 @@ class MpSender[T]:
        )
        self._state.buffer.join_thread()

+    def cancel_join(self) -> None:
+        self._state.buffer.cancel_join_thread()
+
    # == context manager support ==
    def __enter__(self) -> Self:
        return self
@@ -212,6 +215,9 @@ class MpReceiver[T]:
        )
        self._state.buffer.join_thread()

+    def cancel_join(self) -> None:
+        self._state.buffer.cancel_join_thread()
+
    # == iterator support ==
    def __iter__(self) -> Self:
        return self
--- a/src/exo/worker/engines/mlx/generator/generate.py
+++ b/src/exo/worker/engines/mlx/generator/generate.py
@@ -57,6 +57,7 @@ def prefill(
    sampler: Callable[[mx.array], mx.array],
    prompt_tokens: mx.array,
    cache: KVCacheType,
+    group: mx.distributed.Group | None = None,
 ) -> tuple[float, int, list[CacheSnapshot]]:
    """Prefill the KV cache with prompt tokens.

@@ -86,6 +87,9 @@ def prefill(

    set_pipeline_prefill(model, is_prefill=True)

+    mx_barrier(group)
+    logger.info("Ready to prefill")
+
    # Use max_tokens=1 because max_tokens=0 does not work.
    # We just throw away the generated token - we only care about filling the cache
    for _ in stream_generate(
@@ -305,16 +309,9 @@ def mlx_generate(
    )
    max_stop_len = max((len(s) for s in stop_sequences), default=0)

-    mx_barrier(group)
-    logger.info("Ready to prefill")
-
    # Prefill cache with all tokens except the last one
    prefill_tps, prefill_tokens, ssm_snapshots_list = prefill(
-        model,
-        tokenizer,
-        sampler,
-        prompt_tokens[:-1],
-        caches,
+        model, tokenizer, sampler, prompt_tokens[:-1], caches, group
    )
    cache_snapshots: list[CacheSnapshot] | None = ssm_snapshots_list or None

--- a/src/exo/worker/runner/bootstrap.py
+++ b/src/exo/worker/runner/bootstrap.py
@@ -41,6 +41,8 @@ def entrypoint(
        main(bound_instance, event_sender, task_receiver)
    except ClosedResourceError:
        logger.warning("Runner communication closed unexpectedly")
+    except KeyboardInterrupt:
+        logger.info("Runner received interrupt, shutting down")
    except Exception as e:
        logger.opt(exception=e).warning(
            f"Runner {bound_instance.bound_runner_id} crashed with critical exception {e}"
@@ -55,7 +57,9 @@ def entrypoint(
        try:
            event_sender.close()
            task_receiver.close()
+        except Exception:
+            pass
        finally:
-            event_sender.join()
-            task_receiver.join()
+            event_sender.cancel_join()
+            task_receiver.cancel_join()
            logger.info("bye from the runner")
--- a/uv.lock
+++ b/uv.lock
@@ -377,8 +377,8 @@ dependencies = [
    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
+    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cpu"], marker = "sys_platform == 'linux'" },
+    { name = "mlx", version = "0.30.7.dev20260211+80de5e35", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }, marker = "sys_platform == 'darwin'" },
    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -416,7 +416,7 @@ requires-dist = [
    { name = "hypercorn", specifier = ">=0.18.0" },
    { name = "loguru", specifier = ">=0.7.3" },
    { name = "mflux", specifier = "==0.15.5" },
-    { name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.6" },
+    { name = "mlx", marker = "sys_platform == 'darwin'", git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git" },
    { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.6" },
    { name = "mlx-lm", specifier = "==0.30.6" },
    { name = "msgspec", specifier = ">=0.19.0" },
@@ -1020,8 +1020,8 @@ dependencies = [
    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "matplotlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", extra = ["cuda13"], marker = "sys_platform == 'linux'" },
+    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cuda13"], marker = "sys_platform == 'linux'" },
+    { name = "mlx", version = "0.30.7.dev20260211+80de5e35", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }, marker = "sys_platform == 'darwin'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "piexif", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1048,18 +1048,12 @@ wheels = [
 name = "mlx"
 version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "mlx-metal", marker = "sys_platform == 'darwin'" },
+resolution-markers = [
+    "sys_platform == 'linux'",
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ae/5b/e460e144a34d5529e010056cccf50b538d56ed001473bc6b246018fd58cb/mlx-0.30.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ed86f8bffc174c2f259ca589ea25464c96cf69d1bb457074a2bf2ef53737e54f", size = 573515, upload-time = "2026-02-06T03:45:23.405Z" },
-    { url = "https://files.pythonhosted.org/packages/60/25/69833fefb9a3fef30b56792b1bcd022496c4fea83e45411d289b77ef7546/mlx-0.30.6-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:c52294958269e20f300639a17c1900ca8fc737d859ddda737f9811e94bd040e5", size = 573516, upload-time = "2026-02-06T03:45:24.618Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/6a/7e7fbeebc5cb51b6a5eba96b263a6298707bcbdc059f4b0b73e088bc3dea/mlx-0.30.6-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:b5b6636f7c49a4d86d8ec82643b972f45a144a7a9f3a967b27b2e6e22cf71e6a", size = 573592, upload-time = "2026-02-06T03:45:25.928Z" },
    { url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" },
    { url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" },
-    { url = "https://files.pythonhosted.org/packages/60/23/361dc7a5797634e4d7e9bdd6564c6b28f9b1246672632def2f91bf066b18/mlx-0.30.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:78804a89dcff4a838f7c2da72392fe87a523e95122a3c840e53df019122aad45", size = 575028, upload-time = "2026-02-06T03:45:31.549Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/69/1854484d414171586814dfbe8def95f75c4ea2c7341ba13ba8ee675f7c62/mlx-0.30.6-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ec13584ab069665cc7ad34a05494d9291cd623aef6ae96be48875fc87cfc25d6", size = 575026, upload-time = "2026-02-06T03:45:33.072Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/b8/3adbc441924209a7e4c568308b2a0b54bd09aee6a68db5bae85304791e54/mlx-0.30.6-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:b2c5e8a090a753ef99a1380a4d059c983083f36198864f6df9faaf1223d083df", size = 575041, upload-time = "2026-02-06T03:45:34.814Z" },
    { url = "https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" },
    { url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" },
 ]
@@ -1072,6 +1066,14 @@ cuda13 = [
    { name = "mlx-cuda-13", marker = "sys_platform == 'linux'" },
 ]

+[[package]]
+name = "mlx"
+version = "0.30.7.dev20260211+80de5e35"
+source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }
+resolution-markers = [
+    "sys_platform == 'darwin'",
+]
+
 [[package]]
 name = "mlx-cpu"
 version = "0.30.6"
@@ -1102,7 +1104,7 @@ version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", marker = "sys_platform == 'darwin'" },
+    { name = "mlx", version = "0.30.7.dev20260211+80de5e35", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }, marker = "sys_platform == 'darwin'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1114,16 +1116,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/20/5f/01d281f1fa8a1521d5936659beb4f5ab1f32b463d059263cf9d4cef969d9/mlx_lm-0.30.6-py3-none-any.whl", hash = "sha256:a7405bd581eacc4bf8209d7a6b7f23629585a0d7c6740c2a97e51fee35b3b0e1", size = 379451, upload-time = "2026-02-04T21:27:43.222Z" },
 ]

-[[package]]
-name = "mlx-metal"
-version = "0.30.6"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/f3/85/44406b521f920248fad621334d4dc15e77660a494edf890e7cbee33bf38d/mlx_metal-0.30.6-py3-none-macosx_14_0_arm64.whl", hash = "sha256:ea6d0c973def9a5b4f652cc77036237db3f88c9d0af63701d76b5fddde99b820", size = 38437818, upload-time = "2026-02-06T03:44:56.19Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/cb/10a516995f7d0c154b0d7e633c54b51e96977a86a355105b6474cfcbe0d0/mlx_metal-0.30.6-py3-none-macosx_15_0_arm64.whl", hash = "sha256:0f8cb94634d07e06a372d6ad9a090f38a18bab1ff19a140aede60eacf707bb94", size = 38433701, upload-time = "2026-02-06T03:44:59.678Z" },
-    { url = "https://files.pythonhosted.org/packages/4c/7d/70cb272f7373c334709f210ed8420511fc9d64d05a7a646c0b3b94c29c04/mlx_metal-0.30.6-py3-none-macosx_26_0_arm64.whl", hash = "sha256:d761ae26304f2c4b454eeea7f612a56919d9e5e57dbb1dc0788f8e34aa6f41c2", size = 47718448, upload-time = "2026-02-06T03:45:03.133Z" },
-]
-
 [[package]]
 name = "more-itertools"
 version = "10.8.0"
Author	SHA1	Message	Date
rltakashige	659dbde927	Merge branch 'main' into leo/address-rdma-gpu-locks	2026-02-12 00:00:44 +00:00
Ryuichi Leo Takashige	22227efe96	exit cleanly	2026-02-11 23:55:46 +00:00
Ryuichi Leo Takashige	8eaaee7622	Address GPU timeouts with custom fork	2026-02-11 22:53:17 +00:00