Compare commits

..

23 Commits

Author SHA1 Message Date
Evan
5ddc2574ec smore
wait
2026-02-18 11:40:16 +00:00
Evan
d5e56999de remove dead code 2026-02-18 11:38:13 +00:00
Alex Cheema
3addeadea8 Update mlx-lm to 0.30.7 (#1520)
## Summary
- Bumps `mlx-lm` from 0.30.6 to 0.30.7 in `pyproject.toml` and `uv.lock`

## Test plan
- [x] `uv lock` resolves successfully
- [x] `basedpyright` — no new errors (63 pre-existing in unrelated
`test_tool_call_tracker.py`)
- [x] `ruff check` — all checks passed
- [x] `nix fmt` — no formatting changes
- [x] `pytest` — 188 passed, 1 skipped

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 11:14:23 +00:00
rltakashige
f2be929211 Leo/address rdma gpu locks 2 (#1515)
Same as #1489. Had to revert and redo thanks to Claude.

---------

Co-authored-by: Jake Hillion <jake@hillion.co.uk>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 14:00:52 -08:00
rltakashige
83af8c63fa Revert "Use custom fork that resolves GPU locks" (#1502)
Reverts exo-explore/exo#1489

Goddammit Claude...
2026-02-17 18:18:54 +00:00
Evan Quiney
eccc6298d1 Revert "Add MetaInstance declarative layer (#1447)"
This reverts commit a962a28afc.
2026-02-17 18:11:47 +00:00
Evan Quiney
c8997217cf Revert "feat: better onboarding UX for new users (#1479)"
This reverts commit 490d2e46ba.
2026-02-17 18:02:32 +00:00
Alex Cheema
490d2e46ba feat: better onboarding UX for new users (#1479)
## Summary

- **Auto-open dashboard** in browser on first launch (uses
`~/.exo/.dashboard_opened` marker)
- **Welcome overlay** with "Choose a Model" CTA button when no model
instance is running
- **Tutorial progress messages** during model download → loading → ready
lifecycle stages
- **Fix conversation sidebar** text contrast — bumped to white text,
added active state background
- **Simplify technical jargon** — sharding/instance type/min nodes
hidden behind collapsible "Advanced Options" toggle; strategy display
hidden behind debug mode
- **Polished DMG installer** with drag-to-Applications layout, custom
branded background, and AppleScript-configured window positioning

## Test plan

- [ ] Launch exo for the first time (delete `~/.exo/.dashboard_opened`
to simulate) — browser should auto-open
- [ ] Verify welcome overlay appears on topology when no model is loaded
- [ ] Launch a model and verify download/loading/ready messages appear
in instance cards
- [ ] Check conversation sidebar text is readable (white on dark, yellow
when active)
- [ ] Verify "Advanced Options" toggle hides/shows sharding controls
- [ ] Build DMG with `packaging/dmg/create-dmg.sh` and verify
drag-to-Applications layout

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 17:52:49 +00:00
rltakashige
facf2d4d03 Use custom fork that resolves GPU locks (#1489)
## Motivation

There is an issue on Macs where an explicit synchronization is
necessary for memory to be updated from the L1 cache. Without that
synchronization, a spin wait can keep reading a stale timestamp and
never observe the update, so GPU locks can occur.

## Changes

Updated in my personal fork, which this PR switches the dependency to.

## Why It Works

https://github.com/ARM-software/acle/releases

## Test Plan

### Manual Testing
Tested manually that no GPU locks occur (even with multiple simultaneous
instances running) and that the performance differential is negligible
(267 vs 269 tps on Llama 3.2 1B at approximately 10k context).


------------------------------------------------------
I have seen a GPU lock, specifically when sending a particularly large
chat completion while the model was loading. However, I have since been
unable to reproduce it, and it may have been something I did wrong.
Please do create an issue and tag me if any GPU locks do occur.

---------

Co-authored-by: Jake Hillion <jake@hillion.co.uk>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 17:48:43 +00:00
Alex Cheema
a962a28afc Add MetaInstance declarative layer (#1447)
## Motivation

Users currently manage instances directly, which means if a node
disconnects or connections break, the instance dies and nothing
recreates it. MetaInstance is a declarative primitive: "ensure an
instance matching these parameters always exists." The reconciler
watches for unhealthy or missing backing instances and re-places them
automatically.

## Changes

- **MetaInstance type** (`meta_instance.py`): declarative constraint
with `model_id`, `min_nodes`, optional `node_ids`, and `sharding`
- **Reconciler** (`reconcile.py`): `find_unsatisfied_meta_instances`
checks which MetaInstances lack a healthy backing instance,
`try_place_for_meta_instance` creates one
- **Master loop** (`main.py`): periodically reconciles unsatisfied
MetaInstances; immediate placement on `CreateMetaInstance` command
- **API** (`api.py`): `create_meta_instance` / `delete_meta_instance` /
`GET /meta_instances` endpoints; delete cascades to backing instances
with task cancellation
- **Binding via `meta_instance_id` on Instance** (`instances.py`): no
separate binding event or backing map — the instance carries its parent
MetaInstance ID directly, eliminating race conditions in the reconciler
- **Dashboard**: sidebar shows MetaInstances with their backing instance
status; orphan instances (created directly) still shown separately
- **Tests**: constraint matching, connection health, unsatisfied
detection, exclusive binding, cascade delete with task cancellation

### Recent improvements

- **fix: cancel active tasks on cascade delete** — `DeleteMetaInstance`
now emits `TaskStatusUpdated(Cancelled)` for any Pending/Running tasks
on backing instances before emitting `InstanceDeleted`. Previously,
cascade-deleting backing instances left orphaned task references in
state.
- **Lifecycle logging** — added `logger.info`/`logger.warning` for:
`CreateMetaInstance` (model, min_nodes, sharding), `DeleteMetaInstance`
(with cascade count), reconciler placement success/failure, and retry
decisions with attempt counts in `InstanceHealthReconciler`.
- **GET `/meta_instances` endpoint** — lists all meta-instances without
needing to fetch full state.
- **2 regression tests** — `test_cascade_delete_cancels_active_tasks`
and `test_cascade_delete_skips_completed_tasks` verify the
cascade-delete event sequence.

## Why It Works

Putting `meta_instance_id` on `BaseInstance` makes binding inherent to
instance creation. When the reconciler creates an instance for a
MetaInstance, it tags it via `model_copy`. When the instance is deleted,
the binding disappears with it. This avoids the two bugs that a separate
binding mechanism would introduce:
1. Stale exclusion sets — the reconciler loop can't accidentally bind
two MetaInstances to the same instance
2. Delete ordering race — no window between deleting an instance and its
binding where the reconciler could re-place
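
As a rough sketch of that invariant (the two reconciler functions are
named as above; the data shapes and placement details are simplified
assumptions, not exo's actual types):

```python
from dataclasses import dataclass


@dataclass
class Instance:
    id: str
    model_id: str
    healthy: bool = True
    meta_instance_id: str | None = None  # binding lives on the instance itself


@dataclass
class MetaInstance:
    id: str
    model_id: str
    min_nodes: int = 1


def find_unsatisfied_meta_instances(
    metas: list[MetaInstance], instances: list[Instance]
) -> list[MetaInstance]:
    # A MetaInstance is satisfied iff some healthy instance carries its ID.
    backed = {i.meta_instance_id for i in instances if i.healthy}
    return [m for m in metas if m.id not in backed]


def reconcile(metas: list[MetaInstance], instances: list[Instance]) -> None:
    for meta in find_unsatisfied_meta_instances(metas, instances):
        # Tagging at creation makes the binding inherent: deleting the
        # instance deletes the binding, and two MetaInstances can never
        # claim the same instance through a stale exclusion set.
        instances.append(
            Instance(
                id=f"backing-{meta.id}",  # hypothetical ID scheme
                model_id=meta.model_id,
                meta_instance_id=meta.id,
            )
        )
```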

## Test Plan

### Manual Testing
- Created MetaInstance via dashboard, verified instance placed
- Verified delete cascades (deleting MetaInstance removes backing
instance)
- Verified orphan instances still work independently

### Automated Testing
- 30 tests in `test_meta_instance_edge_cases.py`: lifecycle, retry
logic, error handling, concurrent operations, cascade delete with task
cancellation
- 24 tests in `test_reconcile.py`: constraint matching, connection
health (single/multi-node, edge removal, IP changes), unsatisfied
detection, exclusive binding, idempotency
- All 261 tests pass
- basedpyright 0 errors, ruff clean, dashboard builds

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 09:48:19 -08:00
Alex Cheema
db79c350c1 Fix graceful process shutdown in macOS app (#1372)
## Motivation

Fixes #1370

When the macOS app stops exo, GPU/system memory isn't released. This
happens because:

1. The macOS app calls `process.terminate()` (SIGTERM), but the Python
process only registers a graceful shutdown handler for SIGINT, not
SIGTERM. Python leaves SIGTERM at its default disposition, which
terminates the process immediately and bypasses the cleanup cascade
(runner subprocess MLX cleanup via `mx.clear_cache()`, channel closing,
etc.).
2. The app doesn't wait for the process to actually finish cleanup — it
immediately nils out the process reference.

## Changes

**`src/exo/main.py`**: Register SIGTERM handler alongside SIGINT so the
graceful shutdown cascade (`Node.shutdown()` → cancel task group →
worker/runner cleanup → `mx.clear_cache()` + `gc.collect()`) runs
regardless of which signal is received.
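
A minimal sketch of that wiring, assuming an asyncio entry point (the
cleanup body here is illustrative, not exo's actual code):

```python
import asyncio
import signal


async def run_node() -> None:
    loop = asyncio.get_running_loop()
    stop = asyncio.Event()

    # Register the same graceful handler for SIGINT and SIGTERM, so the
    # cleanup cascade runs no matter which signal the macOS app sends.
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, stop.set)

    try:
        await stop.wait()
    finally:
        # Stand-in for exo's shutdown cascade: cancel the task group,
        # shut down runners (del model; mx.clear_cache(); gc.collect()),
        # close channels, flush logs.
        await graceful_shutdown()


async def graceful_shutdown() -> None:
    """Hypothetical cleanup hook."""
    ...
```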

**`app/EXO/EXO/ExoProcessController.swift`**: Replace immediate
`process.terminate()` with escalating shutdown per @Evanev7's
suggestion:
1. Send SIGINT via `process.interrupt()` — triggers the registered
Python handler for graceful cleanup
2. Wait up to 5 seconds for the process to exit
3. If still running, escalate to SIGTERM via `process.terminate()`
4. Wait up to 3 seconds
5. If still running, force kill via SIGKILL

The escalation runs in a detached `Task` so the UI updates immediately
(status → stopped) without blocking.
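
For illustration, the same escalation expressed in Python with
`subprocess` (timeouts mirror the steps above):

```python
import signal
import subprocess


def stop_gracefully(proc: subprocess.Popen[bytes]) -> None:
    proc.send_signal(signal.SIGINT)  # step 1: trigger graceful handler
    try:
        proc.wait(timeout=5)  # step 2: give cleanup five seconds
        return
    except subprocess.TimeoutExpired:
        pass
    proc.terminate()  # step 3: escalate to SIGTERM
    try:
        proc.wait(timeout=3)  # step 4: three more seconds
        return
    except subprocess.TimeoutExpired:
        pass
    proc.kill()  # step 5: force kill with SIGKILL
```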

## Why It Works

The root cause is that SIGTERM wasn't triggering the graceful shutdown
path. By registering a SIGTERM handler in Python and sending SIGINT
first from the macOS app, the process gets a chance to run the full
cleanup cascade: cancelling the task group, shutting down runners (which
call `del model; mx.clear_cache(); gc.collect()`), closing channels, and
flushing logs. The escalation to SIGTERM and SIGKILL ensures the process
always terminates even if graceful shutdown hangs.

## Test Plan

### Manual Testing
Hardware: Mac Studio M4 Max 128GB
- Start exo via macOS app, load a model, run inference
- Stop via the toggle switch, verify memory is released without
requiring a system restart
- Test rapid stop/start (restart) to ensure no race conditions

### Automated Testing
- `uv run basedpyright` — 0 errors
- `uv run ruff check` — passes
- `nix fmt` — no changes

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Evan Quiney <evanev7@gmail.com>
2026-02-17 09:03:54 -08:00
Alex Cheema
d6301ed593 dashboard: redesign downloads page as model×node table (#1465)
## Motivation

The current downloads page uses a node-centric card grid layout that is
messy and hard to read — the same model across different nodes appears
in separate cards, and deep nesting wastes space. This makes it
difficult to quickly see which models are on which nodes.

## Changes

Rewrote the downloads page
(`dashboard/src/routes/downloads/+page.svelte`) from a card grid to a
clean table layout:

- **Rows** = models (unique across all nodes)
- **Columns** = nodes (with disk free shown in header)
- **Cells** show status at a glance:
  - ✅ Green checkmark + size for completed downloads
  - 🟡 Yellow percentage + mini progress bar + speed for active downloads
  - `...` for pending downloads
  - ❌ Red X for failed downloads
  - `--` for models not present on a node
- Delete/download action buttons appear on row hover
- Model name column is sticky on horizontal scroll (for many-node
clusters)
- Models sorted by number of nodes with completed downloads
- Imported shared utilities from `$lib/utils/downloads` instead of
inline re-implementations
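
Conceptually the new page is a pivot of per-node download events into
model rows and node columns. A rough sketch of that shape in Python
(event field names here are assumptions, not the dashboard's actual
types):

```python
from collections import defaultdict
from typing import Any


def pivot_downloads(
    downloads: dict[str, list[dict[str, Any]]],  # node_id -> download events
) -> dict[str, dict[str, dict[str, Any]]]:
    """grid[model_id][node_id] -> latest event for that table cell."""
    grid: dict[str, dict[str, dict[str, Any]]] = defaultdict(dict)
    for node_id, events in downloads.items():
        for event in events:
            # Missing cells render as `--` (model not on that node).
            grid[event["modelId"]][node_id] = event
    return grid
```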

### Backend: model directory in download events

- Added `model_directory` field to `BaseDownloadProgress` so all
download status events include the on-disk path
- Added `_model_dir()` helper to `DownloadCoordinator` to compute the
path from `EXO_MODELS_DIR`
- Dashboard uses this to show file location and enable "open in Finder"
for completed downloads

### Info modal

- Clicking a model name opens an info modal showing card details
(family, quantization, capabilities, storage size, layer count, tensor
parallelism support)

### Other fixes

- Fixed model name truncation in the table
- Excluded `tests/start_distributed_test.py` from pytest collection (CLI
script that calls `sys.exit()` at import time)

## Test Plan

- [x] `uv run basedpyright` — 0 errors
- [x] `uv run ruff check` — all passed
- [x] `nix fmt` — clean
- [x] `uv run pytest` — 188 passed, 1 skipped

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 14:31:47 +00:00
Evan Quiney
6d1ca6689b don't time out node identities (#1493)
currently nodes leaving and rejoining the cluster can lose their identity. We have no need to delete this data when a node times out, so let's just persist it.
2026-02-17 11:48:28 +00:00
Evan
c01b6fff21 eprint banner
our banner was being printed to stdout but should be printed to stderr
as it's essentially a log message
2026-02-17 11:43:06 +00:00
Jake Hillion
8392e78afe bench: add spec for automatic canary benchmarks (#1483)
Adds all the models that can fit onto a single M3 Ultra for single
machine benchmarks. Fixes the macOS version, GPU spec, and chip type for
maximum reproducibility. Specifies the minimum memory accordingly for
each type of model, using the smallest machine available (the smallest
M3 Ultra is 96GiB).

Test plan:
- Running this with some code that makes machines of this spec available
and stores the results. It works.

This will become part of a larger testing/stability strategy once we've
collected more of the data.
2026-02-17 10:52:05 +00:00
Evan
86735ece78 begins
begins
2026-02-16 19:26:19 +00:00
Evan Quiney
2759e92334 api cancellation (#1276)
closing the http request to the api now
- sends a cancellation from the api
- writes that cancellation in the master
- worker plans off the cancellation
- runner observes that cancellation after every generation step (+1
communication per token)
- cancellation happens synchronously to prevent gpu locks
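
roughly, the runner-side loop looks like this (names are illustrative,
not exo's actual API):

```python
from typing import Callable


def generation_loop(
    step: Callable[[], str],
    emit: Callable[[str], None],
    is_cancelled: Callable[[], bool],
    max_tokens: int,
) -> None:
    # Re-check cancellation after every generation step: one extra
    # communication per token, but teardown stays synchronous, which
    # avoids the GPU locks an asynchronous abort could cause.
    for _ in range(max_tokens):
        emit(step())
        if is_cancelled():
            break
```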

closes #61

---------

Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:30:07 -08:00
Jake Hillion
131fb141a6 bench: add --danger-delete-downloads flag with planning phase
exo bench previously relied on the worker's plan loop to download
models, which could fail silently or run into disk space issues during
benchmarking. This made it difficult to diagnose download failures.

Added a planning phase that runs before benchmarking to explicitly
handle downloads. It checks available disk space on each node via the
/state endpoint and starts downloads via POST /download/start. When
the --danger-delete-downloads flag is set and there's insufficient
space, it deletes existing models from smallest to largest until
there's room for the benchmark model.

Test plan:
- CI

```
jake@maverick:/data/users/jake/repos/exo/ > nix run .#exo-bench -- --pp 128,2048,4096 --tg 128 --stdout --settle-timeout 10 --host s1 --model mlx-community/gpt-oss-120b-MXFP4-Q8
PyTorch was not found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
2026-02-16 12:12:11.807 | INFO     | __main__:main:710 - pp/tg mode: combinations (product) - 3 pairs
Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.
2026-02-16 12:12:13.455 | DEBUG    | __main__:main:725 - [exo-bench] loaded tokenizer: mlx-community/gpt-oss-120b-MXFP4-Q8 for prompt sizer
2026-02-16 12:12:13.473 | DEBUG    | __main__:main:761 - exo-bench model: short_id=gpt-oss-120b-MXFP4-Q8 full_id=mlx-community/gpt-oss-120b-MXFP4-Q8
2026-02-16 12:12:13.473 | INFO     | __main__:main:762 - placements: 1
2026-02-16 12:12:13.474 | INFO     | __main__:main:764 -   - Pipeline / MlxRing / nodes=1
2026-02-16 12:12:13.474 | INFO     | __main__:main:771 - Planning phase: checking downloads...
Traceback (most recent call last):
  File "/nix/store/q31kmbcfr5bf97290bvbnhrvpc3fv824-source/bench/exo_bench.py", line 885, in <module>
    raise SystemExit(main())
                     ~~~~^^
  File "/nix/store/q31kmbcfr5bf97290bvbnhrvpc3fv824-source/bench/exo_bench.py", line 772, in main
    run_planning_phase(
    ~~~~~~~~~~~~~~~~~~^
        client,
        ^^^^^^^
    ...<4 lines>...
        settle_deadline,
        ^^^^^^^^^^^^^^^^
    )
    ^
  File "/nix/store/q31kmbcfr5bf97290bvbnhrvpc3fv824-source/bench/exo_bench.py", line 367, in run_planning_phase
    raise RuntimeError(
    ...<2 lines>...
    )
RuntimeError: Insufficient disk on 12D3KooWE2C7dzC9d9YJMEfWK3g8og7JdZj3HHXZ8VmGrXYAEnEj: need 65GB, have 55GB. Use --danger-delete-downloads to free space.
jake@maverick:/data/users/jake/repos/exo/ > nix run .#exo-bench -- --pp 128,2048,4096 --tg 128 --stdout --settle-timeout 10 --host s1 --model mlx-community/gpt-oss-120b-MXFP4-Q8 --danger-delete-downloads
PyTorch was not found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
2026-02-16 12:12:19.626 | INFO     | __main__:main:710 - pp/tg mode: combinations (product) - 3 pairs
2026-02-16 12:12:21.262 | DEBUG    | __main__:main:725 - [exo-bench] loaded tokenizer: mlx-community/gpt-oss-120b-MXFP4-Q8 for prompt sizer
2026-02-16 12:12:21.280 | DEBUG    | __main__:main:761 - exo-bench model: short_id=gpt-oss-120b-MXFP4-Q8 full_id=mlx-community/gpt-oss-120b-MXFP4-Q8
2026-02-16 12:12:21.280 | INFO     | __main__:main:762 - placements: 1
2026-02-16 12:12:21.280 | INFO     | __main__:main:764 -   - Pipeline / MlxRing / nodes=1
2026-02-16 12:12:21.280 | INFO     | __main__:main:771 - Planning phase: checking downloads...
2026-02-16 12:12:21.336 | INFO     | __main__:run_planning_phase:386 - Deleting mlx-community/Qwen3-0.6B-4bit from 12D3KooWE2C7dzC9d9YJMEfWK3g8og7JdZj3HHXZ8VmGrXYAEnEj (335MB)
2026-02-16 12:12:21.350 | INFO     | __main__:run_planning_phase:386 - Deleting mlx-community/Llama-3.2-1B-Instruct-4bit from 12D3KooWE2C7dzC9d9YJMEfWK3g8og7JdZj3HHXZ8VmGrXYAEnEj (679MB)
2026-02-16 12:12:21.363 | INFO     | __main__:run_planning_phase:386 - Deleting mlx-community/Llama-3.2-3B-Instruct-4bit from 12D3KooWE2C7dzC9d9YJMEfWK3g8og7JdZj3HHXZ8VmGrXYAEnEj (1740MB)
2026-02-16 12:12:21.373 | INFO     | __main__:run_planning_phase:386 - Deleting mlx-community/Llama-3.2-3B-Instruct-8bit from 12D3KooWE2C7dzC9d9YJMEfWK3g8og7JdZj3HHXZ8VmGrXYAEnEj (3264MB)
2026-02-16 12:12:21.384 | INFO     | __main__:run_planning_phase:386 - Deleting mlx-community/GLM-4.7-Flash-8bit from 12D3KooWE2C7dzC9d9YJMEfWK3g8og7JdZj3HHXZ8VmGrXYAEnEj (30366MB)
2026-02-16 12:12:21.413 | INFO     | __main__:run_planning_phase:407 - Started download on 12D3KooWE2C7dzC9d9YJMEfWK3g8og7JdZj3HHXZ8VmGrXYAEnEj
```

It's not pretty but it works!
2026-02-16 13:06:38 +00:00
Evan Quiney
2d8bfc2e3c fix: PlaceInstanceParams broken field validator
our field validator for PlaceInstance was wrong - we can just rely on default behaviour here anyway!
2026-02-16 03:58:43 -08:00
ciaranbor
042999f728 Ciaran/message deletion (#1409)
## Motivation

When a user deletes a message during an active streamed generation, it
can cause unexpected behavior. The delete confirmation text was also
misleading — it said "all responses after it" only for user messages,
which didn't accurately describe the behavior (all messages after the
deleted one are removed, regardless of role).

## Changes

- Prevent deletion during streaming: Disabled the delete button and
blocked `handleDeleteClick` when `loading` is true, with a visual
indication (dimmed button, cursor-not-allowed, tooltip change)
- Clarified delete confirmation text: Replaced role-specific wording
with a simpler, accurate message:
  - Last message: "Delete this message?"
  - Any other message: "Delete this message and all messages after it?"

## Why It Works

Guarding on the loading state at both the click handler and the
button's `disabled` attribute ensures no deletion can be triggered while
a response is being streamed.

## Test Plan

### Manual Testing

- Verify the delete button is visually disabled and non-clickable while
a response is streaming
- Verify the tooltip shows "Cannot delete while generating" during
streaming
- Verify the last message shows "Delete this message?" confirmation
- Verify non-last messages show "Delete this message and all messages
after it?" confirmation
- Verify deletion works normally when not streaming
2026-02-16 11:46:41 +00:00
ciaranbor
b61dc2eb35 Prevent image editing without image input (#1410)
## Motivation

Models that only support image editing (ImageToImage but not
TextToImage) would silently attempt text-to-image generation when a user
submitted a text prompt without an attached image

## Changes

- Added an early return guard in `handleSubmit()` that prevents
submission when the selected model only supports image editing and no
image is attached (`isEditOnlyWithoutImage`)
- Fixed the text-to-image generation branch to use the more specific
`modelSupportsTextToImage()` check instead of the broad `isImageModel()`,
ensuring only models with TextToImage capability trigger generation from
text alone
- The existing `isEditOnlyWithoutImage` derived state (which was already
used for UI hints like placeholder text and button disabling) now also
blocks the actual submit path

## Why It Works

The text-to-image fallback now correctly checks
`modelSupportsTextToImage()` directly, so edit-only models no longer
fall through to the generation path.


## Test Plan

### Manual Testing

- Select an edit-only image model (e.g., one with only ImageToImage
capability)
- Verify the send button is disabled and placeholder reads "Attach an
image to edit..." when no image is attached
- Attach an image and verify the form becomes submittable
- Select a text-to-image model and verify text-only prompts still
trigger generation normally
- Ensure pressing `enter` doesn't bypass the check
2026-02-16 11:39:59 +00:00
rltakashige
36a7115b6f Pass usage and generation stats through all adapters correctly (#1461)
## Motivation

Exo is not returning usage stats correctly at the moment.

## Changes

- Correctly pass usage stats instead of generation stats.
- Pass usage stats within tool calls.

## Test Plan

### Manual Testing
Needs manual testing.

### Automated Testing
Passes CI.
2026-02-16 11:20:04 +00:00
Jake Hillion
0b7d88b43b python: add hermetic basedpyright typecheck to nix flake check
The existing CI typecheck job used `uv run basedpyright` which depends
on a non-hermetic uv sync step. This replaces it with a fully hermetic
typecheck as a Nix flake check using the uv2nix virtual environment.

Added a `typecheckVenv` with dev dependencies, a `linuxOverlay` to
ignore native shared library deps (NVIDIA, torch, triton, mlx) that
aren't needed at type-check time, and `passthru` preservation plus
`.pyi` stub copying on the `exo-pyo3-bindings` overlay so basedpyright
can resolve the Rust bindings types. Also guarded the `mlx` Nix build
override to macOS only since it requires Metal. Removed the old
non-hermetic `typecheck` CI job since `nix flake check` now covers it.

The hermetic check ensures type checking uses exactly the locked
dependency versions and catches type errors without requiring a
working uv/pip environment.

Test plan:
- CI (`nix flake check` runs on x86_64-linux, aarch64-linux, aarch64-darwin)
- Verified `nix build ".#checks.x86_64-linux.typecheck"` passes with 0 errors
2026-02-16 11:09:23 +00:00
62 changed files with 2040 additions and 2868 deletions

View File

@@ -8,33 +8,6 @@ on:
      - main
jobs:
  typecheck:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          lfs: false
      - uses: cachix/install-nix-action@v31
        with:
          nix_path: nixpkgs=channel:nixos-unstable
      - uses: cachix/cachix-action@v14
        name: Configure Cachix
        with:
          name: exo
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
      - name: Load nix develop environment
        run: nix run github:nicknovitski/nix-develop/v1
      - name: Sync dependencies
        run: uv sync --all-packages
      - name: Run type checker
        run: uv run basedpyright --project pyproject.toml
  nix:
    name: Build and check (${{ matrix.system }})
    runs-on: ${{ matrix.runner }}

Cargo.lock (generated, 275 changed lines)
View File

@@ -125,9 +125,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.100"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea"
[[package]]
name = "arc-swap"
@@ -141,12 +141,6 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "asn1-rs"
version = "0.7.1"
@@ -171,7 +165,7 @@ checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
"synstructure",
]
@@ -183,7 +177,7 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -230,7 +224,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -304,19 +298,6 @@ version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"
[[package]]
name = "bigdecimal"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934"
dependencies = [
"autocfg",
"libm",
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
name = "bimap"
version = "0.6.3"
@@ -440,9 +421,9 @@ dependencies = [
[[package]]
name = "chrono"
version = "0.4.42"
version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
dependencies = [
"iana-time-zone",
"js-sys",
@@ -516,15 +497,6 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3"
[[package]]
name = "convert_case"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation"
version = "0.9.4"
@@ -672,7 +644,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -698,7 +670,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976"
dependencies = [
"data-encoding",
"syn 2.0.111",
"syn 1.0.109",
]
[[package]]
@@ -709,7 +681,7 @@ checksum = "780eb241654bf097afb00fc5f054a09b687dad862e485fdcf8399bb056565370"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -746,29 +718,6 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "derive_more"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618"
dependencies = [
"derive_more-impl",
]
[[package]]
name = "derive_more-impl"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b"
dependencies = [
"convert_case",
"proc-macro2",
"quote",
"rustc_version",
"syn 2.0.111",
"unicode-xid",
]
[[package]]
name = "digest"
version = "0.10.7"
@@ -789,7 +738,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -871,7 +820,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -939,22 +888,17 @@ name = "exo_pyo3_bindings"
version = "0.0.1"
dependencies = [
"delegate",
"derive_more",
"env_logger",
"extend",
"futures",
"impl-trait-for-tuples",
"futures-lite",
"libp2p",
"log",
"networking",
"once_cell",
"pin-project",
"pyo3",
"pyo3-async-runtimes",
"pyo3-log",
"pyo3-stub-gen",
"thiserror 2.0.17",
"thread_local",
"tokio",
"util",
]
@@ -967,9 +911,15 @@ checksum = "311a6d2f1f9d60bff73d2c78a0af97ed27f79672f15c238192a5bbb64db56d00"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "ff"
version = "0.13.1"
@@ -1078,7 +1028,10 @@ version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad"
dependencies = [
"fastrand",
"futures-core",
"futures-io",
"parking",
"pin-project-lite",
]
@@ -1090,7 +1043,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -1640,17 +1593,6 @@ dependencies = [
"xmltree",
]
[[package]]
name = "impl-trait-for-tuples"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
]
[[package]]
name = "indexmap"
version = "2.12.1"
@@ -1715,7 +1657,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -1769,7 +1711,7 @@ checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -1829,12 +1771,6 @@ version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "libp2p"
version = "0.56.0"
@@ -2364,7 +2300,7 @@ checksum = "dd297cf53f0cb3dee4d2620bb319ae47ef27c702684309f682bdb7e55a18ae9c"
dependencies = [
"heck",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -2824,16 +2760,13 @@ name = "networking"
version = "0.0.1"
dependencies = [
"delegate",
"derive_more",
"either",
"extend",
"futures",
"futures-lite",
"futures-timer",
"impl-trait-for-tuples",
"keccak-const",
"libp2p",
"log",
"thiserror 2.0.17",
"tokio",
"tracing-subscriber",
"util",
@@ -2905,9 +2838,9 @@ dependencies = [
[[package]]
name = "num-conv"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
[[package]]
name = "num-integer"
@@ -2918,17 +2851,6 @@ dependencies = [
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.19"
@@ -3131,7 +3053,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -3243,9 +3165,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.103"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
@@ -3270,7 +3192,7 @@ checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -3279,28 +3201,14 @@ version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d"
dependencies = [
"bigdecimal",
"either",
"hashbrown 0.16.1",
"indexmap",
"indoc",
"inventory",
"libc",
"lock_api",
"memoffset",
"num-bigint",
"num-complex",
"num-rational",
"num-traits",
"once_cell",
"ordered-float",
"parking_lot",
"portable-atomic",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"rust_decimal",
"smallvec",
"unindent",
]
@@ -3328,7 +3236,7 @@ checksum = "bcd7d70ee0ca1661c40407e6f84e4463ef2658c90a9e2fbbd4515b2bcdfcaeca"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -3370,7 +3278,7 @@ dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -3383,14 +3291,14 @@ dependencies = [
"proc-macro2",
"pyo3-build-config",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
name = "pyo3-stub-gen"
version = "0.17.2"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "398b833826a83ca72c1e26d1b2c7c71f9ca7c3bfc74eacc663901895c362ae33"
checksum = "b159f7704044f57d058f528a6f1f22a0a0a327dcb595c5fb38beae658e0338d6"
dependencies = [
"anyhow",
"chrono",
@@ -3405,22 +3313,25 @@ dependencies = [
"ordered-float",
"pyo3",
"pyo3-stub-gen-derive",
"rustpython-parser",
"serde",
"serde_json",
"time",
"toml",
]
[[package]]
name = "pyo3-stub-gen-derive"
version = "0.17.2"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2426ba759d848787239d80f9fdb1f223786976f87fb6c3da8188ca7c17744b28"
checksum = "a8c79e7c5b1fcec7c39ab186594658a971c59911eb6fbab5a5932cf2318534be"
dependencies = [
"heck",
"indexmap",
"proc-macro2",
"quote",
"rustpython-parser",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -3503,9 +3414,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.42"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
dependencies = [
"proc-macro2",
]
@@ -3741,16 +3652,6 @@ dependencies = [
"tokio",
]
[[package]]
name = "rust_decimal"
version = "1.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282"
dependencies = [
"arrayvec",
"num-traits",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
@@ -3986,7 +3887,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4004,9 +3905,9 @@ dependencies = [
[[package]]
name = "serde_spanned"
version = "1.0.3"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e24345aa0fe688594e73770a5f6d1b216508b4f93484c0026d521acd30134392"
checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
dependencies = [
"serde_core",
]
@@ -4194,9 +4095,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.111"
version = "2.0.116"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
dependencies = [
"proc-macro2",
"quote",
@@ -4211,7 +4112,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4287,7 +4188,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4298,7 +4199,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4312,30 +4213,30 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.44"
version = "0.3.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"serde_core",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.6"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
[[package]]
name = "time-macros"
version = "0.2.24"
version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
dependencies = [
"num-conv",
"time-core",
@@ -4411,7 +4312,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4429,9 +4330,9 @@ dependencies = [
[[package]]
name = "toml"
version = "0.9.8"
version = "1.0.2+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8"
checksum = "d1dfefef6a142e93f346b64c160934eb13b5594b84ab378133ac6815cb2bd57f"
dependencies = [
"indexmap",
"serde_core",
@@ -4444,27 +4345,27 @@ dependencies = [
[[package]]
name = "toml_datetime"
version = "0.7.3"
version = "1.0.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533"
checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e"
dependencies = [
"serde_core",
]
[[package]]
name = "toml_parser"
version = "1.0.4"
version = "1.0.9+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e"
checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4"
dependencies = [
"winnow",
]
[[package]]
name = "toml_writer"
version = "1.0.4"
version = "1.0.6+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2"
checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
[[package]]
name = "tower-service"
@@ -4491,7 +4392,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4615,24 +4516,12 @@ version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unicode-xid"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unicode_names2"
version = "1.3.0"
@@ -4815,7 +4704,7 @@ dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
"wasm-bindgen-shared",
]
@@ -4957,7 +4846,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4968,7 +4857,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4979,7 +4868,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -4990,7 +4879,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -5379,7 +5268,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
"synstructure",
]
@@ -5400,7 +5289,7 @@ checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -5420,7 +5309,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
"synstructure",
]
@@ -5441,7 +5330,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]
[[package]]
@@ -5474,5 +5363,5 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
"syn 2.0.116",
]

View File

@@ -26,49 +26,22 @@ opt-level = 3
networking = { path = "rust/networking" }
util = { path = "rust/util" }
# Proc-macro authoring tools
syn = "2.0"
quote = "1.0"
proc-macro2 = "1.0"
darling = "0.20"
# Macro dependecies
extend = "1.2"
delegate = "0.13"
impl-trait-for-tuples = "0.2"
clap = "4.5"
derive_more = { version = "2.0.1", features = ["display"] }
pin-project = "1"
# Utility dependencies
itertools = "0.14"
thiserror = "2"
internment = "0.8"
recursion = "0.5"
regex = "1.11"
once_cell = "1.21"
thread_local = "1.1"
bon = "3.4"
generativity = "1.1"
anyhow = "1.0"
keccak-const = "0.2"
# Functional generics/lenses frameworks
frunk_core = "0.4"
frunk = "0.4"
frunk_utils = "0.2"
frunk-enum-core = "0.3"
# Async dependencies
tokio = "1.46"
futures = "0.3"
futures-util = "0.3"
futures-lite = "2.6.1"
futures-timer = "3.0"
# Data structures
either = "1.15"
ordered-float = "5.0"
ahash = "0.8"
# Tracing/logging
log = "0.4"

View File

@@ -72,16 +72,23 @@ There are two ways to run exo:
### Run from Source (macOS)
If you have [Nix](https://nixos.org/) installed, you can skip most of the steps below and run exo directly (after accepting the Cachix cache):
```bash
nix run .#exo
```
**Prerequisites:**
- [Xcode](https://developer.apple.com/xcode/) (provides the Metal ToolChain required for MLX compilation)
- [brew](https://github.com/Homebrew/brew) (for simple package management on macOS)
```bash
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
```
- [uv](https://github.com/astral-sh/uv) (for Python dependency management)
- [macmon](https://github.com/vladkens/macmon) (for hardware monitoring on Apple Silicon)
- [node](https://github.com/nodejs/node) (for building the dashboard)
```bash
brew install uv macmon node
```

View File

@@ -126,11 +126,37 @@ final class ExoProcessController: ObservableObject {
            return
        }
        process.terminationHandler = nil
-       if process.isRunning {
-           process.terminate()
-       }
-       self.process = nil
        status = .stopped
+       guard process.isRunning else {
+           self.process = nil
+           return
+       }
+       let proc = process
+       self.process = nil
+       Task.detached {
+           proc.interrupt()
+           for _ in 0..<50 {
+               if !proc.isRunning { return }
+               try? await Task.sleep(nanoseconds: 100_000_000)
+           }
+           if proc.isRunning {
+               proc.terminate()
+           }
+           for _ in 0..<30 {
+               if !proc.isRunning { return }
+               try? await Task.sleep(nanoseconds: 100_000_000)
+           }
+           if proc.isRunning {
+               kill(proc.processIdentifier, SIGKILL)
+           }
+       }
    }

    func restart() {

bench/bench.toml (new file, 7 lines)
View File

@@ -0,0 +1,7 @@
# Canary benchmark manifest
#
# Lists the suite files to include. Each file defines benchmarks
# with shared constraints, topology, and default args.
include = [
"single-m3-ultra.toml",
]

View File

@@ -288,6 +288,151 @@ def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]
raise ValueError(f"Model not found in /models: {model_arg}")
def run_planning_phase(
    client: ExoClient,
    full_model_id: str,
    preview: dict[str, Any],
    danger_delete: bool,
    timeout: float,
    settle_deadline: float | None,
) -> None:
    """Check disk space and ensure model is downloaded before benchmarking."""
    # Get model size from /models
    models = client.request_json("GET", "/models") or {}
    model_bytes = 0
    for m in models.get("data", []):
        if m.get("hugging_face_id") == full_model_id:
            model_bytes = m.get("storage_size_megabytes", 0) * 1024 * 1024
            break
    if not model_bytes:
        logger.warning(
            f"Could not determine size for {full_model_id}, skipping disk check"
        )
        return
    # Get nodes from preview
    inner = unwrap_instance(preview["instance"])
    node_ids = list(inner["shardAssignments"]["nodeToRunner"].keys())
    runner_to_shard = inner["shardAssignments"]["runnerToShard"]
    state = client.request_json("GET", "/state")
    downloads = state.get("downloads", {})
    node_disk = state.get("nodeDisk", {})
    for node_id in node_ids:
        node_downloads = downloads.get(node_id, [])
        # Check if model already downloaded on this node
        already_downloaded = any(
            "DownloadCompleted" in p
            and unwrap_instance(p["DownloadCompleted"]["shardMetadata"])["modelCard"][
                "modelId"
            ]
            == full_model_id
            for p in node_downloads
        )
        if already_downloaded:
            continue
        # Wait for disk info if settle_deadline is set
        disk_info = node_disk.get(node_id, {})
        backoff = _SETTLE_INITIAL_BACKOFF_S
        while not disk_info and settle_deadline and time.monotonic() < settle_deadline:
            remaining = settle_deadline - time.monotonic()
            logger.info(
                f"Waiting for disk info on {node_id} ({remaining:.0f}s remaining)..."
            )
            time.sleep(min(backoff, remaining))
            backoff = min(backoff * _SETTLE_BACKOFF_MULTIPLIER, _SETTLE_MAX_BACKOFF_S)
            state = client.request_json("GET", "/state")
            node_disk = state.get("nodeDisk", {})
            disk_info = node_disk.get(node_id, {})
        if not disk_info:
            logger.warning(f"No disk info for {node_id}, skipping space check")
            continue
        avail = disk_info.get("available", {}).get("inBytes", 0)
        if avail >= model_bytes:
            continue
        if not danger_delete:
            raise RuntimeError(
                f"Insufficient disk on {node_id}: need {model_bytes // (1024**3)}GB, "
                f"have {avail // (1024**3)}GB. Use --danger-delete-downloads to free space."
            )
        # Delete from smallest to largest
        completed = [
            (
                unwrap_instance(p["DownloadCompleted"]["shardMetadata"])["modelCard"][
                    "modelId"
                ],
                p["DownloadCompleted"]["totalBytes"]["inBytes"],
            )
            for p in node_downloads
            if "DownloadCompleted" in p
        ]
        for del_model, size in sorted(completed, key=lambda x: x[1]):
            logger.info(f"Deleting {del_model} from {node_id} ({size // (1024**2)}MB)")
            client.request_json("DELETE", f"/download/{node_id}/{del_model}")
            avail += size
            if avail >= model_bytes:
                break
        if avail < model_bytes:
            raise RuntimeError(f"Could not free enough space on {node_id}")
    # Start downloads (idempotent)
    for node_id in node_ids:
        runner_id = inner["shardAssignments"]["nodeToRunner"][node_id]
        shard = runner_to_shard[runner_id]
        client.request_json(
            "POST",
            "/download/start",
            body={
                "targetNodeId": node_id,
                "shardMetadata": shard,
            },
        )
        logger.info(f"Started download on {node_id}")
    # Wait for downloads
    start = time.time()
    while time.time() - start < timeout:
        state = client.request_json("GET", "/state")
        downloads = state.get("downloads", {})
        all_done = True
        for node_id in node_ids:
            done = any(
                "DownloadCompleted" in p
                and unwrap_instance(p["DownloadCompleted"]["shardMetadata"])[
                    "modelCard"
                ]["modelId"]
                == full_model_id
                for p in downloads.get(node_id, [])
            )
            failed = [
                p["DownloadFailed"]["errorMessage"]
                for p in downloads.get(node_id, [])
                if "DownloadFailed" in p
                and unwrap_instance(p["DownloadFailed"]["shardMetadata"])["modelCard"][
                    "modelId"
                ]
                == full_model_id
            ]
            if failed:
                raise RuntimeError(f"Download failed on {node_id}: {failed[0]}")
            if not done:
                all_done = False
        if all_done:
            return
        time.sleep(1)
    raise TimeoutError("Downloads did not complete in time")


def placement_filter(instance_meta: str, wanted: str) -> bool:
    s = (instance_meta or "").lower()
    if wanted == "both":
@@ -535,6 +680,11 @@ def main() -> int:
        default=0,
        help="Max seconds to wait for the cluster to produce valid placements (0 = try once).",
    )
    ap.add_argument(
        "--danger-delete-downloads",
        action="store_true",
        help="Delete existing models from smallest to largest to make room for benchmark model.",
    )

    args = ap.parse_args()

    pp_list = parse_int_list(args.pp)
@@ -569,13 +719,16 @@ def main() -> int:
logger.error("[exo-bench] tokenizer usable but prompt sizing failed")
raise
settle_deadline = (
time.monotonic() + args.settle_timeout if args.settle_timeout > 0 else None
)
selected = fetch_and_filter_placements(client, full_model_id, args)
if not selected and args.settle_timeout > 0:
if not selected and settle_deadline:
backoff = _SETTLE_INITIAL_BACKOFF_S
deadline = time.monotonic() + args.settle_timeout
while not selected and time.monotonic() < deadline:
remaining = deadline - time.monotonic()
while not selected and time.monotonic() < settle_deadline:
remaining = settle_deadline - time.monotonic()
logger.warning(
f"No valid placements yet (cluster may still be settling). "
f"Retrying in {backoff:.1f}s ({remaining:.0f}s remaining)..."
@@ -607,6 +760,16 @@ def main() -> int:
    if args.dry_run:
        return 0

    logger.info("Planning phase: checking downloads...")
    run_planning_phase(
        client,
        full_model_id,
        selected[0],
        args.danger_delete_downloads,
        args.timeout,
        settle_deadline,
    )

    all_rows: list[dict[str, Any]] = []
    for preview in selected:

bench/single-m3-ultra.toml (new file, 189 lines)
View File

@@ -0,0 +1,189 @@
# Single-node M3 Ultra benchmarks
#
# Shared constraints applied to ALL benchmarks in this file.
constraints = [
"All(MacOsBuild(=25D125))",
"Hosts(=1)",
"All(Chip(m3_ultra))",
"All(GpuCores(=80))",
]
[topology]
type = "none"
# Default args merged into each benchmark's args (benchmark-level args win).
[defaults]
pp = [512, 2048, 8192, 16384]
tg = 128
[[benchmark]]
model = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/gpt-oss-120b-MXFP4-Q8"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.7-Flash-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Coder-Next-6bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-30B-A3B-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-0.6B-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-0.6B-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Llama-3.2-1B-Instruct-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Llama-3.2-3B-Instruct-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Llama-3.2-3B-Instruct-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/gpt-oss-20b-MXFP4-Q8"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-30B-A3B-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.7-Flash-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.7-Flash-5bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.7-Flash-6bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Llama-3.3-70B-Instruct-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Coder-Next-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Coder-Next-5bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Coder-Next-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
extra_constraints = ["All(Memory(>=96GiB))"]
[[benchmark]]
model = "mlx-community/Llama-3.3-70B-Instruct-8bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/llama-3.3-70b-instruct-fp16"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.5-Air-8bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.5-Air-bf16"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.7-4bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/MiniMax-M2.1-3bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/MiniMax-M2.1-8bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Coder-Next-bf16"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/Step-3.5-Flash-4bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/Step-3.5-Flash-6bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/Step-3.5-Flash-8Bit"
extra_constraints = ["All(Memory(>=256GiB))"]
[[benchmark]]
model = "mlx-community/DeepSeek-V3.1-4bit"
extra_constraints = ["All(Memory(>=512GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.7-6bit"
extra_constraints = ["All(Memory(>=512GiB))"]
[[benchmark]]
model = "mlx-community/GLM-4.7-8bit-gs32"
extra_constraints = ["All(Memory(>=512GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
extra_constraints = ["All(Memory(>=512GiB))"]
[[benchmark]]
model = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
extra_constraints = ["All(Memory(>=512GiB))"]

View File

@@ -1,7 +1,6 @@
<script lang="ts">
import {
isLoading,
stopGeneration,
sendMessage,
generateImage,
editImage,
@@ -266,6 +265,7 @@
function handleSubmit() {
if ((!message.trim() && uploadedFiles.length === 0) || loading) return;
if (isEditOnlyWithoutImage) return;
const content = message.trim();
const files = [...uploadedFiles];
@@ -290,7 +290,11 @@
if (imageFile.preview) {
editImage(content, imageFile.preview);
}
} else if (isImageModel() && content) {
} else if (
currentModel &&
modelSupportsTextToImage(currentModel) &&
content
) {
// Use image generation for text-to-image models
generateImage(content);
} else {
@@ -649,92 +653,86 @@
style="min-height: 28px; max-height: 150px;"
></textarea>
{#if loading}
<button
type="button"
onclick={() => stopGeneration()}
class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap bg-exo-medium-gray/70 text-exo-light-gray hover:bg-red-900/50 hover:text-red-400 border border-exo-medium-gray/50 hover:border-red-500/50 cursor-pointer"
aria-label="Stop generation"
>
<button
type="submit"
disabled={!canSend || loading || isEditOnlyWithoutImage}
class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap
{!canSend || loading || isEditOnlyWithoutImage
? 'bg-exo-medium-gray/50 text-exo-light-gray cursor-not-allowed'
: 'bg-exo-yellow text-exo-black hover:bg-exo-yellow-darker hover:shadow-[0_0_20px_rgba(255,215,0,0.3)]'}"
aria-label={shouldShowEditMode
? "Edit image"
: isImageModel()
? "Generate image"
: "Send message"}
>
{#if loading}
<span class="inline-flex items-center gap-1 sm:gap-2">
<svg
class="w-2.5 h-2.5 sm:w-3 sm:h-3"
viewBox="0 0 24 24"
fill="currentColor"
<span
class="w-2.5 h-2.5 sm:w-3 sm:h-3 border-2 border-current border-t-transparent rounded-full animate-spin"
></span>
<span class="hidden sm:inline"
>{shouldShowEditMode
? "EDITING"
: isImageModel()
? "GENERATING"
: "PROCESSING"}</span
>
<rect x="4" y="4" width="16" height="16" rx="2" />
</svg>
<span class="hidden sm:inline">STOP</span>
<span class="sm:hidden">...</span>
</span>
</button>
{:else}
<button
type="submit"
disabled={!canSend || isEditOnlyWithoutImage}
class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap
{!canSend || isEditOnlyWithoutImage
? 'bg-exo-medium-gray/50 text-exo-light-gray cursor-not-allowed'
: 'bg-exo-yellow text-exo-black hover:bg-exo-yellow-darker hover:shadow-[0_0_20px_rgba(255,215,0,0.3)]'}"
aria-label={shouldShowEditMode
? "Edit image"
: isImageModel()
? "Generate image"
: "Send message"}
>
{#if shouldShowEditMode}
<span class="inline-flex items-center gap-1.5">
<svg
class="w-3.5 h-3.5"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
stroke-width="2"
>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
/>
</svg>
<span>EDIT</span>
</span>
{:else if isEditOnlyWithoutImage}
<span class="inline-flex items-center gap-1.5">
<svg
class="w-3.5 h-3.5"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
stroke-width="2"
>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
/>
</svg>
<span>EDIT</span>
</span>
{:else if isImageModel()}
<span class="inline-flex items-center gap-1.5">
<svg
class="w-3.5 h-3.5"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
stroke-width="2"
>
<rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
<circle cx="8.5" cy="8.5" r="1.5" />
<polyline points="21 15 16 10 5 21" />
</svg>
<span>GENERATE</span>
</span>
{:else}
SEND
{/if}
</button>
{/if}
{:else if shouldShowEditMode}
<span class="inline-flex items-center gap-1.5">
<svg
class="w-3.5 h-3.5"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
stroke-width="2"
>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
/>
</svg>
<span>EDIT</span>
</span>
{:else if isEditOnlyWithoutImage}
<span class="inline-flex items-center gap-1.5">
<svg
class="w-3.5 h-3.5"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
stroke-width="2"
>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"
/>
</svg>
<span>EDIT</span>
</span>
{:else if isImageModel()}
<span class="inline-flex items-center gap-1.5">
<svg
class="w-3.5 h-3.5"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
stroke-width="2"
>
<rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
<circle cx="8.5" cy="8.5" r="1.5" />
<polyline points="21 15 16 10 5 21" />
</svg>
<span>GENERATE</span>
</span>
{:else}
SEND
{/if}
</button>
</div>
<!-- Bottom accent line -->

View File

@@ -225,6 +225,7 @@
}
function handleDeleteClick(messageId: string) {
if (loading) return;
deleteConfirmId = messageId;
}
@@ -255,7 +256,7 @@
</script>
<div class="flex flex-col gap-4 sm:gap-6 {className}">
{#each messageList as message (message.id)}
{#each messageList as message, i (message.id)}
<div
class="group flex {message.role === 'user'
? 'justify-end'
@@ -317,9 +318,11 @@
<!-- Delete confirmation -->
<div class="bg-red-500/10 border border-red-500/30 rounded-lg p-3">
<p class="text-xs text-red-400 mb-3">
Delete this message{message.role === "user"
? " and all responses after it"
: ""}?
{#if i === messageList.length - 1}
Delete this message?
{:else}
Delete this message and all messages after it?
{/if}
</p>
<div class="flex gap-2 justify-end">
<button
@@ -751,8 +754,13 @@
<!-- Delete button -->
<button
onclick={() => handleDeleteClick(message.id)}
class="p-1.5 text-exo-light-gray hover:text-red-400 transition-colors rounded hover:bg-red-500/10 cursor-pointer"
title="Delete message"
disabled={loading}
class="p-1.5 transition-colors rounded {loading
? 'text-exo-light-gray/30 cursor-not-allowed'
: 'text-exo-light-gray hover:text-red-400 hover:bg-red-500/10 cursor-pointer'}"
title={loading
? "Cannot delete while generating"
: "Delete message"}
>
<svg
class="w-3.5 h-3.5"

View File

@@ -514,7 +514,6 @@ class AppStore {
messages = $state<Message[]>([]);
currentResponse = $state("");
isLoading = $state(false);
private currentAbortController: AbortController | null = null;
// Performance metrics
ttftMs = $state<number | null>(null); // Time to first token in ms
@@ -1815,11 +1814,9 @@ class AppStore {
return;
}
this.currentAbortController = new AbortController();
const response = await fetch("/v1/chat/completions", {
method: "POST",
headers: { "Content-Type": "application/json" },
signal: this.currentAbortController.signal,
body: JSON.stringify({
model: modelToUse,
messages: apiMessages,
@@ -1933,7 +1930,6 @@ class AppStore {
"Unknown error",
);
} finally {
this.currentAbortController = null;
this.isLoading = false;
this.currentResponse = "";
this.saveConversationsToStorage();
@@ -2070,10 +2066,6 @@ class AppStore {
assistantMessageId: string,
errorPrefix = "Failed to get response",
): void {
// Don't show error for user-initiated abort (stop button)
if (error instanceof DOMException && error.name === "AbortError") {
return;
}
if (this.conversationExists(targetConversationId)) {
this.updateConversationMessage(
targetConversationId,
@@ -2115,17 +2107,6 @@ class AppStore {
return null;
}
/**
* Stop the current generation by aborting the HTTP connection.
* This triggers backend cancellation via the mechanism in PR #1276.
*/
stopGeneration() {
if (this.currentAbortController) {
this.currentAbortController.abort();
this.currentAbortController = null;
}
}
/**
* Send a message to the LLM and stream the response
*/
@@ -2274,13 +2255,11 @@ class AppStore {
let firstTokenTime: number | null = null;
let tokenCount = 0;
this.currentAbortController = new AbortController();
const response = await fetch("/v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
signal: this.currentAbortController.signal,
body: JSON.stringify({
model: modelToUse,
messages: apiMessages,
@@ -2431,7 +2410,6 @@ class AppStore {
"Failed to get response",
);
} finally {
this.currentAbortController = null;
this.isLoading = false;
this.currentResponse = "";
this.saveConversationsToStorage();
@@ -2536,13 +2514,11 @@ class AppStore {
};
}
this.currentAbortController = new AbortController();
const response = await fetch("/v1/images/generations", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
signal: this.currentAbortController.signal,
body: JSON.stringify(requestBody),
});
@@ -2691,7 +2667,6 @@ class AppStore {
"Failed to generate image",
);
} finally {
this.currentAbortController = null;
this.isLoading = false;
this.saveConversationsToStorage();
}
@@ -2813,10 +2788,8 @@ class AppStore {
);
}
this.currentAbortController = new AbortController();
const apiResponse = await fetch("/v1/images/edits", {
method: "POST",
signal: this.currentAbortController.signal,
body: formData,
});
@@ -2926,7 +2899,6 @@ class AppStore {
"Failed to edit image",
);
} finally {
this.currentAbortController = null;
this.isLoading = false;
this.saveConversationsToStorage();
}
@@ -3067,7 +3039,6 @@ export const hasStartedChat = () => appStore.hasStartedChat;
export const messages = () => appStore.messages;
export const currentResponse = () => appStore.currentResponse;
export const isLoading = () => appStore.isLoading;
export const stopGeneration = () => appStore.stopGeneration();
export const ttftMs = () => appStore.ttftMs;
export const tps = () => appStore.tps;
export const totalTokens = () => appStore.totalTokens;

View File

File diff suppressed because it is too large

View File

@@ -115,7 +115,7 @@
packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin (
let
uvLock = builtins.fromTOML (builtins.readFile ./uv.lock);
mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx") uvLock.package);
mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx" && p.source ? git) uvLock.package);
uvLockMlxVersion = mlxPackage.version;
in
{
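The narrowed filter matters because `uv.lock` can now carry more than one `mlx` package entry (the darwin build comes from a git fork while linux uses registry wheels), so the lookup must pick the entry whose `source` has a `git` attribute. A rough Python equivalent of the same filter, for illustration:

```python
# Rough Python equivalent of the Nix filter above: pick the uv.lock "mlx" entry
# that comes from a git source (uv.lock may also carry registry-sourced mlx wheels).
import tomllib

with open("uv.lock", "rb") as f:
    lock = tomllib.load(f)

mlx = next(
    p for p in lock["package"]
    if p["name"] == "mlx" and "git" in p.get("source", {})
)
print(mlx["version"])
```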

View File

@@ -41,16 +41,16 @@ let
mlx = stdenv.mkDerivation rec {
pname = "mlx";
version = let v = "0.30.6"; in
version = let v = "0.30.7.dev20260217+50487b41"; in
assert v == uvLockMlxVersion || throw "MLX version mismatch: nix/mlx.nix has ${v} but uv.lock has ${uvLockMlxVersion}. Update both the version and hash in nix/mlx.nix.";
v;
pyproject = true;
src = fetchFromGitHub {
owner = "ml-explore";
repo = "mlx";
tag = "v${version}";
hash = "sha256-avD5EGhwgmPdXLAyQSqTO6AXk/W3ziH+f6AetjK3Sdo=";
owner = "rltakashige";
repo = "mlx-jaccl-fix-small-recv";
rev = "50487b4141f3c951122655db3b83df5146c1fbeb";
hash = "sha256-IL4a9vMX5nocgJU1WG4zE8hArHkHJtnh4sdYh3od5zU=";
};
patches = [

View File

@@ -17,9 +17,9 @@ dependencies = [
"loguru>=0.7.3",
"exo_pyo3_bindings", # rust bindings
"anyio==4.11.0",
"mlx==0.30.6; sys_platform == 'darwin'",
"mlx; sys_platform == 'darwin'",
"mlx[cpu]==0.30.6; sys_platform == 'linux'",
"mlx-lm==0.30.6",
"mlx-lm==0.30.7",
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
@@ -64,6 +64,7 @@ members = [
[tool.uv.sources]
exo_pyo3_bindings = { workspace = true }
mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" }
#mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm", branch = "stable" }
# Uncomment to use local mlx/mlx-lm development versions:
# mlx = { path = "/Users/Shared/mlx", editable=true }
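With the darwin `mlx` pin dropped from `dependencies`, the resolved version comes from the `[tool.uv.sources]` git fork, while `mlx-lm` stays pinned at 0.30.7. A hedged runtime sanity check; the expected strings simply mirror the pins elsewhere in this diff:

```python
# Quick sanity check of the resolved versions at runtime. Expected values mirror
# the pins in this diff (mlx-lm 0.30.7; mlx from the git fork on darwin).
from importlib.metadata import version

print("mlx-lm:", version("mlx-lm"))  # expected: 0.30.7
print("mlx:", version("mlx"))        # expected: 0.30.7.dev20260217+50487b41 on darwin
```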

View File

@@ -14,7 +14,9 @@
# Override overlay to inject Nix-built components
exoOverlay = final: prev: {
# Replace workspace exo_pyo3_bindings with Nix-built wheel
# Replace workspace exo_pyo3_bindings with Nix-built wheel.
# Preserve passthru so mkVirtualEnv can resolve dependency groups.
# Copy .pyi stub + py.typed marker so basedpyright can find the types.
exo-pyo3-bindings = pkgs.stdenv.mkDerivation {
pname = "exo-pyo3-bindings";
version = "0.1.0";
@@ -22,6 +24,12 @@
# Install from pre-built wheel
nativeBuildInputs = [ final.pyprojectWheelHook ];
dontStrip = true;
passthru = prev.exo-pyo3-bindings.passthru or { };
postInstall = ''
local siteDir=$out/${final.python.sitePackages}/exo_pyo3_bindings
cp ${inputs.self}/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi $siteDir/
touch $siteDir/py.typed
'';
};
};
@@ -29,17 +37,47 @@
# Overlay to provide build systems and custom packages
buildSystemsOverlay = final: prev: {
# Use our pure Nix-built MLX with Metal support
mlx = self'.packages.mlx;
# mlx-lm is a git dependency that needs setuptools
mlx-lm = prev.mlx-lm.overrideAttrs (old: {
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
final.setuptools
];
});
} // lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin {
# Use our pure Nix-built MLX with Metal support (macOS only)
mlx = self'.packages.mlx;
};
# Additional overlay for Linux-specific fixes (type checking env).
# Native wheels have shared lib dependencies we don't need at type-check time.
linuxOverlay = final: prev:
let
ignoreMissing = drv: drv.overrideAttrs { autoPatchelfIgnoreMissingDeps = [ "*" ]; };
nvidiaPackages = lib.filterAttrs (name: _: lib.hasPrefix "nvidia-" name) prev;
in
lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux (
(lib.mapAttrs (_: ignoreMissing) nvidiaPackages) // {
mlx = ignoreMissing prev.mlx;
mlx-cuda-13 = prev.mlx-cuda-13.overrideAttrs (old: {
buildInputs = (old.buildInputs or [ ]) ++ [
final.nvidia-cublas
final.nvidia-cuda-nvrtc
final.nvidia-cudnn-cu13
final.nvidia-nccl-cu13
];
preFixup = ''
addAutoPatchelfSearchPath ${final.nvidia-cublas}
addAutoPatchelfSearchPath ${final.nvidia-cuda-nvrtc}
addAutoPatchelfSearchPath ${final.nvidia-cudnn-cu13}
addAutoPatchelfSearchPath ${final.nvidia-nccl-cu13}
'';
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
});
torch = ignoreMissing prev.torch;
triton = ignoreMissing prev.triton;
}
);
pythonSet = (pkgs.callPackage inputs.pyproject-nix.build.packages {
inherit python;
}).overrideScope (
@@ -48,16 +86,28 @@
overlay
exoOverlay
buildSystemsOverlay
linuxOverlay
]
);
exoVenv = pythonSet.mkVirtualEnv "exo-env" workspace.deps.default;
# mlx-cpu and mlx-cuda-13 both ship mlx/ site-packages files; keep first.
# mlx-cpu/mlx-cuda-13 and nvidia-cudnn-cu12/cu13 ship overlapping files.
venvCollisionPaths = lib.optionals pkgs.stdenv.hostPlatform.isLinux [
"lib/python3.13/site-packages/mlx*"
"lib/python3.13/site-packages/nvidia*"
];
exoVenv = (pythonSet.mkVirtualEnv "exo-env" workspace.deps.default).overrideAttrs {
venvIgnoreCollisions = venvCollisionPaths;
};
# Virtual environment with dev dependencies for testing
testVenv = pythonSet.mkVirtualEnv "exo-test-env" (
testVenv = (pythonSet.mkVirtualEnv "exo-test-env" (
workspace.deps.default // {
exo = [ "dev" ]; # Include pytest, pytest-asyncio, pytest-env
}
);
)).overrideAttrs {
venvIgnoreCollisions = venvCollisionPaths;
};
mkPythonScript = name: path: pkgs.writeShellApplication {
inherit name;
@@ -118,6 +168,21 @@
${pkgs.ruff}/bin/ruff check ${inputs.self}
touch $out
'';
# Hermetic basedpyright type checking
typecheck = pkgs.runCommand "typecheck"
{
nativeBuildInputs = [
testVenv
pkgs.basedpyright
];
}
''
cd ${inputs.self}
export HOME=$TMPDIR
basedpyright --pythonpath ${testVenv}/bin/python
touch $out
'';
};
};
}
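The `postInstall` hook near the top of this file copies the `.pyi` stub and a `py.typed` marker into the wheel's site-packages directory so the new hermetic `typecheck` target can resolve the bindings' types. A small check, assuming `exo_pyo3_bindings` is importable as a package, that the markers landed:

```python
# Check the type-checking markers installed by the postInstall hook above.
# Assumes exo_pyo3_bindings is importable as a package in site-packages.
import importlib.util
from pathlib import Path

spec = importlib.util.find_spec("exo_pyo3_bindings")
assert spec is not None and spec.submodule_search_locations
pkg = Path(next(iter(spec.submodule_search_locations)))
print("py.typed:", (pkg / "py.typed").exists())
print("stub:    ", (pkg / "exo_pyo3_bindings.pyi").exists())
```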

View File

@@ -25,53 +25,38 @@ workspace = true
networking = { workspace = true }
# interop
pyo3 = { version = "0.27.1", features = [
# "abi3-py311", # tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.11
"nightly", # enables better-supported GIL integration
pyo3 = { version = "0.27.2", features = [
# "abi3-py313", # tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.13
# "nightly", # enables better-supported GIL integration
"experimental-async", # async support in #[pyfunction] & #[pymethods]
#"experimental-inspect", # inspection of generated binary => easier to automate type-hint generation
#"py-clone", # adding Clone-ing of `Py<T>` without GIL (may cause panics - remove if panics happen)
"multiple-pymethods", # allows multiple #[pymethods] sections per class
# "multiple-pymethods", # allows multiple #[pymethods] sections per class
# integrations with other libraries
"arc_lock", "bigdecimal", "either", "hashbrown", "indexmap", "num-bigint", "num-complex", "num-rational",
"ordered-float", "rust_decimal", "smallvec",
# "arc_lock", "bigdecimal", "either", "hashbrown", "indexmap", "num-bigint", "num-complex", "num-rational",
# "ordered-float", "rust_decimal", "smallvec",
# "anyhow", "chrono", "chrono-local", "chrono-tz", "eyre", "jiff-02", "lock_api", "parking-lot", "time", "serde",
] }
pyo3-stub-gen = { version = "0.17.2" }
pyo3-stub-gen = { version = "0.19.0" }
pyo3-async-runtimes = { version = "0.27.0", features = ["attributes", "tokio-runtime", "testing"] }
pyo3-log = "0.13.2"
# macro dependencies
extend = { workspace = true }
delegate = { workspace = true }
impl-trait-for-tuples = { workspace = true }
derive_more = { workspace = true }
pin-project = { workspace = true }
# async runtime
tokio = { workspace = true, features = ["full", "tracing"] }
futures = { workspace = true }
futures-lite = { workspace = true }
# utility dependencies
once_cell = "1.21.3"
thread_local = "1.1.9"
util = { workspace = true }
thiserror = { workspace = true }
#internment = { workspace = true }
#recursion = { workspace = true }
#generativity = { workspace = true }
#itertools = { workspace = true }
# Tracing
#tracing = "0.1"
#tracing-subscriber = "0.3"
#console-subscriber = "0.1.5"
#tracing-log = "0.2.0"
log = { workspace = true }
env_logger = "0.11"
# Networking
libp2p = { workspace = true, features = ["full"] }

View File

@@ -1,155 +1,85 @@
# This file is automatically generated by pyo3_stub_gen
# ruff: noqa: E501, F401
# ruff: noqa: E501, F401, F403, F405
import builtins
import enum
import typing
__all__ = [
"AllQueuesFullError",
"Keypair",
"NoPeersSubscribedToTopicError",
"PyMessage",
"PySwarm",
]
@typing.final
class AllQueuesFullError(builtins.Exception):
def __new__(cls, *args: typing.Any) -> AllQueuesFullError: ...
def __repr__(self) -> builtins.str: ...
def __new__(cls, *_a: typing.Any) -> AllQueuesFullError: ...
def __str__(self) -> builtins.str: ...
@typing.final
class ConnectionUpdate:
@property
def update_type(self) -> ConnectionUpdateType:
r"""
Whether this is a connection or disconnection event
"""
@property
def peer_id(self) -> PeerId:
r"""
Identity of the peer that we have connected to or disconnected from.
"""
@property
def remote_ipv4(self) -> builtins.str:
r"""
Remote connection's IPv4 address.
"""
@property
def remote_tcp_port(self) -> builtins.int:
r"""
Remote connection's TCP port.
"""
@typing.final
class Keypair:
r"""
Identity keypair of a node.
"""
@staticmethod
def generate_ed25519() -> Keypair:
def generate() -> Keypair:
r"""
Generate a new Ed25519 keypair.
"""
@staticmethod
def generate_ecdsa() -> Keypair:
r"""
Generate a new ECDSA keypair.
"""
@staticmethod
def generate_secp256k1() -> Keypair:
r"""
Generate a new Secp256k1 keypair.
"""
@staticmethod
def from_protobuf_encoding(bytes: bytes) -> Keypair:
def deserialize(bytes: bytes) -> Keypair:
r"""
Decode a private key from a protobuf structure and parse it as a `Keypair`.
"""
@staticmethod
def rsa_from_pkcs8(bytes: bytes) -> Keypair:
r"""
Decode a keypair from a DER-encoded secret key in PKCS#8 `PrivateKeyInfo`
format (i.e. unencrypted) as defined in [RFC5208].
[RFC5208]: https://tools.ietf.org/html/rfc5208#section-5
"""
@staticmethod
def secp256k1_from_der(bytes: bytes) -> Keypair:
r"""
Decode a keypair from a DER-encoded Secp256k1 secret key in an `ECPrivateKey`
structure as defined in [RFC5915].
[RFC5915]: https://tools.ietf.org/html/rfc5915
"""
@staticmethod
def ed25519_from_bytes(bytes: bytes) -> Keypair: ...
def to_protobuf_encoding(self) -> bytes:
def serialize(self) -> bytes:
r"""
Encode a private key as protobuf structure.
"""
def to_peer_id(self) -> PeerId:
def to_string(self) -> builtins.str:
r"""
Convert the `Keypair` into the corresponding `PeerId`.
"""
@typing.final
class Multiaddr:
r"""
Representation of a Multiaddr.
"""
@staticmethod
def empty() -> Multiaddr:
r"""
Create a new, empty multiaddress.
"""
@staticmethod
def with_capacity(n: builtins.int) -> Multiaddr:
r"""
Create a new, empty multiaddress with the given capacity.
"""
@staticmethod
def from_bytes(bytes: bytes) -> Multiaddr:
r"""
Parse a `Multiaddr` value from its byte slice representation.
"""
@staticmethod
def from_string(string: builtins.str) -> Multiaddr:
r"""
Parse a `Multiaddr` value from its string representation.
"""
def len(self) -> builtins.int:
r"""
Return the length in bytes of this multiaddress.
"""
def is_empty(self) -> builtins.bool:
r"""
Returns true if the length of this multiaddress is 0.
"""
def to_bytes(self) -> bytes:
r"""
Return a copy of this [`Multiaddr`]'s byte representation.
"""
def to_string(self) -> builtins.str:
r"""
Convert a Multiaddr to a string.
"""
class NoPeersSubscribedToTopicError(builtins.Exception):
def __new__(cls, *_a: typing.Any) -> NoPeersSubscribedToTopicError: ...
def __str__(self) -> builtins.str: ...
class PyMessage:
@typing.final
class Connection(PyMessage):
__match_args__ = ("node_id", "connected",)
@property
def node_id(self) -> builtins.str: ...
@property
def connected(self) -> builtins.bool: ...
def __new__(cls, node_id: builtins.str, connected: builtins.bool) -> PyMessage.Connection: ...
@typing.final
class Gossip(PyMessage):
__match_args__ = ("node_id", "topic", "data",)
@property
def node_id(self) -> builtins.str: ...
@property
def topic(self) -> builtins.str: ...
@property
def data(self) -> bytes: ...
def __new__(cls, node_id: builtins.str, topic: builtins.str, data: bytes) -> PyMessage.Gossip: ...
...
@typing.final
class NetworkingHandle:
def __new__(cls, identity: Keypair) -> NetworkingHandle: ...
async def connection_update_recv(self) -> ConnectionUpdate:
class PySwarm:
def __new__(cls, identity: Keypair) -> PySwarm: ...
async def recv(self) -> PyMessage:
r"""
Receives the next `ConnectionUpdate` from networking.
Receives the next message from networking.
"""
async def connection_update_recv_many(self, limit: builtins.int) -> builtins.list[ConnectionUpdate]:
r"""
Receives at most `limit` `ConnectionUpdate`s from networking and returns them.
For `limit = 0`, an empty collection of `ConnectionUpdate`s will be returned immediately.
For `limit > 0`, if there are no `ConnectionUpdate`s in the channel's queue this method
will sleep until a `ConnectionUpdate` is sent.
"""
async def gossipsub_subscribe(self, topic: builtins.str) -> builtins.bool:
async def gossipsub_subscribe(self, topic: builtins.str) -> None:
r"""
Subscribe to a `GossipSub` topic.
Returns `True` if the subscription worked. Returns `False` if we were already subscribed.
"""
async def gossipsub_unsubscribe(self, topic: builtins.str) -> builtins.bool:
async def gossipsub_unsubscribe(self, topic: builtins.str) -> None:
r"""
Unsubscribes from a `GossipSub` topic.
@@ -157,65 +87,6 @@ class NetworkingHandle:
"""
async def gossipsub_publish(self, topic: builtins.str, data: bytes) -> None:
r"""
Publishes a message with multiple topics to the `GossipSub` network.
If no peers are found that subscribe to this topic, throws `NoPeersSubscribedToTopicError` exception.
"""
async def gossipsub_recv(self) -> tuple[builtins.str, bytes]:
r"""
Receives the next message from the `GossipSub` network.
"""
async def gossipsub_recv_many(self, limit: builtins.int) -> builtins.list[tuple[builtins.str, bytes]]:
r"""
Receives at most `limit` messages from the `GossipSub` network and returns them.
For `limit = 0`, an empty collection of messages will be returned immediately.
For `limit > 0`, if there are no messages in the channel's queue this method
will sleep until a message is sent.
Publishes a message to the network on a specific topic.
"""
@typing.final
class NoPeersSubscribedToTopicError(builtins.Exception):
def __new__(cls, *args: typing.Any) -> NoPeersSubscribedToTopicError: ...
def __repr__(self) -> builtins.str: ...
def __str__(self) -> builtins.str: ...
@typing.final
class PeerId:
r"""
Identifier of a peer of the network.
The data is a `CIDv0` compatible multihash of the protobuf encoded public key of the peer
as specified in [specs/peer-ids](https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md).
"""
@staticmethod
def random() -> PeerId:
r"""
Generates a random peer ID from a cryptographically secure PRNG.
This is useful for randomly walking on a DHT, or for testing purposes.
"""
@staticmethod
def from_bytes(bytes: bytes) -> PeerId:
r"""
Parses a `PeerId` from bytes.
"""
def to_bytes(self) -> bytes:
r"""
Returns a raw bytes representation of this `PeerId`.
"""
def to_base58(self) -> builtins.str:
r"""
Returns a base-58 encoded string of this `PeerId`.
"""
def __repr__(self) -> builtins.str: ...
def __str__(self) -> builtins.str: ...
@typing.final
class ConnectionUpdateType(enum.Enum):
r"""
Connection or disconnection event discriminant type.
"""
Connected = ...
Disconnected = ...
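The regenerated stub collapses `NetworkingHandle`'s per-channel methods into a single `PySwarm.recv()` returning `PyMessage` variants, which declare `__match_args__` and so work with structural pattern matching. A usage sketch under those stubs (topic and payload are illustrative; the Rust source below also defines a stub-skipped `run()` coroutine that drives the swarm, assumed here to be awaited elsewhere):

```python
# Usage sketch of the regenerated API, assuming exo_pyo3_bindings is installed.
# The "example" topic and payload are illustrative.
import asyncio
from exo_pyo3_bindings import Keypair, PyMessage, PySwarm

async def main() -> None:
    swarm = PySwarm(Keypair.generate())
    # NB: the Rust side also exposes a stub-skipped run() coroutine that drives
    # the swarm; this sketch assumes it is awaited in another task.
    await swarm.gossipsub_subscribe("example")
    await swarm.gossipsub_publish("example", b"hello")
    while True:
        match await swarm.recv():
            case PyMessage.Connection(node_id, connected):
                print("peer", node_id, "connected" if connected else "disconnected")
            case PyMessage.Gossip(node_id, topic, data):
                print(f"{node_id} @ {topic}: {data!r}")

asyncio.run(main())
```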

View File

@@ -1,8 +1,4 @@
//! SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
//!
use pin_project::pin_project;
use pyo3::marker::Ungil;
//! See: <https://pyo3.rs/v0.27.2/async-await.html#detaching-from-the-interpreter-across-await>
use pyo3::prelude::*;
use std::{
future::Future,
@@ -10,31 +6,17 @@ use std::{
task::{Context, Poll},
};
/// SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
#[pin_project]
#[repr(transparent)]
pub(crate) struct AllowThreads<F>(#[pin] F);
impl<F> AllowThreads<F>
where
Self: Future,
{
pub fn new(f: F) -> Self {
Self(f)
}
}
pub struct AllowThreads<F>(pub(crate) F);
impl<F> Future for AllowThreads<F>
where
F: Future + Ungil,
F::Output: Ungil,
F: Future + Unpin + Send,
F::Output: Send,
{
type Output = F::Output;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let waker = cx.waker();
Python::with_gil(|py| {
py.allow_threads(|| self.project().0.poll(&mut Context::from_waker(waker)))
})
Python::attach(|py| py.detach(|| pin!(&mut self.0).poll(&mut Context::from_waker(waker))))
}
}
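The rewritten `AllowThreads` detaches from the interpreter around every poll (pyo3 0.27's `Python::attach`/`py.detach`, replacing the older `with_gil`/`allow_threads` pair), so awaiting a bindings coroutine should not starve plain Python threads. An illustrative-only sketch of that property, assuming the bindings are installed; this is not a rigorous test:

```python
# Illustrative only (not a rigorous test): a CPU-bound Python thread should keep
# making progress while a bindings coroutine is awaited, because AllowThreads
# detaches from the interpreter around each poll.
import asyncio
import threading
import time

from exo_pyo3_bindings import Keypair, PySwarm

ticks = 0

def spin(seconds: float) -> None:
    global ticks
    deadline = time.monotonic() + seconds
    while time.monotonic() < deadline:
        ticks += 1

async def main() -> None:
    swarm = PySwarm(Keypair.generate())
    t = threading.Thread(target=spin, args=(1.0,))
    t.start()
    try:
        # recv() waits for a message that never arrives; just time it out.
        await asyncio.wait_for(swarm.recv(), timeout=1.5)
    except asyncio.TimeoutError:
        pass
    t.join()
    print("background ticks while awaiting recv():", ticks)

asyncio.run(main())
```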

View File

@@ -1,240 +0,0 @@
//! This module exists to hold examples of some pyo3 patterns that may be too complex to
//! re-create from scratch, but too inhomogeneous to create an abstraction/wrapper around.
//!
//! Pattern examples include:
//! - Async task handles: with GC-integrated cleanup
//! - Sync/async callbacks from python: with proper eventloop handling
//!
//! Mutability pattern: https://pyo3.rs/v0.26.0/async-await.html#send--static-constraint
//! - Store mutable fields in tokio's `Mutex<T>`
//! - For async code: take `&self` and `.lock().await`
//! - For sync code: take `&mut self` and `.get_mut()`
use crate::ext::{PyResultExt as _, ResultExt as _, TokioRuntimeExt as _};
use futures::FutureExt as _;
use futures::future::BoxFuture;
use pyo3::exceptions::PyRuntimeError;
use pyo3::prelude::{PyModule, PyModuleMethods as _};
use pyo3::{
Bound, Py, PyAny, PyErr, PyResult, PyTraverseError, PyVisit, Python, pyclass, pymethods,
};
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TryRecvError;
fn needs_tokio_runtime() {
tokio::runtime::Handle::current();
}
type SyncCallback = Box<dyn Fn() + Send + Sync>;
type AsyncCallback = Box<dyn Fn() -> BoxFuture<'static, ()> + Send + Sync>;
enum AsyncTaskMessage {
SyncCallback(SyncCallback),
AsyncCallback(AsyncCallback),
}
async fn async_task(
sender: mpsc::UnboundedSender<()>,
mut receiver: mpsc::UnboundedReceiver<AsyncTaskMessage>,
) {
log::info!("RUST: async task started");
// task state
let mut interval = tokio::time::interval(Duration::from_secs(1));
let mut sync_cbs: Vec<SyncCallback> = vec![];
let mut async_cbs: Vec<AsyncCallback> = vec![];
loop {
tokio::select! {
// handle incoming messages from task-handle
message = receiver.recv() => {
// handle closed channel by exiting
let Some(message) = message else {
log::info!("RUST: channel closed");
break;
};
// dispatch incoming event
match message {
AsyncTaskMessage::SyncCallback(cb) => {
sync_cbs.push(cb);
}
AsyncTaskMessage::AsyncCallback(cb) => {
async_cbs.push(cb);
}
}
}
// handle all other events
_ = interval.tick() => {
log::info!("RUST: async task tick");
// call back all sync callbacks
for cb in &sync_cbs {
cb();
}
// call back all async callbacks
for cb in &async_cbs {
cb().await;
}
// send event on unbounded channel
sender.send(()).expect("handle receiver cannot be closed/dropped");
}
}
}
log::info!("RUST: async task stopped");
}
// #[gen_stub_pyclass]
#[pyclass(name = "AsyncTaskHandle")]
#[derive(Debug)]
struct PyAsyncTaskHandle {
sender: Option<mpsc::UnboundedSender<AsyncTaskMessage>>,
receiver: mpsc::UnboundedReceiver<()>,
}
#[allow(clippy::expect_used)]
impl PyAsyncTaskHandle {
const fn sender(&self) -> &mpsc::UnboundedSender<AsyncTaskMessage> {
self.sender
.as_ref()
.expect("The sender should only be None after de-initialization.")
}
const fn sender_mut(&mut self) -> &mpsc::UnboundedSender<AsyncTaskMessage> {
self.sender
.as_mut()
.expect("The sender should only be None after de-initialization.")
}
const fn new(
sender: mpsc::UnboundedSender<AsyncTaskMessage>,
receiver: mpsc::UnboundedReceiver<()>,
) -> Self {
Self {
sender: Some(sender),
receiver,
}
}
}
// #[gen_stub_pymethods]
#[pymethods]
impl PyAsyncTaskHandle {
#[new]
fn py_new(py: Python<'_>) -> PyResult<Self> {
use pyo3_async_runtimes::tokio::get_runtime;
// create communication channel TOWARDS our task
let (h_sender, t_receiver) = mpsc::unbounded_channel::<AsyncTaskMessage>();
// create communication channel FROM our task
let (t_sender, h_receiver) = mpsc::unbounded_channel::<()>();
// perform necessary setup within tokio context - or it crashes
let () = get_runtime().block_on(async { needs_tokio_runtime() });
// spawn tokio task with this thread's task-locals - without this, async callbacks on the new threads will not work!!
_ = get_runtime().spawn_with_scope(py, async move {
async_task(t_sender, t_receiver).await;
});
Ok(Self::new(h_sender, h_receiver))
}
/// NOTE: exceptions in callbacks are silently ignored until end of execution
fn add_sync_callback(
&self,
// #[gen_stub(override_type(
// type_repr="collections.abc.Callable[[], None]",
// imports=("collections.abc")
// ))]
callback: Py<PyAny>,
) -> PyResult<()> {
// blocking call to async method -> can do non-blocking if needed
self.sender()
.send(AsyncTaskMessage::SyncCallback(Box::new(move || {
_ = Python::with_gil(|py| callback.call0(py).write_unraisable_with(py));
})))
.pyerr()?;
Ok(())
}
/// NOTE: exceptions in callbacks are silently ignored until end of execution
fn add_async_callback(
&self,
// #[gen_stub(override_type(
// type_repr="collections.abc.Callable[[], collections.abc.Awaitable[None]]",
// imports=("collections.abc")
// ))]
callback: Py<PyAny>,
) -> PyResult<()> {
// blocking call to async method -> can do non-blocking if needed
self.sender()
.send(AsyncTaskMessage::AsyncCallback(Box::new(move || {
let c = Python::with_gil(|py| callback.clone_ref(py));
async move {
if let Some(f) = Python::with_gil(|py| {
let coroutine = c.call0(py).write_unraisable_with(py)?;
pyo3_async_runtimes::tokio::into_future(coroutine.into_bound(py))
.write_unraisable_with(py)
}) {
_ = f.await.write_unraisable();
}
}
.boxed()
})))
.pyerr()?;
Ok(())
}
async fn receive_unit(&mut self) -> PyResult<()> {
self.receiver
.recv()
.await
.ok_or(PyErr::new::<PyRuntimeError, _>(
"cannot receive unit on closed channel",
))
}
fn drain_units(&mut self) -> PyResult<i32> {
let mut cnt = 0;
loop {
match self.receiver.try_recv() {
Err(TryRecvError::Disconnected) => {
return Err(PyErr::new::<PyRuntimeError, _>(
"cannot receive unit on closed channel",
));
}
Err(TryRecvError::Empty) => return Ok(cnt),
Ok(()) => {
cnt += 1;
continue;
}
}
}
}
// #[gen_stub(skip)]
const fn __traverse__(&self, _visit: PyVisit<'_>) -> Result<(), PyTraverseError> {
Ok(()) // This is needed purely so `__clear__` can work
}
// #[gen_stub(skip)]
fn __clear__(&mut self) {
// TODO: may or may not need to await a "kill-signal" oneshot channel message,
// to ensure that the networking task is done BEFORE exiting the clear function...
// but this may require GIL?? and it may not be safe to call GIL here??
self.sender = None; // Using Option<T> as a trick to force `sender` channel to be dropped
}
}
pub fn examples_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyAsyncTaskHandle>()?;
Ok(())
}

View File

@@ -0,0 +1,47 @@
use crate::ext::ResultExt as _;
use libp2p::identity::Keypair;
use pyo3::prelude::{PyBytesMethods as _, PyModule, PyModuleMethods as _};
use pyo3::types::PyBytes;
use pyo3::{Bound, PyResult, Python, pyclass, pymethods};
use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
/// Identity keypair of a node.
#[gen_stub_pyclass]
#[pyclass(name = "Keypair", frozen)]
#[repr(transparent)]
pub struct PyKeypair(pub Keypair);
#[gen_stub_pymethods]
#[pymethods]
#[allow(clippy::needless_pass_by_value)]
impl PyKeypair {
/// Generate a new Ed25519 keypair.
#[staticmethod]
fn generate() -> Self {
Self(Keypair::generate_ed25519())
}
/// Decode a private key from a protobuf structure and parse it as a `Keypair`.
#[staticmethod]
fn deserialize(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
let bytes = Vec::from(bytes.as_bytes());
Ok(Self(Keypair::from_protobuf_encoding(&bytes).pyerr()?))
}
/// Encode a private key as protobuf structure.
fn serialize<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
let bytes = self.0.to_protobuf_encoding().pyerr()?;
Ok(PyBytes::new(py, &bytes))
}
/// Convert the `Keypair` into the corresponding `PeerId`.
fn to_string(&self) -> String {
self.0.public().to_peer_id().to_base58()
}
}
pub fn ident_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyKeypair>()?;
Ok(())
}
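The new `PyKeypair` surface trims libp2p's many constructors down to `generate`/`serialize`/`deserialize`/`to_string`. A round-trip sketch against that API:

```python
# Round-trip sketch for the renamed Keypair API (protobuf-encoded private key).
from exo_pyo3_bindings import Keypair

kp = Keypair.generate()           # Ed25519 under the hood
blob = kp.serialize()             # protobuf-encoded private key bytes
restored = Keypair.deserialize(blob)

# to_string() returns the base58 PeerId derived from the public key.
assert kp.to_string() == restored.to_string()
print(kp.to_string())
```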

View File

@@ -4,28 +4,13 @@
//!
//!
// enable Rust-unstable features for convenience
#![feature(trait_alias)]
#![feature(tuple_trait)]
#![feature(unboxed_closures)]
// #![feature(stmt_expr_attributes)]
// #![feature(assert_matches)]
// #![feature(async_fn_in_dyn_trait)]
// #![feature(async_for_loop)]
// #![feature(auto_traits)]
// #![feature(negative_impls)]
extern crate core;
mod allow_threading;
mod examples;
pub(crate) mod networking;
pub(crate) mod pylibp2p;
mod ident;
mod networking;
use crate::ident::ident_submodule;
use crate::networking::networking_submodule;
use crate::pylibp2p::ident::ident_submodule;
use crate::pylibp2p::multiaddr::multiaddr_submodule;
use pyo3::prelude::PyModule;
use pyo3::prelude::*;
use pyo3::{Bound, PyResult, pyclass, pymodule};
use pyo3_stub_gen::define_stub_info_gatherer;
@@ -34,35 +19,18 @@ pub(crate) mod r#const {
pub const MPSC_CHANNEL_SIZE: usize = 1024;
}
/// Namespace for all the type/trait aliases used by this crate.
pub(crate) mod alias {
use std::error::Error;
use std::marker::Tuple;
pub trait SendFn<Args: Tuple + Send + 'static, Output> =
Fn<Args, Output = Output> + Send + 'static;
pub type AnyError = Box<dyn Error + Send + Sync + 'static>;
pub type AnyResult<T> = Result<T, AnyError>;
}
/// Namespace for crate-wide extension traits/methods
pub(crate) mod ext {
use crate::allow_threading::AllowThreads;
use extend::ext;
use pyo3::exceptions::{PyConnectionError, PyRuntimeError};
use pyo3::marker::Ungil;
use pyo3::exceptions::PyRuntimeError;
use pyo3::types::PyBytes;
use pyo3::{Py, PyErr, PyResult, Python};
use tokio::runtime::Runtime;
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TryRecvError;
use tokio::task::JoinHandle;
use pyo3::{Py, PyResult, Python};
#[ext(pub, name = ByteArrayExt)]
impl [u8] {
fn pybytes(&self) -> Py<PyBytes> {
Python::with_gil(|py| PyBytes::new(py, self).unbind())
Python::attach(|py| PyBytes::new(py, self).unbind())
}
}
@@ -77,120 +45,16 @@ pub(crate) mod ext {
}
pub trait FutureExt: Future + Sized {
/// SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
/// SEE: https://pyo3.rs/v0.27.2/async-await.html#detaching-from-the-interpreter-across-await
fn allow_threads_py(self) -> AllowThreads<Self>
where
AllowThreads<Self>: Future,
{
AllowThreads::new(self)
AllowThreads(self)
}
}
impl<T: Future> FutureExt for T {}
#[ext(pub, name = PyErrExt)]
impl PyErr {
fn receiver_channel_closed() -> Self {
PyConnectionError::new_err("Receiver channel closed unexpectedly")
}
}
#[ext(pub, name = PyResultExt)]
impl<T> PyResult<T> {
fn write_unraisable(self) -> Option<T> {
Python::with_gil(|py| self.write_unraisable_with(py))
}
fn write_unraisable_with(self, py: Python<'_>) -> Option<T> {
match self {
Ok(v) => Some(v),
Err(e) => {
// write error back to python
e.write_unraisable(py, None);
None
}
}
}
}
#[ext(pub, name = TokioRuntimeExt)]
impl Runtime {
fn spawn_with_scope<F>(&self, py: Python<'_>, future: F) -> PyResult<JoinHandle<F::Output>>
where
F: Future + Send + 'static,
F::Output: Send + 'static,
{
let locals = pyo3_async_runtimes::tokio::get_current_locals(py)?;
Ok(self.spawn(pyo3_async_runtimes::tokio::scope(locals, future)))
}
}
#[ext(pub, name = TokioMpscSenderExt)]
impl<T> mpsc::Sender<T> {
/// Sends a value, waiting until there is capacity.
///
/// A successful send occurs when it is determined that the other end of the
/// channel has not hung up already. An unsuccessful send would be one where
/// the corresponding receiver has already been closed.
async fn send_py(&self, value: T) -> PyResult<()> {
self.send(value)
.await
.map_err(|_| PyErr::receiver_channel_closed())
}
}
#[ext(pub, name = TokioMpscReceiverExt)]
impl<T> mpsc::Receiver<T> {
/// Receives the next value for this receiver.
async fn recv_py(&mut self) -> PyResult<T> {
self.recv().await.ok_or_else(PyErr::receiver_channel_closed)
}
/// Receives at most `limit` values for this receiver and returns them.
///
/// For `limit = 0`, an empty collection of messages will be returned immediately.
/// For `limit > 0`, if there are no messages in the channel's queue this method
/// will sleep until a message is sent.
async fn recv_many_py(&mut self, limit: usize) -> PyResult<Vec<T>> {
// get updates from receiver channel
let mut updates = Vec::with_capacity(limit);
let received = self.recv_many(&mut updates, limit).await;
// if we received zero items, then the channel was unexpectedly closed
if limit != 0 && received == 0 {
return Err(PyErr::receiver_channel_closed());
}
Ok(updates)
}
/// Tries to receive the next value for this receiver.
fn try_recv_py(&mut self) -> PyResult<Option<T>> {
match self.try_recv() {
Ok(v) => Ok(Some(v)),
Err(TryRecvError::Empty) => Ok(None),
Err(TryRecvError::Disconnected) => Err(PyErr::receiver_channel_closed()),
}
}
}
}
pub(crate) mod private {
use std::marker::Sized;
/// Sealed traits support
pub trait Sealed {}
impl<T: ?Sized> Sealed for T {}
}
/// A wrapper around [`Py`] that implements [`Clone`] using [`Python::with_gil`].
#[repr(transparent)]
pub(crate) struct ClonePy<T>(pub Py<T>);
impl<T> Clone for ClonePy<T> {
fn clone(&self) -> Self {
Python::with_gil(|py| Self(self.0.clone_ref(py)))
}
}
/// A Python module implemented in Rust. The name of this function must match
@@ -201,16 +65,9 @@ fn main_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
// install logger
pyo3_log::init();
// TODO: for now this is all NOT a submodule, but figure out how to make the submodule system
// work with maturin, where the types generate correctly, in the right folder, without
// too many importing issues...
ident_submodule(m)?;
multiaddr_submodule(m)?;
networking_submodule(m)?;
// top-level constructs
// TODO: ...
Ok(())
}

View File

@@ -1,31 +1,24 @@
#![allow(
clippy::multiple_inherent_impl,
clippy::unnecessary_wraps,
clippy::unused_self,
clippy::needless_pass_by_value
)]
use crate::r#const::MPSC_CHANNEL_SIZE;
use crate::ext::{ByteArrayExt as _, FutureExt, PyErrExt as _};
use crate::ext::{ResultExt as _, TokioMpscReceiverExt as _, TokioMpscSenderExt as _};
use crate::ext::ResultExt as _;
use crate::ext::{ByteArrayExt as _, FutureExt as _};
use crate::ident::PyKeypair;
use crate::networking::exception::{PyAllQueuesFullError, PyNoPeersSubscribedToTopicError};
use crate::pyclass;
use crate::pylibp2p::ident::{PyKeypair, PyPeerId};
use libp2p::futures::StreamExt as _;
use libp2p::gossipsub::{IdentTopic, Message, MessageId, PublishError};
use libp2p::swarm::SwarmEvent;
use libp2p::{gossipsub, mdns};
use networking::discovery;
use networking::swarm::create_swarm;
use pyo3::prelude::{PyModule, PyModuleMethods as _};
use futures_lite::FutureExt as _;
use networking::swarm::{FromSwarm, Swarm, ToSwarm};
use pyo3::coroutine::CancelHandle;
use pyo3::exceptions::{PyConnectionError, PyRuntimeError};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::{Bound, Py, PyErr, PyResult, PyTraverseError, PyVisit, Python, pymethods};
use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum, gen_stub_pymethods};
use std::net::IpAddr;
use tokio::sync::{Mutex, mpsc, oneshot};
use pyo3_async_runtimes::tokio::get_runtime;
use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_complex_enum, gen_stub_pymethods};
use std::pin::pin;
use std::sync::Arc;
use tokio::sync::{Mutex, mpsc};
mod exception {
use pyo3::types::PyTuple;
use pyo3::{PyErrArguments, exceptions::PyException, prelude::*};
use pyo3::{exceptions::PyException, prelude::*};
use pyo3_stub_gen::derive::*;
#[gen_stub_pyclass]
@@ -49,16 +42,11 @@ mod exception {
#[pymethods]
impl PyNoPeersSubscribedToTopicError {
#[new]
#[pyo3(signature = (*args))]
#[allow(unused_variables)]
pub(crate) fn new(args: &Bound<'_, PyTuple>) -> Self {
#[pyo3(signature = (*_a))]
pub(crate) fn new(_a: &Bound<'_, PyTuple>) -> Self {
Self {}
}
fn __repr__(&self) -> String {
format!("PeerId(\"{}\")", Self::MSG)
}
fn __str__(&self) -> String {
Self::MSG.to_string()
}
@@ -84,489 +72,179 @@ mod exception {
#[pymethods]
impl PyAllQueuesFullError {
#[new]
#[pyo3(signature = (*args))]
#[allow(unused_variables)]
pub(crate) fn new(args: &Bound<'_, PyTuple>) -> Self {
#[pyo3(signature = (*_a))]
pub(crate) fn new(_a: &Bound<'_, PyTuple>) -> Self {
Self {}
}
fn __repr__(&self) -> String {
format!("PeerId(\"{}\")", Self::MSG)
}
fn __str__(&self) -> String {
Self::MSG.to_string()
}
}
}
/// Connection or disconnection event discriminant type.
#[gen_stub_pyclass_enum]
#[pyclass(eq, eq_int, name = "ConnectionUpdateType")]
#[derive(Debug, Clone, PartialEq)]
enum PyConnectionUpdateType {
Connected = 0,
Disconnected,
}
#[gen_stub_pyclass]
#[pyclass(frozen, name = "ConnectionUpdate")]
#[derive(Debug, Clone)]
struct PyConnectionUpdate {
/// Whether this is a connection or disconnection event
#[pyo3(get)]
update_type: PyConnectionUpdateType,
/// Identity of the peer that we have connected to or disconnected from.
#[pyo3(get)]
peer_id: PyPeerId,
/// Remote connection's IPv4 address.
#[pyo3(get)]
remote_ipv4: String,
/// Remote connection's TCP port.
#[pyo3(get)]
remote_tcp_port: u16,
#[pyclass]
struct PySwarm {
swarm: Arc<Mutex<Swarm>>,
from_swarm: Mutex<mpsc::Receiver<FromSwarm>>,
to_swarm: Mutex<mpsc::Sender<ToSwarm>>,
}
enum ToTask {
GossipsubSubscribe {
topic: String,
result_tx: oneshot::Sender<PyResult<bool>>,
#[gen_stub_pyclass_complex_enum]
#[pyclass]
pub enum PyMessage {
Connection {
node_id: String,
connected: bool,
},
GossipsubUnsubscribe {
Gossip {
node_id: String,
topic: String,
result_tx: oneshot::Sender<bool>,
},
GossipsubPublish {
topic: String,
data: Vec<u8>,
result_tx: oneshot::Sender<PyResult<MessageId>>,
data: Py<PyBytes>,
},
}
#[allow(clippy::enum_glob_use)]
async fn networking_task(
mut swarm: networking::swarm::Swarm,
mut to_task_rx: mpsc::Receiver<ToTask>,
connection_update_tx: mpsc::Sender<PyConnectionUpdate>,
gossipsub_message_tx: mpsc::Sender<(String, Vec<u8>)>,
) {
use SwarmEvent::*;
use ToTask::*;
use mdns::Event::*;
use networking::swarm::BehaviourEvent::*;
log::info!("RUST: networking task started");
loop {
tokio::select! {
message = to_task_rx.recv() => {
// handle closed channel
let Some(message) = message else {
log::info!("RUST: channel closed");
break;
};
// dispatch incoming messages
match message {
GossipsubSubscribe { topic, result_tx } => {
// try to subscribe
let result = swarm.behaviour_mut()
.gossipsub.subscribe(&IdentTopic::new(topic));
// send response oneshot
if let Err(e) = result_tx.send(result.pyerr()) {
log::error!("RUST: could not subscribe to gossipsub topic since channel already closed: {e:?}");
continue;
}
}
GossipsubUnsubscribe { topic, result_tx } => {
// try to unsubscribe from the topic
let result = swarm.behaviour_mut()
.gossipsub.unsubscribe(&IdentTopic::new(topic));
// send response oneshot (or exit if connection closed)
if let Err(e) = result_tx.send(result) {
log::error!("RUST: could not unsubscribe from gossipsub topic since channel already closed: {e:?}");
continue;
}
}
GossipsubPublish { topic, data, result_tx } => {
// try to publish the data -> catch NoPeersSubscribedToTopic error & convert to correct exception
let result = swarm.behaviour_mut().gossipsub.publish(
IdentTopic::new(topic), data);
let pyresult: PyResult<MessageId> = if let Err(PublishError::NoPeersSubscribedToTopic) = result {
Err(exception::PyNoPeersSubscribedToTopicError::new_err())
} else if let Err(PublishError::AllQueuesFull(_)) = result {
Err(exception::PyAllQueuesFullError::new_err())
} else {
result.pyerr()
};
// send response oneshot (or exit if connection closed)
if let Err(e) = result_tx.send(pyresult) {
log::error!("RUST: could not publish gossipsub message since channel already closed: {e:?}");
continue;
}
}
impl TryFrom<FromSwarm> for PyMessage {
type Error = PyErr;
fn try_from(value: FromSwarm) -> Result<Self, Self::Error> {
match value {
FromSwarm::Discovered(nid) => Ok(PyMessage::Connection {
node_id: nid.to_base58(),
connected: true,
}),
FromSwarm::Expired(nid) => Ok(PyMessage::Connection {
node_id: nid.to_base58(),
connected: false,
}),
FromSwarm::Message(nid, topic, data) => Ok(PyMessage::Gossip {
node_id: nid.to_base58(),
topic,
data: data.pybytes(),
}),
FromSwarm::PublishError(e) => match e {
libp2p::gossipsub::PublishError::NoPeersSubscribedToTopic => {
Err(PyNoPeersSubscribedToTopicError::new_err())
}
}
// architectural solution to this problem:
// create keep_alive behavior whose job it is to dial peers discovered by mDNS (and drop when expired)
// -> it will emit TRUE connected/disconnected events consumable elsewhere
//
// gossipsub will feed off-of dial attempts created by networking, and that will bootstrap its' peers list
// then for actual communication it will dial those peers if need-be
swarm_event = swarm.select_next_some() => {
match swarm_event {
Behaviour(Gossipsub(gossipsub::Event::Message {
message: Message {
topic,
data,
..
},
..
})) => {
// topic-ID is just the topic hash!!! (since we used identity hasher)
let message = (topic.into_string(), data);
// send incoming message to channel (or exit if connection closed)
if let Err(e) = gossipsub_message_tx.send(message).await {
log::error!("RUST: could not send incoming gossipsub message since channel already closed: {e}");
continue;
}
},
Behaviour(Discovery(discovery::Event::ConnectionEstablished { peer_id, remote_ip, remote_tcp_port, .. })) => {
// grab IPv4 string
let remote_ipv4 = match remote_ip {
IpAddr::V4(ip) => ip.to_string(),
IpAddr::V6(ip) => {
log::warn!("RUST: ignoring connection to IPv6 address: {ip}");
continue;
}
};
// send connection event to channel (or exit if connection closed)
if let Err(e) = connection_update_tx.send(PyConnectionUpdate {
update_type: PyConnectionUpdateType::Connected,
peer_id: PyPeerId(peer_id),
remote_ipv4,
remote_tcp_port,
}).await {
log::error!("RUST: could not send connection update since channel already closed: {e}");
continue;
}
},
Behaviour(Discovery(discovery::Event::ConnectionClosed { peer_id, remote_ip, remote_tcp_port, .. })) => {
// grab IPv4 string
let remote_ipv4 = match remote_ip {
IpAddr::V4(ip) => ip.to_string(),
IpAddr::V6(ip) => {
log::warn!("RUST: ignoring disconnection from IPv6 address: {ip}");
continue;
}
};
// send disconnection event to channel (or exit if connection closed)
if let Err(e) = connection_update_tx.send(PyConnectionUpdate {
update_type: PyConnectionUpdateType::Disconnected,
peer_id: PyPeerId(peer_id),
remote_ipv4,
remote_tcp_port,
}).await {
log::error!("RUST: could not send connection update since channel already closed: {e}");
continue;
}
},
e => {
log::info!("RUST: other event {e:?}");
}
libp2p::gossipsub::PublishError::AllQueuesFull(_) => {
Err(PyAllQueuesFullError::new_err())
}
}
e => Err(PyRuntimeError::new_err(e.to_string())),
},
}
}
log::info!("RUST: networking task stopped");
}
#[gen_stub_pyclass]
#[pyclass(name = "NetworkingHandle")]
#[derive(Debug)]
struct PyNetworkingHandle {
// channels
to_task_tx: Option<mpsc::Sender<ToTask>>,
connection_update_rx: Mutex<mpsc::Receiver<PyConnectionUpdate>>,
gossipsub_message_rx: Mutex<mpsc::Receiver<(String, Vec<u8>)>>,
}
impl Drop for PyNetworkingHandle {
fn drop(&mut self) {
// TODO: may or may not need to await a "kill-signal" oneshot channel message,
// to ensure that the networking task is done BEFORE exiting the clear function...
// but this may require GIL?? and it may not be safe to call GIL here??
self.to_task_tx = None; // Using Option<T> as a trick to force channel to be dropped
}
}
#[allow(clippy::expect_used)]
impl PyNetworkingHandle {
fn new(
to_task_tx: mpsc::Sender<ToTask>,
connection_update_rx: mpsc::Receiver<PyConnectionUpdate>,
gossipsub_message_rx: mpsc::Receiver<(String, Vec<u8>)>,
) -> Self {
Self {
to_task_tx: Some(to_task_tx),
connection_update_rx: Mutex::new(connection_update_rx),
gossipsub_message_rx: Mutex::new(gossipsub_message_rx),
}
}
const fn to_task_tx(&self) -> &mpsc::Sender<ToTask> {
self.to_task_tx
.as_ref()
.expect("The sender should only be None after de-initialization.")
}
}
#[gen_stub_pymethods]
#[pymethods]
impl PyNetworkingHandle {
// NOTE: `async fn`s here that use `.await` will wrap the future in `.allow_threads_py()`
// immediately beforehand to release the interpreter.
// SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
// ---- Lifecycle management methods ----
impl PySwarm {
#[new]
fn py_new(identity: Bound<'_, PyKeypair>) -> PyResult<Self> {
use pyo3_async_runtimes::tokio::get_runtime;
// create communication channels
let (to_task_tx, to_task_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
let (connection_update_tx, connection_update_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
let (gossipsub_message_tx, gossipsub_message_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
// get identity
let identity = identity.borrow().0.clone();
let (to_swarm, from_client) = mpsc::channel(MPSC_CHANNEL_SIZE);
let (to_client, from_swarm) = mpsc::channel(MPSC_CHANNEL_SIZE);
// create networking swarm (within tokio context!! or it crashes)
let swarm = get_runtime()
.block_on(async { create_swarm(identity) })
.block_on(async { Swarm::new(identity, from_client, to_client) })
.pyerr()?;
// spawn tokio task running the networking logic
get_runtime().spawn(async move {
networking_task(
swarm,
to_task_rx,
connection_update_tx,
gossipsub_message_tx,
)
.await;
Ok(Self {
swarm: Arc::new(Mutex::new(swarm)),
from_swarm: Mutex::new(from_swarm),
to_swarm: Mutex::new(to_swarm),
})
}
#[gen_stub(skip)]
async fn run(&self, #[pyo3(cancel_handle)] mut cancel: CancelHandle) -> PyResult<()> {
let copy = Arc::clone(&self.swarm);
let jh = get_runtime().spawn(async move {
copy.try_lock()
.expect("tried to run swarm twice")
.run()
.await
});
Ok(Self::new(
to_task_tx,
connection_update_rx,
gossipsub_message_rx,
))
}
#[gen_stub(skip)]
const fn __traverse__(&self, _visit: PyVisit<'_>) -> Result<(), PyTraverseError> {
Ok(()) // This is needed purely so `__clear__` can work
}
#[gen_stub(skip)]
fn __clear__(&mut self) {
// TODO: may or may not need to await a "kill-signal" oneshot channel message,
// to ensure that the networking task is done BEFORE exiting the clear function...
// but this may require GIL?? and it may not be safe to call GIL here??
self.to_task_tx = None; // Using Option<T> as a trick to force channel to be dropped
jh.or(async {
cancel.cancelled().await;
Ok(())
})
.await
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
}
// ---- Connection update receiver methods ----
/// Receives the next `ConnectionUpdate` from networking.
async fn connection_update_recv(&self) -> PyResult<PyConnectionUpdate> {
self.connection_update_rx
.lock()
.allow_threads_py() // allow-threads-aware async call
.await
.recv_py()
.allow_threads_py() // allow-threads-aware async call
.await
/// Receives the next message from networking.
async fn recv(&self) -> PyResult<PyMessage> {
let msg = pin!(
self.from_swarm
.try_lock()
.expect("called recv concurrently")
.recv()
)
.allow_threads_py()
.await;
match msg {
None => Err(PyConnectionError::new_err("swarm closed")),
Some(msg) => msg.try_into(),
}
}
/// Receives at most `limit` `ConnectionUpdate`s from networking and returns them.
///
/// For `limit = 0`, an empty collection of `ConnectionUpdate`s will be returned immediately.
/// For `limit > 0`, if there are no `ConnectionUpdate`s in the channel's queue this method
will sleep until a `ConnectionUpdate` is sent.
async fn connection_update_recv_many(&self, limit: usize) -> PyResult<Vec<PyConnectionUpdate>> {
self.connection_update_rx
.lock()
.allow_threads_py() // allow-threads-aware async call
.await
.recv_many_py(limit)
.allow_threads_py() // allow-threads-aware async call
.await
}
// TODO: rn this blocks main thread if anything else is awaiting the channel (bc its a mutex)
// so its too dangerous to expose just yet. figure out a better semantics for handling this,
// so things don't randomly block
// /// Tries to receive the next `ConnectionUpdate` from networking.
// fn connection_update_try_recv(&self) -> PyResult<Option<PyConnectionUpdate>> {
// self.connection_update_rx.blocking_lock().try_recv_py()
// }
//
// /// Checks if the `ConnectionUpdate` channel is empty.
// fn connection_update_is_empty(&self) -> bool {
// self.connection_update_rx.blocking_lock().is_empty()
// }
//
// /// Returns the number of `ConnectionUpdate`s in the channel.
// fn connection_update_len(&self) -> usize {
// self.connection_update_rx.blocking_lock().len()
// }
// ---- Gossipsub management methods ----
/// Subscribe to a `GossipSub` topic.
///
/// Returns `True` if the subscription worked. Returns `False` if we were already subscribed.
async fn gossipsub_subscribe(&self, topic: String) -> PyResult<bool> {
let (tx, rx) = oneshot::channel();
async fn gossipsub_subscribe(&self, topic: String) -> PyResult<()> {
// send off request to subscribe
self.to_task_tx()
.send_py(ToTask::GossipsubSubscribe {
topic,
result_tx: tx,
})
.allow_threads_py() // allow-threads-aware async call
.await?;
// wait for response & return any errors
rx.allow_threads_py() // allow-threads-aware async call
.await
.map_err(|_| PyErr::receiver_channel_closed())?
pin!(
self.to_swarm
.try_lock()
.expect("called send concurrently")
.send(ToSwarm::Subscribe(topic))
)
.allow_threads_py() // allow-threads-aware async call
.await
.map_err(|_| PyConnectionError::new_err("swarm closed"))
}
/// Unsubscribes from a `GossipSub` topic.
///
/// Returns `True` if we were subscribed to this topic. Returns `False` if we were not subscribed.
async fn gossipsub_unsubscribe(&self, topic: String) -> PyResult<bool> {
let (tx, rx) = oneshot::channel();
async fn gossipsub_unsubscribe(&self, topic: String) -> PyResult<()> {
// send off request to unsubscribe
self.to_task_tx()
.send_py(ToTask::GossipsubUnsubscribe {
topic,
result_tx: tx,
})
.allow_threads_py() // allow-threads-aware async call
.await?;
// wait for response & convert any errors
rx.allow_threads_py() // allow-threads-aware async call
.await
.map_err(|_| PyErr::receiver_channel_closed())
pin!(
self.to_swarm
.try_lock()
.expect("called send concurrently")
.send(ToSwarm::Unsubscribe(topic))
)
.allow_threads_py() // allow-threads-aware async call
.await
.map_err(|_| PyConnectionError::new_err("swarm closed"))
}
/// Publishes a message with multiple topics to the `GossipSub` network.
///
/// If no peers are found that subscribe to this topic, throws `NoPeersSubscribedToTopicError` exception.
/// Publishes a message to the network on a specific topic.
async fn gossipsub_publish(&self, topic: String, data: Py<PyBytes>) -> PyResult<()> {
let (tx, rx) = oneshot::channel();
// send off request to subscribe
let data = Python::with_gil(|py| Vec::from(data.as_bytes(py)));
self.to_task_tx()
.send_py(ToTask::GossipsubPublish {
topic,
data,
result_tx: tx,
})
.allow_threads_py() // allow-threads-aware async call
.await?;
// wait for response & return any errors => ignore messageID for now!!!
let _ = rx
.allow_threads_py() // allow-threads-aware async call
.await
.map_err(|_| PyErr::receiver_channel_closed())??;
Ok(())
let data = Python::attach(|py| Vec::from(data.as_bytes(py)));
pin!(
self.to_swarm
.try_lock()
.expect("called send concurrently")
.send(ToSwarm::Message(topic, data))
)
.allow_threads_py() // allow-threads-aware async call
.await
.map_err(|_| PyConnectionError::new_err("swarm closed"))
}
// ---- Gossipsub message receiver methods ----
/// Receives the next message from the `GossipSub` network.
async fn gossipsub_recv(&self) -> PyResult<(String, Py<PyBytes>)> {
self.gossipsub_message_rx
.lock()
.allow_threads_py() // allow-threads-aware async call
.await
.recv_py()
.allow_threads_py() // allow-threads-aware async call
.await
.map(|(t, d)| (t, d.pybytes()))
}
/// Receives at most `limit` messages from the `GossipSub` network and returns them.
///
/// For `limit = 0`, an empty collection of messages will be returned immediately.
/// For `limit > 0`, if there are no messages in the channel's queue this method
/// will sleep until a message is sent.
async fn gossipsub_recv_many(&self, limit: usize) -> PyResult<Vec<(String, Py<PyBytes>)>> {
Ok(self
.gossipsub_message_rx
.lock()
.allow_threads_py() // allow-threads-aware async call
.await
.recv_many_py(limit)
.allow_threads_py() // allow-threads-aware async call
.await?
.into_iter()
.map(|(t, d)| (t, d.pybytes()))
.collect())
}
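
A typical consumer drains the queue in batches rather than awaiting one message at a time; a rough Python sketch against the handle above (the variable name `handle` is illustrative):

```python
# Sketch: drain up to 32 queued gossipsub messages in a single await
msgs = await handle.gossipsub_recv_many(32)
for topic, payload in msgs:
    print(topic, len(payload))
```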
    // TODO: right now this blocks the main thread if anything else is awaiting the channel
    // (because it's a mutex), so it's too dangerous to expose just yet. Figure out better
    // semantics for handling this, so things don't randomly block.
// /// Tries to receive the next message from the `GossipSub` network.
// fn gossipsub_try_recv(&self) -> PyResult<Option<(String, Py<PyBytes>)>> {
// Ok(self
// .gossipsub_message_rx
// .blocking_lock()
// .try_recv_py()?
// .map(|(t, d)| (t, d.pybytes())))
// }
//
// /// Checks if the `GossipSub` message channel is empty.
// fn gossipsub_is_empty(&self) -> bool {
// self.gossipsub_message_rx.blocking_lock().is_empty()
// }
//
// /// Returns the number of `GossipSub` messages in the channel.
// fn gossipsub_len(&self) -> usize {
// self.gossipsub_message_rx.blocking_lock().len()
// }
}
pub fn networking_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<exception::PyNoPeersSubscribedToTopicError>()?;
    m.add_class::<exception::PyAllQueuesFullError>()?;
    m.add_class::<PySwarm>()?;
    m.add_class::<PyMessage>()?;
    Ok(())
}
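
For orientation, here is a minimal sketch of how these bindings are driven from Python. The constructor and the `recv()`/`gossipsub_publish` calls, plus the `PyMessage.Connection`/`PyMessage.Gossip` match arms and their fields, are taken from the Router code later in this diff; the availability of `gossipsub_subscribe` on `PySwarm` is an assumption:

```python
# Sketch only: assumes PySwarm exposes the gossipsub_* coroutines shown above
# alongside the recv() -> PyMessage API used by Router later in this diff.
import asyncio
from exo_pyo3_bindings import Keypair, PyMessage, PySwarm

async def main() -> None:
    swarm = PySwarm(Keypair.generate())
    await swarm.gossipsub_subscribe("test-net")  # assumed to exist on PySwarm
    await swarm.gossipsub_publish("test-net", b"hello")
    while True:
        msg = await swarm.recv()
        match msg:
            case PyMessage.Connection():
                print(msg.node_id, "connected" if msg.connected else "disconnected")
            case PyMessage.Gossip():
                print(msg.topic, msg.data)

asyncio.run(main())
```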

View File

@@ -1,159 +0,0 @@
use crate::ext::ResultExt as _;
use libp2p::PeerId;
use libp2p::identity::Keypair;
use pyo3::prelude::{PyBytesMethods as _, PyModule, PyModuleMethods as _};
use pyo3::types::PyBytes;
use pyo3::{Bound, PyResult, Python, pyclass, pymethods};
use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
/// Identity keypair of a node.
#[gen_stub_pyclass]
#[pyclass(name = "Keypair", frozen)]
#[repr(transparent)]
pub struct PyKeypair(pub Keypair);
#[gen_stub_pymethods]
#[pymethods]
#[allow(clippy::needless_pass_by_value)]
impl PyKeypair {
/// Generate a new Ed25519 keypair.
#[staticmethod]
fn generate_ed25519() -> Self {
Self(Keypair::generate_ed25519())
}
/// Generate a new ECDSA keypair.
#[staticmethod]
fn generate_ecdsa() -> Self {
Self(Keypair::generate_ecdsa())
}
/// Generate a new Secp256k1 keypair.
#[staticmethod]
fn generate_secp256k1() -> Self {
Self(Keypair::generate_secp256k1())
}
/// Decode a private key from a protobuf structure and parse it as a `Keypair`.
#[staticmethod]
fn from_protobuf_encoding(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
let bytes = Vec::from(bytes.as_bytes());
Ok(Self(Keypair::from_protobuf_encoding(&bytes).pyerr()?))
}
/// Decode a keypair from a DER-encoded secret key in PKCS#8 `PrivateKeyInfo`
/// format (i.e. unencrypted) as defined in [RFC5208].
///
/// [RFC5208]: https://tools.ietf.org/html/rfc5208#section-5
#[staticmethod]
fn rsa_from_pkcs8(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
let mut bytes = Vec::from(bytes.as_bytes());
Ok(Self(Keypair::rsa_from_pkcs8(&mut bytes).pyerr()?))
}
/// Decode a keypair from a DER-encoded Secp256k1 secret key in an `ECPrivateKey`
/// structure as defined in [RFC5915].
///
/// [RFC5915]: https://tools.ietf.org/html/rfc5915
#[staticmethod]
fn secp256k1_from_der(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
let mut bytes = Vec::from(bytes.as_bytes());
Ok(Self(Keypair::secp256k1_from_der(&mut bytes).pyerr()?))
}
#[staticmethod]
fn ed25519_from_bytes(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
let mut bytes = Vec::from(bytes.as_bytes());
Ok(Self(Keypair::ed25519_from_bytes(&mut bytes).pyerr()?))
}
/// Encode a private key as protobuf structure.
fn to_protobuf_encoding<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
let bytes = self.0.to_protobuf_encoding().pyerr()?;
Ok(PyBytes::new(py, &bytes))
}
/// Convert the `Keypair` into the corresponding `PeerId`.
fn to_peer_id(&self) -> PyPeerId {
PyPeerId(self.0.public().to_peer_id())
}
// /// Hidden constructor for pickling support. TODO: figure out how to do pickling...
// #[gen_stub(skip)]
// #[new]
// fn py_new(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
// Self::from_protobuf_encoding(bytes)
// }
//
// #[gen_stub(skip)]
// fn __setstate__(&mut self, state: Bound<'_, PyBytes>) -> PyResult<()> {
// *self = Self::from_protobuf_encoding(state)?;
// Ok(())
// }
//
// #[gen_stub(skip)]
// fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
// self.to_protobuf_encoding(py)
// }
//
// #[gen_stub(skip)]
// pub fn __getnewargs__<'py>(&self, py: Python<'py>) -> PyResult<(Bound<'py, PyBytes>,)> {
// Ok((self.to_protobuf_encoding(py)?,))
// }
}
/// Identifier of a peer of the network.
///
/// The data is a `CIDv0` compatible multihash of the protobuf encoded public key of the peer
/// as specified in [specs/peer-ids](https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md).
#[gen_stub_pyclass]
#[pyclass(name = "PeerId", frozen)]
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct PyPeerId(pub PeerId);
#[gen_stub_pymethods]
#[pymethods]
#[allow(clippy::needless_pass_by_value)]
impl PyPeerId {
/// Generates a random peer ID from a cryptographically secure PRNG.
///
/// This is useful for randomly walking on a DHT, or for testing purposes.
#[staticmethod]
fn random() -> Self {
Self(PeerId::random())
}
/// Parses a `PeerId` from bytes.
#[staticmethod]
fn from_bytes(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
let bytes = Vec::from(bytes.as_bytes());
Ok(Self(PeerId::from_bytes(&bytes).pyerr()?))
}
/// Returns a raw bytes representation of this `PeerId`.
fn to_bytes<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> {
let bytes = self.0.to_bytes();
PyBytes::new(py, &bytes)
}
/// Returns a base-58 encoded string of this `PeerId`.
fn to_base58(&self) -> String {
self.0.to_base58()
}
fn __repr__(&self) -> String {
format!("PeerId({})", self.to_base58())
}
fn __str__(&self) -> String {
self.to_base58()
}
}
pub fn ident_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyKeypair>()?;
m.add_class::<PyPeerId>()?;
Ok(())
}
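
Before its removal, this surface was exercised from Python roughly like so (a sketch; the `exo_pyo3_bindings` module path is taken from the Python imports later in this diff):

```python
from exo_pyo3_bindings import Keypair

kp = Keypair.generate_ed25519()
peer = kp.to_peer_id()
print(peer.to_base58())           # CIDv0-compatible multihash, base58-encoded
blob = kp.to_protobuf_encoding()  # bytes, suitable for persisting to disk
# round-trip: decoding the protobuf blob yields the same peer id
assert Keypair.from_protobuf_encoding(blob).to_peer_id().to_base58() == peer.to_base58()
```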

View File

@@ -1,8 +0,0 @@
//! A module for exposing Rust's libp2p datatypes over Pyo3
//!
//! TODO: right now we are coupled to libp2p's identity, but eventually we want to create our own
//! independent identity type of some kind or another. This may require handshaking.
//!
pub mod ident;
pub mod multiaddr;

View File

@@ -1,81 +0,0 @@
use crate::ext::ResultExt as _;
use libp2p::Multiaddr;
use pyo3::prelude::{PyBytesMethods as _, PyModule, PyModuleMethods as _};
use pyo3::types::PyBytes;
use pyo3::{Bound, PyResult, Python, pyclass, pymethods};
use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
use std::str::FromStr as _;
/// Representation of a Multiaddr.
#[gen_stub_pyclass]
#[pyclass(name = "Multiaddr", frozen)]
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct PyMultiaddr(pub Multiaddr);
#[gen_stub_pymethods]
#[pymethods]
#[allow(clippy::needless_pass_by_value)]
impl PyMultiaddr {
/// Create a new, empty multiaddress.
#[staticmethod]
fn empty() -> Self {
Self(Multiaddr::empty())
}
/// Create a new, empty multiaddress with the given capacity.
#[staticmethod]
fn with_capacity(n: usize) -> Self {
Self(Multiaddr::with_capacity(n))
}
/// Parse a `Multiaddr` value from its byte slice representation.
#[staticmethod]
fn from_bytes(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
let bytes = Vec::from(bytes.as_bytes());
Ok(Self(Multiaddr::try_from(bytes).pyerr()?))
}
/// Parse a `Multiaddr` value from its string representation.
#[staticmethod]
fn from_string(string: String) -> PyResult<Self> {
Ok(Self(Multiaddr::from_str(&string).pyerr()?))
}
/// Return the length in bytes of this multiaddress.
fn len(&self) -> usize {
self.0.len()
}
/// Returns true if the length of this multiaddress is 0.
fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Return a copy of this [`Multiaddr`]'s byte representation.
fn to_bytes<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> {
let bytes = self.0.to_vec();
PyBytes::new(py, &bytes)
}
/// Convert a Multiaddr to a string.
fn to_string(&self) -> String {
self.0.to_string()
}
#[gen_stub(skip)]
fn __repr__(&self) -> String {
format!("Multiaddr({})", self.0)
}
#[gen_stub(skip)]
fn __str__(&self) -> String {
self.to_string()
}
}
pub fn multiaddr_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyMultiaddr>()?;
Ok(())
}
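
Illustratively, the removed Multiaddr wrapper round-trips between its string and byte representations (a sketch, same assumed module path):

```python
from exo_pyo3_bindings import Multiaddr

addr = Multiaddr.from_string("/ip4/127.0.0.1/tcp/4001")
assert not addr.is_empty() and addr.len() > 0
# bytes -> Multiaddr -> string round-trips losslessly
assert Multiaddr.from_bytes(addr.to_bytes()).to_string() == "/ip4/127.0.0.1/tcp/4001"
```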

View File

@@ -19,21 +19,14 @@ either = { workspace = true }
# macro dependencies
extend = { workspace = true }
delegate = { workspace = true }
impl-trait-for-tuples = { workspace = true }
derive_more = { workspace = true }
# async
tokio = { workspace = true, features = ["full"] }
futures = { workspace = true }
futures-lite = { workspace = true }
futures-timer = { workspace = true }
# utility dependencies
util = { workspace = true }
thiserror = { workspace = true }
#internment = { workspace = true }
#recursion = { workspace = true }
#generativity = { workspace = true }
#itertools = { workspace = true }
tracing-subscriber = { version = "0.3.19", features = ["default", "env-filter"] }
keccak-const = { workspace = true }
@@ -41,4 +34,4 @@ keccak-const = { workspace = true }
log = { workspace = true }
# networking
libp2p = { workspace = true, features = ["full"] }
libp2p = { workspace = true, features = ["full"] }

View File

@@ -1,6 +1,6 @@
use libp2p::identity;
use networking::swarm::{FromSwarm, Swarm, ToSwarm};
use tokio::sync::mpsc;
use tokio::{io, io::AsyncBufReadExt as _, select};
use tracing_subscriber::EnvFilter;
use tracing_subscriber::filter::LevelFilter;
@@ -11,60 +11,50 @@ async fn main() {
.with_env_filter(EnvFilter::from_default_env().add_directive(LevelFilter::INFO.into()))
.try_init();
let (to_swarm, from_client) = mpsc::channel(20);
let (to_client, mut from_swarm) = mpsc::channel(20);
    // Configure swarm
    let mut swarm = Swarm::new(
        identity::Keypair::generate_ed25519(),
        from_client,
        to_client,
    )
    .expect("Swarm creation failed");
    // Create a Gossipsub topic & subscribe
    _ = to_swarm
        .send(ToSwarm::Subscribe("test-net".to_owned()))
        .await;
    // Read full lines from stdin
    let mut stdin = io::BufReader::new(io::stdin()).lines();
    println!("Enter messages via STDIN and they will be sent to connected peers using Gossipsub");
    tokio::task::spawn(async move { swarm.run().await });
    // Kick it off
    loop {
        select! {
            // on gossipsub outgoing
            Ok(Some(line)) = stdin.next_line() => {
                _ = to_swarm.send(ToSwarm::Message("test-net".to_owned(), line.into_bytes())).await;
            }
            event = from_swarm.recv() => match event {
                // on gossipsub incoming
                Some(FromSwarm::Message(pid, topic, content)) => {
                    assert_eq!(topic, "test-net");
                    let fmt = String::from_utf8_lossy(&content);
                    println!("{pid}: {fmt}");
                }
                // on discovery
                Some(FromSwarm::Discovered(pid)) => {
                    eprintln!("\n\nConnected to: {pid}\n\n");
                }
                Some(FromSwarm::Expired(pid)) => {
                    eprintln!("\n\nDisconnected from: {pid}\n\n");
                }
                None => break,
                // otherwise log any other event
                e => { log::info!("Other event {e:?}"); }

View File

@@ -1,127 +0,0 @@
// Copyright 2018 Parity Technologies (UK) Ltd.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
use futures::stream::StreamExt;
use libp2p::{
gossipsub, mdns, noise,
swarm::{NetworkBehaviour, SwarmEvent},
tcp, yamux,
};
use std::time::Duration;
use std::{error::Error, hash::Hash};
use tokio::{io, io::AsyncBufReadExt, select};
use tracing_subscriber::EnvFilter;
// We create a custom network behaviour that combines Gossipsub and Mdns.
#[derive(NetworkBehaviour)]
struct MyBehaviour {
gossipsub: gossipsub::Behaviour,
mdns: mdns::tokio::Behaviour,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let _ = tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.try_init();
let mut swarm = libp2p::SwarmBuilder::with_new_identity()
.with_tokio()
.with_tcp(
tcp::Config::default(),
noise::Config::new,
yamux::Config::default,
)?
.with_behaviour(|key| {
// Set a custom gossipsub configuration
let gossipsub_config = gossipsub::ConfigBuilder::default()
.heartbeat_interval(Duration::from_secs(10))
.validation_mode(gossipsub::ValidationMode::Strict) // This sets the kind of message validation. The default is Strict (enforce message signing)
.build()
.map_err(io::Error::other)?; // Temporary hack because `build` does not return a proper `std::error::Error`.
// build a gossipsub network behaviour
let gossipsub = gossipsub::Behaviour::new(
gossipsub::MessageAuthenticity::Signed(key.clone()),
gossipsub_config,
)?;
let mdns =
mdns::tokio::Behaviour::new(mdns::Config::default(), key.public().to_peer_id())?;
Ok(MyBehaviour { gossipsub, mdns })
})?
.build();
println!("Running swarm with identity {}", swarm.local_peer_id());
// Create a Gossipsub topic
let topic = gossipsub::IdentTopic::new("test-net");
// subscribes to our topic
swarm.behaviour_mut().gossipsub.subscribe(&topic)?;
// Read full lines from stdin
let mut stdin = io::BufReader::new(io::stdin()).lines();
// Listen on all interfaces and whatever port the OS assigns
swarm.listen_on("/ip4/0.0.0.0/tcp/0".parse()?)?;
println!("Enter messages via STDIN and they will be sent to connected peers using Gossipsub");
// Kick it off
loop {
select! {
Ok(Some(line)) = stdin.next_line() => {
if let Err(e) = swarm
.behaviour_mut().gossipsub
.publish(topic.clone(), line.as_bytes()) {
println!("Publish error: {e:?}");
}
}
event = swarm.select_next_some() => match event {
SwarmEvent::Behaviour(MyBehaviourEvent::Mdns(mdns::Event::Discovered(list))) => {
for (peer_id, multiaddr) in list {
println!("mDNS discovered a new peer: {peer_id} on {multiaddr}");
swarm.behaviour_mut().gossipsub.add_explicit_peer(&peer_id);
}
},
SwarmEvent::Behaviour(MyBehaviourEvent::Mdns(mdns::Event::Expired(list))) => {
for (peer_id, multiaddr) in list {
println!("mDNS discover peer has expired: {peer_id} on {multiaddr}");
swarm.behaviour_mut().gossipsub.remove_explicit_peer(&peer_id);
}
},
SwarmEvent::Behaviour(MyBehaviourEvent::Gossipsub(gossipsub::Event::Message {
propagation_source: peer_id,
message_id: id,
message,
})) => println!(
"Got message: '{}' with id: {id} from peer: {peer_id}",
String::from_utf8_lossy(&message.data),
),
SwarmEvent::NewListenAddr { address, .. } => {
println!("Local node is listening on {address}");
}
e => {
println!("Other swarm event: {:?}", e);
}
}
}
}
}

View File

@@ -1,11 +1,10 @@
use crate::ext::MultiaddrExt;
use crate::keep_alive;
use delegate::delegate;
use either::Either;
use futures::FutureExt;
use futures_timer::Delay;
use libp2p::core::transport::PortUse;
use libp2p::core::{ConnectedPoint, Endpoint};
use libp2p::swarm::behaviour::ConnectionEstablished;
use libp2p::swarm::dial_opts::DialOpts;
use libp2p::swarm::{

View File

@@ -1,44 +0,0 @@
use delegate::delegate;
use libp2p::swarm::handler::ConnectionEvent;
use libp2p::swarm::{ConnectionHandlerEvent, SubstreamProtocol, dummy, handler};
use std::task::{Context, Poll};
/// An implementation of [`ConnectionHandler`] that doesn't handle any protocols, but it keeps
/// the connection alive.
#[derive(Clone)]
#[repr(transparent)]
pub struct ConnectionHandler(dummy::ConnectionHandler);
impl ConnectionHandler {
pub fn new() -> Self {
ConnectionHandler(dummy::ConnectionHandler)
}
}
impl handler::ConnectionHandler for ConnectionHandler {
// delegate types and implementation mostly to dummy handler
type FromBehaviour = <dummy::ConnectionHandler as handler::ConnectionHandler>::FromBehaviour;
type ToBehaviour = <dummy::ConnectionHandler as handler::ConnectionHandler>::ToBehaviour;
type InboundProtocol =
<dummy::ConnectionHandler as handler::ConnectionHandler>::InboundProtocol;
type OutboundProtocol =
<dummy::ConnectionHandler as handler::ConnectionHandler>::OutboundProtocol;
type InboundOpenInfo =
<dummy::ConnectionHandler as handler::ConnectionHandler>::InboundOpenInfo;
type OutboundOpenInfo =
<dummy::ConnectionHandler as handler::ConnectionHandler>::OutboundOpenInfo;
delegate! {
to self.0 {
fn listen_protocol(&self) -> SubstreamProtocol<Self::InboundProtocol, Self::InboundOpenInfo>;
fn poll(&mut self, cx: &mut Context<'_>) -> Poll<ConnectionHandlerEvent<Self::OutboundProtocol, Self::OutboundOpenInfo, Self::ToBehaviour>>;
fn on_behaviour_event(&mut self, event: Self::FromBehaviour);
fn on_connection_event(&mut self, event: ConnectionEvent<Self::InboundProtocol, Self::OutboundProtocol, Self::InboundOpenInfo, Self::OutboundOpenInfo>);
}
}
// specifically override this to force connection to stay alive
fn connection_keep_alive(&self) -> bool {
true
}
}

View File

@@ -4,18 +4,7 @@
//!
//!
// enable Rust-unstable features for convenience
#![feature(trait_alias)]
// #![feature(stmt_expr_attributes)]
// #![feature(unboxed_closures)]
// #![feature(assert_matches)]
// #![feature(async_fn_in_dyn_trait)]
// #![feature(async_for_loop)]
// #![feature(auto_traits)]
// #![feature(negative_impls)]
pub mod discovery;
pub mod keep_alive;
pub mod swarm;
/// Namespace for all the type/trait aliases used by this crate.
@@ -54,11 +43,3 @@ pub(crate) mod ext {
}
}
}
pub(crate) mod private {
#![allow(dead_code)]
/// Sealed traits support
pub trait Sealed {}
impl<T: ?Sized> Sealed for T {}
}

View File

@@ -1,9 +1,30 @@
use crate::alias;
use crate::discovery;
use crate::swarm::transport::tcp_transport;
use behaviour::{Behaviour, BehaviourEvent};
use futures_lite::StreamExt;
use libp2p::{PeerId, SwarmBuilder, gossipsub, identity, swarm::SwarmEvent};
use tokio::sync::mpsc;
pub struct Swarm {
swarm: libp2p::Swarm<Behaviour>,
from_client: mpsc::Receiver<ToSwarm>,
to_client: mpsc::Sender<FromSwarm>,
}
#[derive(Debug)]
pub enum FromSwarm {
PublishError(gossipsub::PublishError),
Discovered(PeerId),
Expired(PeerId),
Message(PeerId, String, Vec<u8>),
}
#[derive(Debug)]
pub enum ToSwarm {
Message(String, Vec<u8>),
Subscribe(String),
Unsubscribe(String),
}
/// The current version of the network: this prevents devices running different versions of the
/// software from interacting with each other.
@@ -15,23 +36,142 @@ pub type Swarm = libp2p::Swarm<Behaviour>;
pub const NETWORK_VERSION: &[u8] = b"v0.0.1";
pub const OVERRIDE_VERSION_ENV_VAR: &str = "EXO_LIBP2P_NAMESPACE";
impl Swarm {
/// Create and configure a swarm which listens to all ports on OS
pub fn new(
keypair: identity::Keypair,
from_client: mpsc::Receiver<ToSwarm>,
to_client: mpsc::Sender<FromSwarm>,
) -> alias::AnyResult<Swarm> {
let mut swarm = SwarmBuilder::with_existing_identity(keypair)
.with_tokio()
.with_other_transport(tcp_transport)?
.with_behaviour(Behaviour::new)?
.build();
// Listen on all interfaces and whatever port the OS assigns
swarm.listen_on("/ip4/0.0.0.0/tcp/0".parse()?)?;
Ok(Self {
swarm,
from_client,
to_client,
})
}
pub async fn run(&mut self) {
log::info!("RUST: networking task started");
loop {
tokio::select! {
message = self.from_client.recv() => {
// handle closed channel
let Some(message) = message else {
log::info!("RUST: channel closed");
break;
};
// dispatch incoming messages
match message {
ToSwarm::Subscribe(topic) => {
// try to subscribe
match self.swarm.behaviour_mut().gossipsub.subscribe(&gossipsub::IdentTopic::new(topic.clone())) {
Err(e) => {
let gossipsub::SubscriptionError::PublishError(e) = e else {
unreachable!("topic filter used")
};
let Ok(()) = self.to_client.send(FromSwarm::PublishError(e)).await else {
log::warn!("RUST: client connection closed");
break
};
},
Ok(false) => log::warn!("RUST: tried to subscribe to topic twice"),
Ok(true) => {},
}
}
ToSwarm::Unsubscribe(topic) => {
                            // try to unsubscribe
if !self.swarm.behaviour_mut().gossipsub.unsubscribe(&gossipsub::IdentTopic::new(topic)) {
log::warn!("RUST: tried to unsubscribe from topic twice");
}
}
                        ToSwarm::Message(topic, data) => {
// try to publish the data -> catch NoPeersSubscribedToTopic error & convert to correct exception
match self.swarm.behaviour_mut().gossipsub.publish(
gossipsub::IdentTopic::new(topic), data
) {
Ok(_) => {},
Err(e) => {
let Ok(()) = self.to_client.send(FromSwarm::PublishError(e)).await else {
log::warn!("RUST: client connection closed");
break
};
},
}
}
}
}
                // architectural solution to this problem:
                // create a keep_alive behaviour whose job it is to dial peers discovered by mDNS (and drop them when expired)
                // -> it will emit TRUE connected/disconnected events consumable elsewhere
                //
                // gossipsub will feed off of dial attempts created by networking, and that will bootstrap its peers list
                // then for actual communication it will dial those peers if need be
swarm_event = self.swarm.next() => {
let Some(swarm_event) = swarm_event else {
log::warn!("RUST: swarm closed communication");
break
};
let SwarmEvent::Behaviour(behaviour_event) = swarm_event else {
continue
};
match behaviour_event {
BehaviourEvent::Gossipsub(gossipsub::Event::Message {
message: gossipsub::Message {
source,
topic,
data,
..
},
..
}) => {
let Some(peer_id) = source else {
log::warn!("RUST: ignoring message with unknown source on {topic}");
continue;
};
// send incoming message to channel (or exit if connection closed)
if let Err(e) = self.to_client.send(FromSwarm::Message(peer_id, topic.into_string(), data)).await {
log::warn!("RUST: could not send incoming gossipsub message since channel already closed: {e}");
break
};
},
BehaviourEvent::Discovery(discovery::Event::ConnectionEstablished { peer_id, .. }) => {
// send connection event to channel (or exit if connection closed)
if let Err(_) = self.to_client.send(FromSwarm::Discovered(peer_id)).await {
log::warn!("RUST: swarm closed communication");
};
},
BehaviourEvent::Discovery(discovery::Event::ConnectionClosed { peer_id, .. }) => {
// send connection event to channel (or exit if connection closed)
if let Err(_) = self.to_client.send(FromSwarm::Expired(peer_id)).await {
log::warn!("RUST: swarm closed communication");
};
},
e => {
log::debug!("RUST: other event {e:?}");
}
}
}
}
}
log::info!("RUST: networking task stopped");
}
}
mod transport {
use crate::alias;
use crate::swarm::{NETWORK_VERSION, OVERRIDE_VERSION_ENV_VAR};
use futures_lite::{AsyncRead, AsyncWrite};
use keccak_const::Sha3_256;
use libp2p::core::muxing;
use libp2p::core::transport::Boxed;

View File

@@ -14,6 +14,7 @@ from exo.download.download_utils import (
map_repo_download_progress_to_download_progress_data,
)
from exo.download.shard_downloader import ShardDownloader
from exo.shared.constants import EXO_MODELS_DIR
from exo.shared.models.model_cards import ModelId
from exo.shared.types.commands import (
CancelDownload,
@@ -63,6 +64,9 @@ class DownloadCoordinator:
self.event_sender, self.event_receiver = channel[Event]()
self.shard_downloader.on_progress(self._download_progress_callback)
def _model_dir(self, model_id: ModelId) -> str:
return str(EXO_MODELS_DIR / model_id.normalize())
async def _download_progress_callback(
self, callback_shard: ShardMetadata, progress: RepoDownloadProgress
) -> None:
@@ -74,6 +78,7 @@ class DownloadCoordinator:
shard_metadata=callback_shard,
node_id=self.node_id,
total_bytes=progress.total_bytes,
model_directory=self._model_dir(model_id),
)
self.download_status[model_id] = completed
await self.event_sender.send(
@@ -93,6 +98,7 @@ class DownloadCoordinator:
download_progress=map_repo_download_progress_to_download_progress_data(
progress
),
model_directory=self._model_dir(model_id),
)
self.download_status[model_id] = ongoing
await self.event_sender.send(
@@ -170,7 +176,11 @@ class DownloadCoordinator:
return
# Emit pending status
progress = DownloadPending(
shard_metadata=shard,
node_id=self.node_id,
model_directory=self._model_dir(model_id),
)
self.download_status[model_id] = progress
await self.event_sender.send(NodeDownloadProgress(download_progress=progress))
@@ -184,6 +194,7 @@ class DownloadCoordinator:
shard_metadata=shard,
node_id=self.node_id,
total_bytes=initial_progress.total_bytes,
model_directory=self._model_dir(model_id),
)
self.download_status[model_id] = completed
await self.event_sender.send(
@@ -206,6 +217,7 @@ class DownloadCoordinator:
download_progress=map_repo_download_progress_to_download_progress_data(
initial_progress
),
model_directory=self._model_dir(model_id),
)
self.download_status[model_id] = status
self.event_sender.send_nowait(NodeDownloadProgress(download_progress=status))
@@ -219,6 +231,7 @@ class DownloadCoordinator:
shard_metadata=shard,
node_id=self.node_id,
error_message=str(e),
model_directory=self._model_dir(model_id),
)
self.download_status[model_id] = failed
await self.event_sender.send(
@@ -253,6 +266,7 @@ class DownloadCoordinator:
pending = DownloadPending(
shard_metadata=current_status.shard_metadata,
node_id=self.node_id,
model_directory=self._model_dir(model_id),
)
await self.event_sender.send(
NodeDownloadProgress(download_progress=pending)
@@ -295,11 +309,18 @@ class DownloadCoordinator:
node_id=self.node_id,
shard_metadata=progress.shard,
total_bytes=progress.total_bytes,
model_directory=self._model_dir(
progress.shard.model_card.model_id
),
)
elif progress.status in ["in_progress", "not_started"]:
if progress.downloaded_bytes_this_session.in_bytes == 0:
status = DownloadPending(
node_id=self.node_id,
shard_metadata=progress.shard,
model_directory=self._model_dir(
progress.shard.model_card.model_id
),
)
else:
status = DownloadOngoing(
@@ -308,6 +329,9 @@ class DownloadCoordinator:
download_progress=map_repo_download_progress_to_download_progress_data(
progress
),
model_directory=self._model_dir(
progress.shard.model_card.model_id
),
)
else:
continue
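
The new `model_directory` field is derived in one place, `_model_dir`, so every progress variant reports the same path. A rough sketch of the mapping (the real `EXO_MODELS_DIR` constant and `ModelId.normalize()` live in `exo.shared` and are only assumed here):

```python
from pathlib import Path

EXO_MODELS_DIR = Path.home() / ".exo" / "models"  # assumed default location

def model_dir(normalized_model_id: str) -> str:
    # mirrors DownloadCoordinator._model_dir: one canonical directory per model,
    # given an id already made filesystem-safe by ModelId.normalize()
    return str(EXO_MODELS_DIR / normalized_model_id)
```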

View File

@@ -44,7 +44,7 @@ class Node:
@classmethod
async def create(cls, args: "Args") -> "Self":
keypair = get_node_id_keypair()
node_id = NodeId(keypair.to_string())
session_id = SessionId(master_node_id=node_id, election_clock=0)
router = Router.create(keypair)
await router.register_topic(topics.GLOBAL_EVENTS)
@@ -136,6 +136,8 @@ class Node:
async def run(self):
async with self._tg as tg:
signal.signal(signal.SIGINT, lambda _, __: self.shutdown())
signal.signal(signal.SIGTERM, lambda _, __: self.shutdown())
tg.start_soon(self.router.run)
tg.start_soon(self.election.run)
if self.download_coordinator:
@@ -147,8 +149,6 @@ class Node:
if self.api:
tg.start_soon(self.api.run)
tg.start_soon(self._elect_loop)
def shutdown(self):
# if this is our second call to shutdown, just sys.exit

View File

@@ -17,6 +17,7 @@ from exo.shared.types.api import (
LogprobsContentItem,
StreamingChoiceResponse,
ToolCall,
Usage,
)
from exo.shared.types.chunks import ErrorChunk, TokenChunk, ToolCallChunk
from exo.shared.types.common import CommandId
@@ -125,6 +126,8 @@ async def generate_chat_stream(
chunk_stream: AsyncGenerator[ErrorChunk | ToolCallChunk | TokenChunk, None],
) -> AsyncGenerator[str, None]:
"""Generate Chat Completions API streaming events from chunks."""
last_usage: Usage | None = None
async for chunk in chunk_stream:
if isinstance(chunk, ErrorChunk):
error_response = ErrorResponse(
@@ -138,6 +141,8 @@ async def generate_chat_stream(
yield "data: [DONE]\n\n"
return
last_usage = chunk.usage or last_usage
if isinstance(chunk, ToolCallChunk):
tool_call_deltas = [
ToolCall(
@@ -161,12 +166,15 @@ async def generate_chat_stream(
finish_reason="tool_calls",
)
],
usage=last_usage,
)
yield f"data: {tool_response.model_dump_json()}\n\n"
yield "data: [DONE]\n\n"
return
chunk_response = chunk_to_response(chunk, command_id)
if chunk.finish_reason is not None:
chunk_response = chunk_response.model_copy(update={"usage": last_usage})
yield f"data: {chunk_response.model_dump_json()}\n\n"
if chunk.finish_reason is not None:
@@ -177,6 +185,8 @@ async def collect_chat_response(
command_id: CommandId,
chunk_stream: AsyncGenerator[ErrorChunk | ToolCallChunk | TokenChunk, None],
) -> AsyncGenerator[str]:
    # This is an AsyncGenerator[str] rather than returning a ChatCompletionResponse because
    # FastAPI handles the cancellation better but wouldn't auto-serialize for some reason
"""Collect all token chunks and return a single ChatCompletionResponse."""
text_parts: list[str] = []
tool_calls: list[ToolCall] = []
@@ -184,6 +194,7 @@ async def collect_chat_response(
model: str | None = None
finish_reason: FinishReason | None = None
error_message: str | None = None
last_usage: Usage | None = None
async for chunk in chunk_stream:
if isinstance(chunk, ErrorChunk):
@@ -193,6 +204,8 @@ async def collect_chat_response(
if model is None:
model = chunk.model
last_usage = chunk.usage or last_usage
if isinstance(chunk, TokenChunk):
text_parts.append(chunk.text)
if chunk.logprob is not None:
@@ -241,5 +254,6 @@ async def collect_chat_response(
finish_reason=finish_reason,
)
],
usage=last_usage,
).model_dump_json()
return
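
The pattern above threads token accounting through the stream: remember the latest non-`None` usage and stamp it onto the final chunk. A minimal sketch of that accumulation, with a stripped-down `Chunk` standing in for the real chunk classes:

```python
from dataclasses import dataclass

@dataclass
class Chunk:  # stand-in for TokenChunk and friends
    usage: object | None = None
    finish_reason: str | None = None

def thread_usage(chunks: list[Chunk]) -> list[Chunk]:
    last_usage = None
    out: list[Chunk] = []
    for chunk in chunks:
        last_usage = chunk.usage or last_usage  # keep the most recent totals
        if chunk.finish_reason is not None:
            chunk.usage = last_usage            # the final chunk carries them
        out.append(chunk)
    return out
```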

View File

@@ -4,7 +4,7 @@ import json
from collections.abc import AsyncGenerator
from typing import Any
from exo.shared.types.api import FinishReason, Usage
from exo.shared.types.chunks import ErrorChunk, TokenChunk, ToolCallChunk
from exo.shared.types.claude_api import (
ClaudeContentBlock,
@@ -161,12 +161,14 @@ async def collect_claude_response(
command_id: CommandId,
model: str,
chunk_stream: AsyncGenerator[ErrorChunk | ToolCallChunk | TokenChunk, None],
) -> AsyncGenerator[str]:
    # This is an AsyncGenerator[str] rather than returning a ChatCompletionResponse because
    # FastAPI handles the cancellation better but wouldn't auto-serialize for some reason
"""Collect all token chunks and return a single ClaudeMessagesResponse."""
text_parts: list[str] = []
tool_use_blocks: list[ClaudeToolUseBlock] = []
stop_reason: ClaudeStopReason | None = None
last_usage: Usage | None = None
error_message: str | None = None
async for chunk in chunk_stream:
@@ -174,6 +176,8 @@ async def collect_claude_response(
error_message = chunk.error_message or "Internal server error"
break
last_usage = chunk.usage or last_usage
if isinstance(chunk, ToolCallChunk):
for tool in chunk.tool_calls:
tool_use_blocks.append(
@@ -183,12 +187,10 @@ async def collect_claude_response(
input=json.loads(tool.arguments), # pyright: ignore[reportAny]
)
)
stop_reason = "tool_use"
continue
text_parts.append(chunk.text)
if chunk.finish_reason is not None:
stop_reason = finish_reason_to_claude_stop_reason(chunk.finish_reason)
@@ -208,11 +210,11 @@ async def collect_claude_response(
if not content:
content.append(ClaudeTextBlock(text=""))
# Use actual usage data if available
input_tokens = last_usage.prompt_tokens if last_usage else 0
output_tokens = last_usage.completion_tokens if last_usage else 0
yield ClaudeMessagesResponse(
id=f"msg_{command_id}",
model=model,
content=content,
@@ -221,7 +223,8 @@ async def collect_claude_response(
input_tokens=input_tokens,
output_tokens=output_tokens,
),
).model_dump_json()
return
async def generate_claude_stream(
@@ -249,7 +252,7 @@ async def generate_claude_stream(
output_tokens = 0
stop_reason: ClaudeStopReason | None = None
last_usage: Usage | None = None
next_block_index = 1 # text block is 0, tool blocks start at 1
async for chunk in chunk_stream:
@@ -257,8 +260,9 @@ async def generate_claude_stream(
# Close text block and bail
break
last_usage = chunk.usage or last_usage
if isinstance(chunk, ToolCallChunk):
stop_reason = "tool_use"
# Emit tool_use content blocks
@@ -290,7 +294,6 @@ async def generate_claude_stream(
continue
output_tokens += 1 # Count each chunk as one token
# content_block_delta
delta_event = ClaudeContentBlockDeltaEvent(
@@ -302,9 +305,9 @@ async def generate_claude_stream(
if chunk.finish_reason is not None:
stop_reason = finish_reason_to_claude_stop_reason(chunk.finish_reason)
# Use actual token count from usage if available
if last_usage is not None:
output_tokens = last_usage.completion_tokens
# content_block_stop for text block
block_stop = ClaudeContentBlockStopEvent(index=0)

View File

@@ -4,6 +4,7 @@ from collections.abc import AsyncGenerator
from itertools import count
from typing import Any
from exo.shared.types.api import Usage
from exo.shared.types.chunks import ErrorChunk, TokenChunk, ToolCallChunk
from exo.shared.types.common import CommandId
from exo.shared.types.openai_responses import (
@@ -121,13 +122,15 @@ async def collect_responses_response(
command_id: CommandId,
model: str,
chunk_stream: AsyncGenerator[ErrorChunk | ToolCallChunk | TokenChunk, None],
) -> AsyncGenerator[str]:
    # This is an AsyncGenerator[str] rather than returning a ChatCompletionResponse because
    # FastAPI handles the cancellation better but wouldn't auto-serialize for some reason
"""Collect all token chunks and return a single ResponsesResponse."""
response_id = f"resp_{command_id}"
item_id = f"item_{command_id}"
accumulated_text = ""
function_call_items: list[ResponseFunctionCallItem] = []
last_usage: Usage | None = None
error_message: str | None = None
async for chunk in chunk_stream:
@@ -135,32 +138,32 @@ async def collect_responses_response(
error_message = chunk.error_message or "Internal server error"
break
last_usage = chunk.usage or last_usage
if isinstance(chunk, ToolCallChunk):
for tool in chunk.tool_calls:
function_call_items.append(
ResponseFunctionCallItem(
id=f"fc_{tool.id}",
call_id=f"call_{tool.id}",
id=tool.id,
call_id=tool.id,
name=tool.name,
arguments=tool.arguments,
)
)
continue
accumulated_text += chunk.text
if error_message is not None:
raise ValueError(error_message)
# Create usage from usage data if available
usage = None
if last_usage is not None:
usage = ResponseUsage(
input_tokens=last_usage.prompt_tokens,
output_tokens=last_usage.completion_tokens,
total_tokens=last_usage.total_tokens,
)
output: list[ResponseItem] = [
@@ -172,14 +175,15 @@ async def collect_responses_response(
]
output.extend(function_call_items)
yield ResponsesResponse(
id=response_id,
model=model,
status="completed",
output=output,
output_text=accumulated_text,
usage=usage,
).model_dump_json()
return
async def generate_responses_stream(
@@ -235,15 +239,16 @@ async def generate_responses_stream(
accumulated_text = ""
function_call_items: list[ResponseFunctionCallItem] = []
last_usage: Usage | None = None
next_output_index = 1 # message item is at 0
async for chunk in chunk_stream:
if isinstance(chunk, ErrorChunk):
break
last_usage = chunk.usage or last_usage
if isinstance(chunk, ToolCallChunk):
for tool in chunk.tool_calls:
fc_id = f"fc_{tool.id}"
call_id = f"call_{tool.id}"
@@ -302,7 +307,6 @@ async def generate_responses_stream(
continue
accumulated_text += chunk.text
# response.output_text.delta
delta_event = ResponseTextDeltaEvent(
@@ -346,13 +350,13 @@ async def generate_responses_stream(
)
yield f"event: response.output_item.done\ndata: {item_done.model_dump_json()}\n\n"
# Create usage from usage data if available
usage = None
if last_usage is not None:
usage = ResponseUsage(
input_tokens=last_usage.prompt_tokens,
output_tokens=last_usage.completion_tokens,
total_tokens=last_usage.total_tokens,
)
# response.completed

View File

@@ -1232,12 +1232,15 @@ class API:
"X-Accel-Buffering": "no",
},
)
else:
return StreamingResponse(
collect_claude_response(
command.command_id,
payload.model,
self._token_chunk_stream(command.command_id),
),
media_type="application/json",
)
async def openai_responses(
self, payload: ResponsesRequest
@@ -1265,11 +1268,15 @@ class API:
},
)
else:
return StreamingResponse(
collect_responses_response(
command.command_id,
payload.model,
self._token_chunk_stream(command.command_id),
),
media_type="application/json",
)
def _calculate_total_available_memory(self) -> Memory:
"""Calculate total available memory across all nodes in bytes."""

View File

@@ -4,7 +4,11 @@ import json
from collections.abc import AsyncGenerator
from typing import Any, cast
from exo.master.adapters.claude import (
ClaudeMessagesResponse,
collect_claude_response,
generate_claude_stream,
)
from exo.shared.types.api import ToolCallItem
from exo.shared.types.chunks import ErrorChunk, TokenChunk, ToolCallChunk
from exo.shared.types.common import CommandId, ModelId
@@ -17,6 +21,18 @@ async def _chunks_to_stream(
yield chunk
async def _collect_response(
command_id: CommandId,
model: str,
chunk_stream: AsyncGenerator[ErrorChunk | ToolCallChunk | TokenChunk, None],
) -> ClaudeMessagesResponse:
"""Helper to consume the async generator and parse the JSON response."""
parts: list[str] = []
async for part in collect_claude_response(command_id, model, chunk_stream):
parts.append(part)
return ClaudeMessagesResponse.model_validate_json("".join(parts))
MODEL = ModelId("test-model")
COMMAND_ID = CommandId("cmd_test123")
@@ -47,7 +63,7 @@ class TestCollectClaudeResponseToolUse:
],
),
]
response = await _collect_response(
COMMAND_ID, "test-model", _chunks_to_stream(chunks)
)
@@ -77,7 +93,7 @@ class TestCollectClaudeResponseToolUse:
],
),
]
response = await _collect_response(
COMMAND_ID, "test-model", _chunks_to_stream(chunks)
)
@@ -102,7 +118,7 @@ class TestCollectClaudeResponseToolUse:
],
),
]
response = await _collect_response(
COMMAND_ID, "test-model", _chunks_to_stream(chunks)
)
@@ -116,7 +132,7 @@ class TestCollectClaudeResponseToolUse:
async def test_no_content_produces_empty_text_block(self):
chunks: list[ErrorChunk | ToolCallChunk | TokenChunk] = []
response = await _collect_response(
COMMAND_ID, "test-model", _chunks_to_stream(chunks)
)
assert len(response.content) == 1

View File

@@ -42,7 +42,7 @@ from exo.utils.channels import channel
@pytest.mark.asyncio
async def test_master():
keypair = get_node_id_keypair()
node_id = NodeId(keypair.to_string())
session_id = SessionId(master_node_id=node_id, election_clock=0)
ge_sender, global_event_receiver = channel[ForwarderEvent]()
@@ -75,7 +75,7 @@ async def test_master():
async with anyio.create_task_group() as tg:
tg.start_soon(master.run)
sender_node_id = NodeId(f"{keypair.to_peer_id().to_base58()}_sender")
sender_node_id = NodeId(f"{keypair.to_string()}_sender")
# inject a NodeGatheredInfo event
logger.info("inject a NodeGatheredInfo event")
await local_event_sender.send(

View File

@@ -1,37 +0,0 @@
from enum import Enum
from exo_pyo3_bindings import ConnectionUpdate, ConnectionUpdateType
from exo.shared.types.common import NodeId
from exo.utils.pydantic_ext import CamelCaseModel
"""Serialisable types for Connection Updates/Messages"""
class ConnectionMessageType(Enum):
Connected = 0
Disconnected = 1
@staticmethod
def from_update_type(update_type: ConnectionUpdateType):
match update_type:
case ConnectionUpdateType.Connected:
return ConnectionMessageType.Connected
case ConnectionUpdateType.Disconnected:
return ConnectionMessageType.Disconnected
class ConnectionMessage(CamelCaseModel):
node_id: NodeId
connection_type: ConnectionMessageType
remote_ipv4: str
remote_tcp_port: int
@classmethod
def from_update(cls, update: ConnectionUpdate) -> "ConnectionMessage":
return cls(
node_id=NodeId(update.peer_id.to_base58()),
connection_type=ConnectionMessageType.from_update_type(update.update_type),
remote_ipv4=update.remote_ipv4,
remote_tcp_port=update.remote_tcp_port,
)

View File

@@ -16,17 +16,19 @@ from anyio.abc import TaskGroup
from exo_pyo3_bindings import (
AllQueuesFullError,
Keypair,
NoPeersSubscribedToTopicError,
PyMessage,
PySwarm,
)
from filelock import FileLock
from loguru import logger
from exo.shared.constants import EXO_NODE_ID_KEYPAIR
from exo.shared.election import ConnectionMessage
from exo.shared.types.common import NodeId
from exo.utils.channels import Receiver, Sender, channel
from exo.utils.pydantic_ext import CamelCaseModel
from .topics import CONNECTION_MESSAGES, PublishPolicy, TypedTopic
@@ -102,13 +104,13 @@ class TopicRouter[T: CamelCaseModel]:
class Router:
@classmethod
def create(cls, identity: Keypair) -> "Router":
return cls(handle=PySwarm(identity))
def __init__(self, handle: PySwarm):
self.topic_routers: dict[str, TopicRouter[CamelCaseModel]] = {}
send, recv = channel[tuple[str, bytes]]()
self.networking_receiver: Receiver[tuple[str, bytes]] = recv
self._net = handle
self._tmp_networking_sender: Sender[tuple[str, bytes]] | None = send
self._id_count = count()
self._tg: TaskGroup | None = None
@@ -154,7 +156,6 @@ class Router:
router = self.topic_routers[topic]
tg.start_soon(router.run)
tg.start_soon(self._networking_recv)
tg.start_soon(self._networking_publish)
# Router only shuts down if you cancel it.
await sleep_forever()
@@ -179,38 +180,44 @@ class Router:
async def _networking_recv(self):
while True:
try:
msg = await self._net.recv()
except NoPeersSubscribedToTopicError:
continue
except AllQueuesFullError:
logger.warning("All peer queues full, messages have been lost")
continue
match msg:
case PyMessage.Connection():
if CONNECTION_MESSAGES.topic in self.topic_routers:
router = self.topic_routers[CONNECTION_MESSAGES.topic]
assert router.topic.model_type == ConnectionMessage
router = cast(TopicRouter[ConnectionMessage], router)
await router.publish(
ConnectionMessage(
node_id=NodeId(msg.node_id), connected=msg.connected
)
)
case PyMessage.Gossip():
if msg.topic not in self.topic_routers:
logger.warning(
f"Received message on unknown or inactive topic {msg.topic}"
)
continue
logger.trace(
f"Received message on {msg.topic} with payload {msg.data}"
)
router = self.topic_routers[msg.topic]
await router.publish_bytes(msg.data)
case _:
raise ValueError("net recv returned something impossible")
async def _networking_publish(self):
with self.networking_receiver as networked_items:
async for topic, data in networked_items:
logger.trace(f"Sending message on {topic} with payload {data}")
await self._net.gossipsub_publish(topic, data)
def get_node_id_keypair(
@@ -221,7 +228,7 @@ def get_node_id_keypair(
    Obtain the :class:`PeerId` from it.
"""
# TODO(evan): bring back node id persistence once we figure out how to deal with duplicates
return Keypair.generate()
def lock_path(path: str | bytes | PathLike[str] | PathLike[bytes]) -> Path:
return Path(str(path) + ".lock")
@@ -235,12 +242,12 @@ def get_node_id_keypair(
protobuf_encoded = f.read()
try: # if decoded successfully, save & return
return Keypair.deserialize(protobuf_encoded)
except ValueError as e: # on runtime error, assume corrupt file
logger.warning(f"Encountered error when trying to get keypair: {e}")
# if no valid credentials, create new ones and persist
with open(path, "w+b") as f:
keypair = Keypair.generate()
f.write(keypair.serialize())
return keypair

View File

@@ -1,8 +1,7 @@
from dataclasses import dataclass
from enum import Enum
from exo.shared.election import ConnectionMessage, ElectionMessage
from exo.shared.types.commands import ForwarderCommand, ForwarderDownloadCommand
from exo.shared.types.events import (
ForwarderEvent,

View File

@@ -218,11 +218,6 @@ def apply_node_timed_out(event: NodeTimedOut, state: State) -> State:
key: value for key, value in state.downloads.items() if key != event.node_id
}
# Clean up all granular node mappings
node_identities = {
key: value
for key, value in state.node_identities.items()
if key != event.node_id
}
node_memory = {
key: value for key, value in state.node_memory.items() if key != event.node_id
}
@@ -263,7 +258,6 @@ def apply_node_timed_out(event: NodeTimedOut, state: State) -> State:
"downloads": downloads,
"topology": topology,
"last_seen": last_seen,
"node_identities": node_identities,
"node_memory": node_memory,
"node_disk": node_disk,
"node_system": node_system,

View File

@@ -10,7 +10,6 @@ from anyio import (
from anyio.abc import TaskGroup
from loguru import logger
from exo.shared.types.commands import ForwarderCommand
from exo.shared.types.common import NodeId, SessionId
from exo.utils.channels import Receiver, Sender
@@ -19,6 +18,11 @@ from exo.utils.pydantic_ext import CamelCaseModel
DEFAULT_ELECTION_TIMEOUT = 3.0
class ConnectionMessage(CamelCaseModel):
node_id: NodeId
connected: bool
class ElectionMessage(CamelCaseModel):
clock: int
seniority: int

View File

@@ -1,7 +1,7 @@
import pytest
from anyio import create_task_group, fail_after, move_on_after
from exo.routing.router import ConnectionMessage
from exo.shared.election import Election, ElectionMessage, ElectionResult
from exo.shared.types.commands import ForwarderCommand, TestCommand
from exo.shared.types.common import NodeId, SessionId
@@ -330,9 +330,7 @@ async def test_connection_message_triggers_new_round_broadcast() -> None:
await cm_tx.send(
ConnectionMessage(
node_id=NodeId(),
connected=True,
)
)

View File

@@ -23,7 +23,7 @@ def _get_keypair_concurrent_subprocess_task(
sem.release()
# wait to be told to begin simultaneous read
ev.wait()
queue.put(get_node_id_keypair().serialize())
def _get_keypair_concurrent(num_procs: int) -> bytes:

View File

@@ -3,8 +3,7 @@ from collections.abc import Generator
from typing import Annotated, Any, Literal
from uuid import uuid4
from pydantic import BaseModel, Field
from exo.shared.models.model_cards import ModelCard, ModelId
from exo.shared.types.common import CommandId, NodeId
@@ -228,13 +227,6 @@ class PlaceInstanceParams(BaseModel):
instance_meta: InstanceMeta = InstanceMeta.MlxRing
min_nodes: int = 1
@field_validator("sharding", "instance_meta", mode="plain")
@classmethod
def use_default(cls, v: object):
if not v or not isinstance(v, (Sharding, InstanceMeta)):
raise PydanticUseDefault()
return v
class CreateInstanceParams(BaseModel):
instance: Instance

View File

@@ -26,6 +26,7 @@ class DownloadProgressData(CamelCaseModel):
class BaseDownloadProgress(TaggedModel):
node_id: NodeId
shard_metadata: ShardMetadata
model_directory: str = ""
class DownloadPending(BaseDownloadProgress):

View File

@@ -62,6 +62,7 @@ class PartialImageResponse(BaseRunnerResponse):
class ToolCallResponse(BaseRunnerResponse):
tool_calls: list[ToolCallItem]
usage: Usage | None
stats: GenerationStats | None = None
class FinishedResponse(BaseRunnerResponse):

View File

@@ -1,5 +1,7 @@
import sys
def print_startup_banner(port: int) -> None:
"""Print a prominent startup banner with API endpoint information."""
dashboard_url = f"http://localhost:{port}"
banner = f"""
╔═══════════════════════════════════════════════════════════════════════╗
@@ -27,4 +29,4 @@ def print_startup_banner(port: int) -> None:
"""
print(banner, file=sys.stderr)

View File

@@ -125,7 +125,9 @@ class MpSender[T]:
self._state.buffer.put(item, block=True)
async def send_async(self, item: T) -> None:
await to_thread.run_sync(self.send, item, limiter=CapacityLimiter(1))
await to_thread.run_sync(
self.send, item, limiter=CapacityLimiter(1), abandon_on_cancel=True
)
def close(self) -> None:
if not self._state.closed.is_set():
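
`abandon_on_cancel=True` matters here because `send()` blocks on a queue put; without it, anyio waits for the worker thread to finish that put even after the caller has been cancelled. A sketch of the resulting behaviour (names are illustrative):

```python
import anyio

async def demo(sender, item) -> None:
    # cancelling this scope now returns promptly even if the blocking queue
    # put inside send() has not finished; the helper thread is abandoned
    with anyio.move_on_after(0.1):
        await sender.send_async(item)
```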

View File

@@ -306,7 +306,7 @@ def mlx_generate(
max_stop_len = max((len(s) for s in stop_sequences), default=0)
mx_barrier(group)
logger.info("Ready to prefill")
logger.info("Starting prefill")
# Prefill cache with all tokens except the last one
prefill_tps, prefill_tokens, ssm_snapshots_list = prefill(
@@ -393,10 +393,11 @@ def mlx_generate(
f"Model generated unexpected finish_reason: {out.finish_reason}"
)
total_prompt_tokens = len(all_prompt_tokens)
usage = Usage(
prompt_tokens=total_prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_prompt_tokens + completion_tokens,
prompt_tokens_details=PromptTokensDetails(
cached_tokens=prefix_hit_length
),

View File

@@ -353,7 +353,13 @@ def load_tokenizer_for_model_id(
return list(hf_tokenizer.model.encode(text, allowed_special="all")) # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType]
hf_tokenizer.encode = _patched_encode
return TokenizerWrapper(
hf_tokenizer,
eos_token_ids=eos_token_ids,
tool_call_start="<|tool_calls_section_begin|>",
tool_call_end="<|tool_calls_section_end|>",
tool_parser=_parse_kimi_tool_calls,
)
tokenizer = load_tokenizer(
model_path,
@@ -585,3 +591,41 @@ def mx_barrier(group: Group | None):
mx.array(1.0), group=group, stream=mx.default_stream(mx.Device(mx.cpu))
)
)
def _parse_kimi_tool_calls(text: str):
import regex as re
# kimi has a fixed function naming scheme, with a json formatted arg
# functions.multiply:0<|tool_call_argument_begin|>{"a": 2, "b": 3}
_func_name_regex = re.compile(
r"^\s*((?:functions\.)?(.+?):\d+)\s*<\|tool_call_argument_begin\|>", re.DOTALL
)
_func_arg_regex = re.compile(r"<\|tool_call_argument_begin\|>\s*(.*)\s*", re.DOTALL)
_tool_call_split_regex = re.compile(
r"<\|tool_call_begin\|>(.*?)<\|tool_call_end\|>", re.DOTALL
)
def _parse_single_tool(text: str) -> dict[str, Any]:
func_name_match = _func_name_regex.search(text)
if func_name_match is None:
raise ValueError("No tool call found.")
tool_call_id = func_name_match.group(1) # e.g. "functions.get_weather:0"
func_name = func_name_match.group(2) # e.g. "get_weather"
func_args_match = _func_arg_regex.search(text)
if func_args_match is None:
raise ValueError("No tool call arguments found.")
func_args = func_args_match.group(1)
try:
arg_dct = json.loads(func_args) # pyright: ignore[reportAny]
except Exception:
arg_dct = None
return dict(id=tool_call_id, name=func_name, arguments=arg_dct)
tool_matches = _tool_call_split_regex.findall(text)
if tool_matches:
return [_parse_single_tool(match) for match in tool_matches] # pyright: ignore[reportAny]
else:
return [_parse_single_tool(text)]
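
Worked example of the format the comment above describes (input values are illustrative):

```python
raw = (
    "<|tool_call_begin|>functions.multiply:0"
    '<|tool_call_argument_begin|>{"a": 2, "b": 3}<|tool_call_end|>'
)
print(_parse_kimi_tool_calls(raw))
# [{'id': 'functions.multiply:0', 'name': 'multiply', 'arguments': {'a': 2, 'b': 3}}]
```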

View File

@@ -1,11 +1,10 @@
import base64
import json
import math
import resource
import time
from collections.abc import Generator
from functools import cache
from typing import Any, Callable, Literal
from typing import Literal
import mlx.core as mx
from mlx_lm.models.gpt_oss import Model as GptOssModel
@@ -16,7 +15,6 @@ from openai_harmony import ( # pyright: ignore[reportMissingTypeStubs]
StreamableParser,
load_harmony_encoding,
)
from pydantic import ValidationError
from exo.shared.constants import EXO_MAX_CHUNK_SIZE, EXO_TRACING_ENABLED
from exo.shared.models.model_cards import ModelId, ModelTask
@@ -93,6 +91,8 @@ from exo.worker.engines.mlx.utils_mlx import (
)
from exo.worker.runner.bootstrap import logger
from .tool_parsers import ToolParser, make_mlx_parser
def _is_primary_output_node(shard_metadata: ShardMetadata) -> bool:
"""Check if this node is the primary output node for image generation.
@@ -138,6 +138,7 @@ def main(
inference_model: Model | None = None
image_model: DistributedImageModel | None = None
tokenizer = None
tool_parser: ToolParser | None = None
group = None
kv_prefix_cache: KVPrefixCache | None = None
check_for_cancel_every: int | None = None
@@ -203,8 +204,17 @@ def main(
bound_instance, group, on_timeout=on_model_load_timeout
)
logger.info(
f"model has_tool_calling={tokenizer.has_tool_calling}"
f"model has_tool_calling={tokenizer.has_tool_calling} using tokens {tokenizer.tool_call_start}, {tokenizer.tool_call_end}"
)
if tokenizer.has_tool_calling:
assert tokenizer.tool_call_start
assert tokenizer.tool_call_end
assert tokenizer.tool_parser # pyright: ignore[reportAny]
tool_parser = make_mlx_parser(
tokenizer.tool_call_start,
tokenizer.tool_call_end,
tokenizer.tool_parser, # pyright: ignore[reportAny]
)
kv_prefix_cache = KVPrefixCache(group)
elif (
@@ -233,7 +243,7 @@ def main(
assert inference_model
assert tokenizer
t = time.perf_counter()
t = time.monotonic()
toks = warmup_inference(
model=inference_model,
tokenizer=tokenizer,
@@ -241,7 +251,7 @@ def main(
)
logger.info(f"warmed up by generating {toks} tokens")
check_for_cancel_every = min(
math.ceil(toks / (time.perf_counter() - t)), 100
math.ceil(toks / max(time.monotonic() - t, 0.001)), 100
)
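# Worked example: 30 warmup tokens in 0.25 s -> ceil(30 / 0.25) = 120, capped to 100;
# 30 tokens in 0.6 s -> ceil(50) = 50. The runner thus polls for cancellation
# roughly once per second of generation.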
if group is not None:
check_for_cancel_every = int(
@@ -310,31 +320,11 @@ def main(
mlx_generator, tokenizer
)
# Kimi-K2 has tool call sections - we don't care about them
if "kimi" in shard_metadata.model_card.model_id.lower():
mlx_generator = filter_kimi_tokens(mlx_generator)
patch_kimi_tokenizer(tokenizer)
# GLM models need patched parser (upstream has bug with None regex match)
elif "glm" in shard_metadata.model_card.model_id.lower():
patch_glm_tokenizer(tokenizer)
# GPT-OSS specific parsing to match other model formats.
elif isinstance(inference_model, GptOssModel):
if isinstance(inference_model, GptOssModel):
mlx_generator = parse_gpt_oss(mlx_generator)
if tokenizer.has_tool_calling and not isinstance(
inference_model, GptOssModel
):
assert tokenizer.tool_call_start
assert tokenizer.tool_call_end
assert tokenizer.tool_parser # pyright: ignore[reportAny]
mlx_generator = parse_tool_calls(
mlx_generator,
tokenizer.tool_call_start,
tokenizer.tool_call_end,
tokenizer.tool_parser, # pyright: ignore[reportAny]
)
elif tool_parser:
mlx_generator = parse_tool_calls(mlx_generator, tool_parser)
completion_tokens = 0
tokens_since_last_cancel_check = 0
@@ -396,6 +386,7 @@ def main(
tool_calls=response.tool_calls,
model=shard_metadata.model_card.model_id,
usage=response.usage,
stats=response.stats,
),
)
)
@@ -559,9 +550,15 @@ def main(
raise ValueError(
f"Received {task.__class__.__name__} outside of state machine in {current_status=}"
)
event_sender.send(
TaskStatusUpdated(task_id=task.task_id, task_status=TaskStatus.Complete)
was_cancelled = (task.task_id in cancelled_tasks) or (
TaskId("CANCEL_CURRENT_TASK") in cancelled_tasks
)
if not was_cancelled:
event_sender.send(
TaskStatusUpdated(
task_id=task.task_id, task_status=TaskStatus.Complete
)
)
event_sender.send(
RunnerStatusUpdated(runner_id=runner_id, runner_status=current_status)
)
@@ -580,21 +577,8 @@ def get_gpt_oss_encoding():
return encoding
def filter_kimi_tokens(
responses: Generator[GenerationResponse | ToolCallResponse],
) -> Generator[GenerationResponse]:
for resp in responses:
assert isinstance(resp, GenerationResponse)
if (
resp.text == "<|tool_calls_section_begin|>"
or resp.text == "<|tool_calls_section_end|>"
):
continue
yield resp
def parse_gpt_oss(
responses: Generator[GenerationResponse | ToolCallResponse],
responses: Generator[GenerationResponse],
) -> Generator[GenerationResponse | ToolCallResponse]:
encoding = get_gpt_oss_encoding()
stream = StreamableParser(encoding, role=Role.ASSISTANT)
@@ -651,9 +635,9 @@ def parse_gpt_oss(
def parse_thinking_models(
responses: Generator[GenerationResponse | ToolCallResponse],
responses: Generator[GenerationResponse],
tokenizer: TokenizerWrapper,
) -> Generator[GenerationResponse | ToolCallResponse]:
) -> Generator[GenerationResponse]:
"""
For models that inject thinking tags in the prompt (like GLM-4.7),
prepend the thinking tag to the output stream so the frontend
@@ -774,218 +758,55 @@ def _process_image_response(
def parse_tool_calls(
responses: Generator[GenerationResponse | ToolCallResponse],
tool_call_start: str,
tool_call_end: str,
tool_parser: Callable[[str], dict[str, Any] | list[dict[str, Any]]],
responses: Generator[GenerationResponse], tool_parser: ToolParser
) -> Generator[GenerationResponse | ToolCallResponse]:
in_tool_call = False
tool_call_text_parts: list[str] = []
for response in responses:
assert isinstance(response, GenerationResponse)
# assumption: the tool call start is one token
if response.text == tool_call_start:
if response.text.startswith(tool_parser.start_parsing):
in_tool_call = True
continue
# assumption: the tool call end is one token
if in_tool_call and response.text == tool_call_end:
try:
# tool_parser returns an arbitrarily nested python dictionary
# we actually don't want the python dictionary, we just want to
# parse the top level { function: ..., arguments: ... } structure
# as we're just gonna hand it back to the api anyway
parsed = tool_parser("".join(tool_call_text_parts).strip())
logger.info(f"parsed {tool_call_text_parts=} into {parsed=}")
if isinstance(parsed, list):
tools = [_validate_single_tool(tool) for tool in parsed]
else:
tools = [_validate_single_tool(parsed)]
yield ToolCallResponse(tool_calls=tools, usage=response.usage)
except (
json.JSONDecodeError,
ValidationError,
ValueError,
AttributeError,
) as e:
# ValueError: our parsers raise this for malformed tool calls
# AttributeError: upstream parsers (e.g. glm47) may raise this when regex doesn't match
logger.opt(exception=e).warning("tool call parsing failed")
# assumption: talking about tool calls, not making a tool call
response.text = (
tool_call_start + "".join(tool_call_text_parts) + tool_call_end
)
yield response
in_tool_call = False
tool_call_text_parts = []
continue
if in_tool_call:
tool_call_text_parts.append(response.text)
if response.text.endswith(tool_parser.end_parsing):
# parse the actual tool calls from the tool call text
parsed = tool_parser.parse_tool_calls(
"".join(tool_call_text_parts).strip()
)
logger.info(f"parsed {tool_call_text_parts=} into {parsed=}")
if parsed is not None:
yield ToolCallResponse(
tool_calls=parsed, usage=response.usage, stats=response.stats
)
else:
logger.warning(
f"tool call parsing failed for text {''.join(tool_call_text_parts)}"
)
response.text = "".join(tool_call_text_parts)
yield response
in_tool_call = False
tool_call_text_parts = []
continue
if response.finish_reason is not None:
logger.info(
"toll call parsing interrupted, yield partial tool call as text"
"tool call parsing interrupted, yield partial tool call as text"
)
yield GenerationResponse(
text=tool_call_start + "".join(tool_call_text_parts),
token=0,
finish_reason=response.finish_reason,
usage=None,
response = response.model_copy(
update={
"text": "".join(tool_call_text_parts),
"token": 0,
}
)
yield response
continue
# fallthrough
yield response
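The rewritten parse_tool_calls above boils down to a start/end accumulation loop. A self-contained toy reduction of just that loop (names are illustrative, not from the codebase):

def _collect_between(chunks, start, end):
    # Drop the start marker, buffer chunks until one ends with the end marker,
    # then emit the buffered text with both markers stripped.
    buf, inside = [], False
    for chunk in chunks:
        if not inside and chunk.startswith(start):
            inside = True
            continue
        if inside:
            buf.append(chunk)
            if chunk.endswith(end):
                yield "".join(buf).removeprefix(start).removesuffix(end)
                buf, inside = [], False
            continue
        yield chunk

list(_collect_between(["hi ", "<tool_call>", '{"name": "f"}', "</tool_call>", " bye"],
                      "<tool_call>", "</tool_call>"))
# -> ['hi ', '{"name": "f"}', ' bye']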
def patch_kimi_tokenizer(tokenizer: TokenizerWrapper):
"""
Version of to-be-upstreamed kimi-k2 tool parser
"""
import ast
import json
from typing import Any
import regex as re
# kimi has a fixed function naming scheme, with a json formatted arg
# functions.multiply:0 <|tool_call_argument_begin|> {"a": 2, "b": 3}
# Also needs to handle tools like call_0<|tool_call_argument_begin|>{"filePath": "..."}
_func_name_regex = re.compile(
r"^\s*(.+)[:](\d+)\s*<\|tool_call_argument_begin\|>", re.DOTALL
)
_func_arg_regex = re.compile(r"<\|tool_call_argument_begin\|>\s*(.*)\s*", re.DOTALL)
# kimi has a tool_calls_section - we're leaving this up to the caller to handle
tool_call_start = "<|tool_call_begin|>"
tool_call_end = "<|tool_call_end|>"
def _deserialize(value: str) -> Any: # pyright: ignore[reportAny]
try:
return json.loads(value) # pyright: ignore[reportAny]
except Exception:
pass
try:
return ast.literal_eval(value) # pyright: ignore[reportAny]
except Exception:
pass
return value
def parse_tool_call(text: str, tools: Any | None = None):
func_name_match = _func_name_regex.search(text)
if func_name_match is None:
raise ValueError(f"Could not parse function name from tool call: {text!r}")
original_func_name = func_name_match.group(1)
tool_id = func_name_match.group(2)
# strip off the `functions.` prefix, if it exists.
func_name = original_func_name[original_func_name.find(".") + 1 :]
func_args_match = _func_arg_regex.search(text)
if func_args_match is None:
raise ValueError(f"Could not parse function args from tool call: {text!r}")
func_args = func_args_match.group(1)
# the args should be valid json - no need to check against our tools to deserialize
arg_dct = _deserialize(func_args) # pyright: ignore[reportAny]
return dict(
id=f"{original_func_name}:{tool_id}",
name=func_name,
arguments=arg_dct, # pyright: ignore[reportAny]
)
tokenizer._tool_call_start = tool_call_start
tokenizer._tool_call_end = tool_call_end
tokenizer._tool_parser = parse_tool_call
def patch_glm_tokenizer(tokenizer: TokenizerWrapper):
"""
Fixed version of mlx_lm's glm47 tool parser that handles regex match failures.
"""
import ast
import json
from typing import Any
import regex as re
_func_name_regex = re.compile(r"^(.*?)<arg_key>", re.DOTALL)
_func_arg_regex = re.compile(
r"<arg_key>(.*?)</arg_key>(?:\n|\s)*<arg_value>(.*?)(?:</arg_value>|(?=<arg_key>)|$)",
re.DOTALL,
)
tool_call_start = "<tool_call>"
tool_call_end = "</tool_call>"
def _is_string_type(
tool_name: str,
arg_name: str,
tools: list[Any] | None,
) -> bool:
if tools is None:
return False
for tool in tools: # pyright: ignore[reportAny]
func = tool["function"] # pyright: ignore[reportAny]
if func["name"] == tool_name:
params = func["parameters"] # pyright: ignore[reportAny]
if params is None:
return False
props = params.get("properties", {}) # pyright: ignore[reportAny]
arg_props = props.get(arg_name, {}) # pyright: ignore[reportAny]
arg_type = arg_props.get("type", None) # pyright: ignore[reportAny]
return arg_type == "string" # pyright: ignore[reportAny]
return False
def _deserialize(value: str) -> Any: # pyright: ignore[reportAny]
try:
return json.loads(value) # pyright: ignore[reportAny]
except Exception:
pass
try:
return ast.literal_eval(value) # pyright: ignore[reportAny]
except Exception:
pass
return value
def parse_tool_call(text: str, tools: list[Any] | None = None):
func_name_match = _func_name_regex.search(text)
if func_name_match is None:
raise ValueError(f"Could not parse function name from tool call: {text!r}")
func_name = func_name_match.group(1)
pairs = _func_arg_regex.findall(text)
arg_dct: dict[str, Any] = {}
for key, value in pairs: # pyright: ignore[reportAny]
arg_key = key.strip() # pyright: ignore[reportAny]
arg_val = value.strip() # pyright: ignore[reportAny]
if not _is_string_type(func_name, arg_key, tools): # pyright: ignore[reportAny]
arg_val = _deserialize(arg_val) # pyright: ignore[reportAny]
arg_dct[arg_key] = arg_val
return dict(name=func_name, arguments=arg_dct)
tokenizer._tool_call_start = tool_call_start
tokenizer._tool_call_end = tool_call_end
tokenizer._tool_parser = parse_tool_call
def _validate_single_tool(obj: dict[str, Any]) -> ToolCallItem:
if (
((name := obj.get("name")) is not None)
and ((args := obj.get("arguments")) is not None)
and isinstance(name, str)
):
raw_id: object = obj.get("id")
extra = {"id": str(raw_id)} if raw_id is not None else {}
return ToolCallItem(
**extra,
name=name,
arguments=json.dumps(args),
)
else:
raise ValidationError
EXO_RUNNER_MUST_FAIL = "EXO RUNNER MUST FAIL"
EXO_RUNNER_MUST_OOM = "EXO RUNNER MUST OOM"
EXO_RUNNER_MUST_TIMEOUT = "EXO RUNNER MUST TIMEOUT"


@@ -101,6 +101,7 @@ class RunnerSupervisor:
self._ev_recv.close()
self._task_sender.close()
self._event_sender.close()
self._cancel_sender.send(TaskId("CANCEL_CURRENT_TASK"))
self._cancel_sender.close()
self.runner_process.join(1)
if not self.runner_process.is_alive():
@@ -143,7 +144,11 @@ class RunnerSupervisor:
logger.info(f"Unable to cancel {task_id} as it has been completed")
return
self.cancelled.add(task_id)
await self._cancel_sender.send_async(task_id)
with anyio.move_on_after(0.5) as scope:
await self._cancel_sender.send_async(task_id)
if scope.cancel_called:
logger.error("RunnerSupervisor cancel pipe blocked")
await self._check_runner(TimeoutError("cancel pipe blocked"))
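The bounded send above relies on anyio's cancel scopes. A minimal self-contained sketch of the same pattern (slow_send stands in for a pipe whose consumer never drains it):

import anyio

async def send_with_deadline(send, item, timeout=0.5):
    # move_on_after cancels the awaited send once the deadline passes;
    # cancel_called reports whether that happened.
    with anyio.move_on_after(timeout) as scope:
        await send(item)
    return not scope.cancel_called

async def main():
    async def slow_send(item):
        await anyio.sleep(2)  # never completes within the deadline
    print(await send_with_deadline(slow_send, "CANCEL_CURRENT_TASK"))  # False

anyio.run(main)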
async def _forward_events(self):
with self._ev_recv as events:


@@ -0,0 +1,72 @@
import json
from dataclasses import dataclass
from typing import Any, Callable
from exo.shared.types.api import ToolCallItem
@dataclass
class ToolParser:
start_parsing: str
end_parsing: str
parse_tool_calls: Callable[[str], list[ToolCallItem] | None]
def make_mlx_parser(
tool_call_start: str,
tool_call_end: str,
tool_parser: Callable[[str], dict[str, Any] | list[dict[str, Any]]],
) -> ToolParser:
def parse_tool_calls(text: str) -> list[ToolCallItem] | None:
try:
text = text.removeprefix(tool_call_start)
text = text.removesuffix(tool_call_end)
parsed = tool_parser(text)
if isinstance(parsed, list):
return [ToolCallItem.model_validate(_flatten(p)) for p in parsed]
else:
return [ToolCallItem.model_validate(_flatten(parsed))]
except Exception:
return None
return ToolParser(
start_parsing=tool_call_start,
end_parsing=tool_call_end,
parse_tool_calls=parse_tool_calls,
)
# TODO / example code:
def _parse_json_calls(text: str) -> list[ToolCallItem] | None:
try:
text = text.removeprefix("<tool_call>")
text = text.removesuffix("</tool_call>")
top_level = {
k: json.dumps(v) if isinstance(v, (dict, list)) else v
for k, v in json.loads(text).items() # pyright: ignore[reportAny]
}
return [ToolCallItem.model_validate(top_level)]
except Exception:
return None
def _flatten(p: dict[str, Any]) -> dict[str, str]:
return {
k: json.dumps(v) if isinstance(v, (dict, list)) else str(v) # pyright: ignore[reportAny]
for k, v in p.items() # pyright: ignore[reportAny]
}
json_tool_parser = ToolParser(
start_parsing="<tool_call>",
end_parsing="</tool_call>",
parse_tool_calls=_parse_json_calls,
)
def infer_tool_parser(chat_template: str) -> ToolParser | None:
"""Attempt to auto-infer a tool parser from the chat template."""
if "<tool_call>" in chat_template and "tool_call.name" in chat_template:
return json_tool_parser
return None
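How the pieces in this file compose, under its own assumptions (ToolCallItem takes a name plus arguments serialized as a JSON string; the calls below are illustrative):

template = '... <tool_call>{{ tool_call.name }} ...'
parser = infer_tool_parser(template)  # both markers present -> json_tool_parser
calls = parser.parse_tool_calls(
    '<tool_call>{"name": "get_weather", "arguments": {"city": "Paris"}}</tool_call>'
)
# -> [ToolCallItem(name="get_weather", arguments='{"city": "Paris"}')]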


@@ -1,4 +1,5 @@
# Check tasks are complete before runner is ever ready.
import unittest.mock
from collections.abc import Iterable
from typing import Callable
@@ -115,7 +116,6 @@ def patch_out_mlx(monkeypatch: pytest.MonkeyPatch):
monkeypatch.setattr(mlx_runner, "load_mlx_items", make_nothin((1, MockTokenizer)))
monkeypatch.setattr(mlx_runner, "warmup_inference", make_nothin(1))
monkeypatch.setattr(mlx_runner, "_check_for_debug_prompts", nothin)
monkeypatch.setattr(mx.distributed, "all_gather", make_nothin(mx.array([1])))
monkeypatch.setattr(mlx_runner, "mx_any", make_nothin(False))
# Mock apply_chat_template since we're using a fake tokenizer (integer 1).
# Returns a prompt without thinking tag so detect_thinking_prompt_suffix returns None.
@@ -178,8 +178,16 @@ def _run(tasks: Iterable[Task]):
# this is some c++ nonsense
task_receiver.close = nothin
task_receiver.join = nothin
mlx_runner.main(bound_instance, event_sender, task_receiver, cancel_receiver) # pyright: ignore[reportArgumentType]
with unittest.mock.patch(
"exo.worker.runner.runner.mx.distributed.all_gather",
make_nothin(mx.array([1])),
):
mlx_runner.main(
bound_instance,
event_sender, # pyright: ignore[reportArgumentType]
task_receiver,
cancel_receiver,
)
return event_sender.events
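The test now stubs all_gather with a scoped unittest.mock.patch at the point of use instead of a session-wide monkeypatch.setattr. A minimal illustration of the scoping (math.cos is a stand-in for all_gather):

import math
import unittest.mock

with unittest.mock.patch("math.cos", lambda x: 0.0):
    assert math.cos(0.0) == 0.0  # stubbed only inside the with-block
assert math.cos(0.0) == 1.0  # restored automatically on exit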


@@ -5,12 +5,13 @@ from typing import Any
from exo.shared.types.worker.runner_response import GenerationResponse, ToolCallResponse
from exo.worker.runner.runner import parse_tool_calls
from exo.worker.runner.tool_parsers import make_mlx_parser
def _make_responses(
texts: list[str],
finish_on_last: bool = True,
) -> Generator[GenerationResponse | ToolCallResponse]:
) -> Generator[GenerationResponse]:
"""Create a sequence of GenerationResponses from text strings."""
for i, text in enumerate(texts):
is_last = i == len(texts) - 1
@@ -22,10 +23,13 @@ def _make_responses(
)
def _dummy_parser(text: str) -> dict[str, Any]:
def _dummier_parser(text: str) -> dict[str, Any]:
return {"name": "test_fn", "arguments": {"arg": text}}
_dummy_parser = make_mlx_parser("<tool_call>", "</tool_call>", _dummier_parser)
class TestParseToolCalls:
"""Tests for parse_tool_calls generator."""
@@ -35,8 +39,6 @@ class TestParseToolCalls:
results = list(
parse_tool_calls(
_make_responses(texts, finish_on_last=False),
"<tool_call>",
"</tool_call>",
_dummy_parser,
)
)
@@ -50,8 +52,6 @@ class TestParseToolCalls:
results = list(
parse_tool_calls(
_make_responses(texts),
"<tool_call>",
"</tool_call>",
_dummy_parser,
)
)
@@ -76,9 +76,7 @@ class TestParseToolCalls:
results = list(
parse_tool_calls(
_make_responses(texts, finish_on_last=False),
"<tool_call>",
"</tool_call>",
_failing_parser,
make_mlx_parser("<tool_call>", "</tool_call>", _failing_parser),
)
)


@@ -43,4 +43,5 @@ for host; do
echo "Waiting for $host..."
until curl -sf "http://$host:52415/models" &>/dev/null; do sleep 1; done
done
echo "all hosts alive!"
wait

uv.lock (generated)

@@ -377,8 +377,8 @@ dependencies = [
{ name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
{ name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cpu"], marker = "sys_platform == 'linux'" },
{ name = "mlx", version = "0.30.7.dev20260217+50487b41", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }, marker = "sys_platform == 'darwin'" },
{ name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -416,9 +416,9 @@ requires-dist = [
{ name = "hypercorn", specifier = ">=0.18.0" },
{ name = "loguru", specifier = ">=0.7.3" },
{ name = "mflux", specifier = "==0.15.5" },
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.6" },
{ name = "mlx", marker = "sys_platform == 'darwin'", git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks" },
{ name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.6" },
{ name = "mlx-lm", specifier = "==0.30.6" },
{ name = "mlx-lm", specifier = "==0.30.7" },
{ name = "msgspec", specifier = ">=0.19.0" },
{ name = "openai-harmony", specifier = ">=0.0.8" },
{ name = "pillow", specifier = ">=11.0,<12.0" },
@@ -1020,8 +1020,8 @@ dependencies = [
{ name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "matplotlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx", extra = ["cuda13"], marker = "sys_platform == 'linux'" },
{ name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cuda13"], marker = "sys_platform == 'linux'" },
{ name = "mlx", version = "0.30.7.dev20260217+50487b41", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }, marker = "sys_platform == 'darwin'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "piexif", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1048,18 +1048,12 @@ wheels = [
name = "mlx"
version = "0.30.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mlx-metal", marker = "sys_platform == 'darwin'" },
resolution-markers = [
"sys_platform == 'linux'",
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ae/5b/e460e144a34d5529e010056cccf50b538d56ed001473bc6b246018fd58cb/mlx-0.30.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ed86f8bffc174c2f259ca589ea25464c96cf69d1bb457074a2bf2ef53737e54f", size = 573515, upload-time = "2026-02-06T03:45:23.405Z" },
{ url = "https://files.pythonhosted.org/packages/60/25/69833fefb9a3fef30b56792b1bcd022496c4fea83e45411d289b77ef7546/mlx-0.30.6-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:c52294958269e20f300639a17c1900ca8fc737d859ddda737f9811e94bd040e5", size = 573516, upload-time = "2026-02-06T03:45:24.618Z" },
{ url = "https://files.pythonhosted.org/packages/9c/6a/7e7fbeebc5cb51b6a5eba96b263a6298707bcbdc059f4b0b73e088bc3dea/mlx-0.30.6-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:b5b6636f7c49a4d86d8ec82643b972f45a144a7a9f3a967b27b2e6e22cf71e6a", size = 573592, upload-time = "2026-02-06T03:45:25.928Z" },
{ url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" },
{ url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" },
{ url = "https://files.pythonhosted.org/packages/60/23/361dc7a5797634e4d7e9bdd6564c6b28f9b1246672632def2f91bf066b18/mlx-0.30.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:78804a89dcff4a838f7c2da72392fe87a523e95122a3c840e53df019122aad45", size = 575028, upload-time = "2026-02-06T03:45:31.549Z" },
{ url = "https://files.pythonhosted.org/packages/a8/69/1854484d414171586814dfbe8def95f75c4ea2c7341ba13ba8ee675f7c62/mlx-0.30.6-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ec13584ab069665cc7ad34a05494d9291cd623aef6ae96be48875fc87cfc25d6", size = 575026, upload-time = "2026-02-06T03:45:33.072Z" },
{ url = "https://files.pythonhosted.org/packages/6b/b8/3adbc441924209a7e4c568308b2a0b54bd09aee6a68db5bae85304791e54/mlx-0.30.6-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:b2c5e8a090a753ef99a1380a4d059c983083f36198864f6df9faaf1223d083df", size = 575041, upload-time = "2026-02-06T03:45:34.814Z" },
{ url = "https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" },
{ url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" },
]
@@ -1072,6 +1066,14 @@ cuda13 = [
{ name = "mlx-cuda-13", marker = "sys_platform == 'linux'" },
]
[[package]]
name = "mlx"
version = "0.30.7.dev20260217+50487b41"
source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }
resolution-markers = [
"sys_platform == 'darwin'",
]
[[package]]
name = "mlx-cpu"
version = "0.30.6"
@@ -1098,30 +1100,20 @@ wheels = [
[[package]]
name = "mlx-lm"
version = "0.30.6"
version = "0.30.7"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx", marker = "sys_platform == 'darwin'" },
{ name = "mlx", version = "0.30.7.dev20260217+50487b41", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#50487b4141f3c951122655db3b83df5146c1fbeb" }, marker = "sys_platform == 'darwin'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/76/cb/815deddc8699b1f694d7e1f9cbed52934c03a8b49432c8add72932bb2f0b/mlx_lm-0.30.6.tar.gz", hash = "sha256:807e042d7040268f1b19190b7eaefd8b2efbff5590a65460974ad4225b91dda1", size = 271733, upload-time = "2026-02-04T21:27:45.741Z" }
sdist = { url = "https://files.pythonhosted.org/packages/66/0d/56542e2ae13ec6f542d3977d7cff89a205d4f6c5122e0ce23f33265f61c9/mlx_lm-0.30.7.tar.gz", hash = "sha256:e5f31ac58d9f2381f28e1ba639ff903e64f7cff1bdc245c0bc97f72264be329c", size = 275764, upload-time = "2026-02-12T18:41:11.86Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/5f/01d281f1fa8a1521d5936659beb4f5ab1f32b463d059263cf9d4cef969d9/mlx_lm-0.30.6-py3-none-any.whl", hash = "sha256:a7405bd581eacc4bf8209d7a6b7f23629585a0d7c6740c2a97e51fee35b3b0e1", size = 379451, upload-time = "2026-02-04T21:27:43.222Z" },
]
[[package]]
name = "mlx-metal"
version = "0.30.6"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f3/85/44406b521f920248fad621334d4dc15e77660a494edf890e7cbee33bf38d/mlx_metal-0.30.6-py3-none-macosx_14_0_arm64.whl", hash = "sha256:ea6d0c973def9a5b4f652cc77036237db3f88c9d0af63701d76b5fddde99b820", size = 38437818, upload-time = "2026-02-06T03:44:56.19Z" },
{ url = "https://files.pythonhosted.org/packages/d0/cb/10a516995f7d0c154b0d7e633c54b51e96977a86a355105b6474cfcbe0d0/mlx_metal-0.30.6-py3-none-macosx_15_0_arm64.whl", hash = "sha256:0f8cb94634d07e06a372d6ad9a090f38a18bab1ff19a140aede60eacf707bb94", size = 38433701, upload-time = "2026-02-06T03:44:59.678Z" },
{ url = "https://files.pythonhosted.org/packages/4c/7d/70cb272f7373c334709f210ed8420511fc9d64d05a7a646c0b3b94c29c04/mlx_metal-0.30.6-py3-none-macosx_26_0_arm64.whl", hash = "sha256:d761ae26304f2c4b454eeea7f612a56919d9e5e57dbb1dc0788f8e34aa6f41c2", size = 47718448, upload-time = "2026-02-06T03:45:03.133Z" },
{ url = "https://files.pythonhosted.org/packages/1e/17/a41c798a3d9cbdc47f39c6db5bba4c2cd199203ead26bf911cb03b644070/mlx_lm-0.30.7-py3-none-any.whl", hash = "sha256:17442a4bf01c4c2d3bca1e647712fe44f19890c3f1eadc8589d389e57b44b9bf", size = 386591, upload-time = "2026-02-12T18:41:10.236Z" },
]
[[package]]