woahg

2026-02-18 14:55:13 -05:00 · 2026-02-17 11:44:23 +00:00
85 changed files with 1341 additions and 3337 deletions
--- a/.mlx_typings/mlx_lm/models/glm_moe_dsa.pyi
+++ b/.mlx_typings/mlx_lm/models/glm_moe_dsa.pyi
@@ -1,46 +0,0 @@
-"""Type stubs for mlx_lm.models.glm_moe_dsa"""
-
-from dataclasses import dataclass
-from typing import Any, Dict, Optional
-
-from .base import BaseModelArgs
-from .deepseek_v32 import Model as DSV32Model
-
-@dataclass
-class ModelArgs(BaseModelArgs):
-    model_type: str
-    vocab_size: int
-    hidden_size: int
-    index_head_dim: int
-    index_n_heads: int
-    index_topk: int
-    intermediate_size: int
-    moe_intermediate_size: int
-    num_hidden_layers: int
-    num_attention_heads: int
-    num_key_value_heads: int
-    n_shared_experts: Optional[int]
-    n_routed_experts: Optional[int]
-    routed_scaling_factor: float
-    kv_lora_rank: int
-    q_lora_rank: int
-    qk_rope_head_dim: int
-    v_head_dim: int
-    qk_nope_head_dim: int
-    topk_method: str
-    scoring_func: str
-    norm_topk_prob: bool
-    n_group: int
-    topk_group: int
-    num_experts_per_tok: int
-    moe_layer_freq: int
-    first_k_dense_replace: int
-    max_position_embeddings: int
-    rms_norm_eps: float
-    rope_parameters: Dict[str, Any]
-    attention_bias: bool
-    rope_scaling: Dict[str, Any] | None
-    rope_theta: float | None
-
-class Model(DSV32Model):
-    def __init__(self, config: ModelArgs) -> None: ...
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -673,17 +673,6 @@ dependencies = [
 "syn 2.0.111",
 ]

-[[package]]
-name = "delegate"
-version = "0.13.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "780eb241654bf097afb00fc5f054a09b687dad862e485fdcf8399bb056565370"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.111",
-]
-
 [[package]]
 name = "der"
 version = "0.7.10"
@@ -887,31 +876,16 @@ dependencies = [
 name = "exo_pyo3_bindings"
 version = "0.0.1"
 dependencies = [
- "delegate",
 "env_logger",
- "extend",
 "futures-lite",
 "libp2p",
 "log",
 "networking",
- "pin-project",
 "pyo3",
 "pyo3-async-runtimes",
 "pyo3-log",
 "pyo3-stub-gen",
 "tokio",
- "util",
-]
-
-[[package]]
-name = "extend"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "311a6d2f1f9d60bff73d2c78a0af97ed27f79672f15c238192a5bbb64db56d00"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.111",
 ]

 [[package]]
@@ -1747,12 +1721,6 @@ dependencies = [
 "cpufeatures",
 ]

-[[package]]
-name = "keccak-const"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57d8d8ce877200136358e0bbff3a77965875db3af755a11e1fa6b1b3e2df13ea"
-
 [[package]]
 name = "lalrpop-util"
 version = "0.20.2"
@@ -2759,17 +2727,10 @@ dependencies = [
 name = "networking"
 version = "0.0.1"
 dependencies = [
- "delegate",
- "either",
- "extend",
- "futures-lite",
- "futures-timer",
- "keccak-const",
 "libp2p",
 "log",
 "tokio",
 "tracing-subscriber",
- "util",
 ]

 [[package]]
@@ -4599,10 +4560,6 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

-[[package]]
-name = "util"
-version = "0.0.1"
-
 [[package]]
 name = "uuid"
 version = "1.19.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,6 @@ resolver = "3"
 members = [
    "rust/networking",
    "rust/exo_pyo3_bindings",
-    "rust/util",
 ]

 [workspace.package]
@@ -24,33 +23,18 @@ opt-level = 3
 [workspace.dependencies]
 ## Crate members as common dependencies
 networking = { path = "rust/networking" }
-util = { path = "rust/util" }
-
-# Macro dependecies
-extend = "1.2"
-delegate = "0.13"
-
-# Utility dependencies
-keccak-const = "0.2"

 # Async dependencies
 tokio = "1.46"
-futures-lite = "2.6.1"
-futures-timer = "3.0"
-
-# Data structures
-either = "1.15"

 # Tracing/logging
 log = "0.4"

 # networking
 libp2p = "0.56"
-libp2p-tcp = "0.44"

 [workspace.lints.rust]
-static_mut_refs = "warn"      # Or use "warn" instead of deny
-incomplete_features = "allow"
+static_mut_refs = "warn"

 # Clippy's lint category level configurations;
 # every member crate needs to inherit these by adding
@@ -71,64 +55,3 @@ perf = { level = "warn", priority = -1 }
 pedantic = { level = "warn", priority = -1 }
 nursery = { level = "warn", priority = -1 }
 cargo = { level = "warn", priority = -1 }
-
-# Individual Clippy lints from the `restriction` category
-arithmetic_side_effects = "warn"
-as_conversions = "warn"
-assertions_on_result_states = "warn"
-clone_on_ref_ptr = "warn"
-decimal_literal_representation = "warn"
-default_union_representation = "warn"
-deref_by_slicing = "warn"
-disallowed_script_idents = "deny"
-else_if_without_else = "warn"
-empty_enum_variants_with_brackets = "warn"
-empty_structs_with_brackets = "warn"
-error_impl_error = "warn"
-exit = "deny"
-expect_used = "warn"
-float_cmp_const = "warn"
-get_unwrap = "warn"
-if_then_some_else_none = "warn"
-impl_trait_in_params = "warn"
-indexing_slicing = "warn"
-infinite_loop = "warn"
-let_underscore_must_use = "warn"
-let_underscore_untyped = "warn"
-lossy_float_literal = "warn"
-mem_forget = "warn"
-missing_inline_in_public_items = "warn"
-multiple_inherent_impl = "warn"
-multiple_unsafe_ops_per_block = "warn"
-mutex_atomic = "warn"
-non_zero_suggestions = "warn"
-panic = "warn"
-partial_pub_fields = "warn"
-pattern_type_mismatch = "warn"
-pub_without_shorthand = "warn"
-rc_buffer = "warn"
-rc_mutex = "warn"
-redundant_type_annotations = "warn"
-renamed_function_params = "warn"
-rest_pat_in_fully_bound_structs = "warn"
-same_name_method = "warn"
-self_named_module_files = "deny"
-semicolon_inside_block = "warn"
-shadow_same = "warn"
-shadow_unrelated = "warn"
-str_to_string = "warn"
-string_add = "warn"
-string_lit_chars_any = "warn"
-string_to_string = "warn"
-tests_outside_test_module = "warn"
-todo = "warn"
-try_err = "warn"
-undocumented_unsafe_blocks = "warn"
-unnecessary_safety_comment = "warn"
-unnecessary_safety_doc = "warn"
-unneeded_field_pattern = "warn"
-unseparated_literal_suffix = "warn"
-unused_result_ok = "warn"
-unused_trait_names = "warn"
-unwrap_used = "warn"
-verbose_file_reads = "warn"
--- a/MISSED_THINGS.md
+++ b/MISSED_THINGS.md
@@ -1,5 +1,5 @@
 # Missed things
-[X] Log EXO_LIBP2P_NAMESPACE on start in exo/main.py
+[X] Log namespace on start in exo/main.py
 [X] Ordering of warmup was changed, which is wrong. It was changed to rank < n-1, then rank=n-1. It should be rank!=0 then rank=0 (this matches the auto_parallel implementation. NOTE: we use a different convention to mlx-lm, our terminal rank is rank=n-1 whereas mlx-lm is rank=0 hence i can see why this was changed wrongly).
 [X] Downloads keying by model_id not shard_metadata (worker/plan.py, worker/main.py).
 [X] Fetching download status of all models on start
--- a/README.md
+++ b/README.md
@@ -72,23 +72,16 @@ There are two ways to run exo:

 ### Run from Source (macOS)

-If you have [Nix](https://nixos.org/) installed, you can skip most of the steps below and run exo directly (after accepting the Cachix cache):
-
-```bash
-nix run .#exo
-```
-
 **Prerequisites:**
- [Xcode](https://developer.apple.com/xcode/) (provides the Metal ToolChain required for MLX compilation)
 - [brew](https://github.com/Homebrew/brew) (for simple package management on macOS)
-
+  
  ```bash
  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  ```
 - [uv](https://github.com/astral-sh/uv) (for Python dependency management)
 - [macmon](https://github.com/vladkens/macmon) (for hardware monitoring on Apple Silicon)
 - [node](https://github.com/nodejs/node) (for building the dashboard)
-
+  
  ```bash
  brew install uv macmon node
  ```
@@ -206,14 +199,14 @@ The app will ask for permission to modify system settings and install a new Netw

 **Custom Namespace for Cluster Isolation:**

-The macOS app includes a custom namespace feature that allows you to isolate your exo cluster from others on the same network. This is configured through the `EXO_LIBP2P_NAMESPACE` setting:
+The macOS app includes a custom namespace feature that allows you to isolate your exo cluster from others on the same network. This is configured through the `--namespace` CLI argument:

 - **Use cases**:
  - Running multiple separate exo clusters on the same network
  - Isolating development/testing clusters from production clusters
  - Preventing accidental cluster joining

- **Configuration**: Access this setting in the app's Advanced settings (or set the `EXO_LIBP2P_NAMESPACE` environment variable when running from source)
+- **Configuration**: Access this setting in the app's Advanced settings (or pass the `--namespace` argument when running from source)

 The namespace is logged on startup for debugging purposes.

@@ -425,4 +418,4 @@ On macOS, exo uses the GPU. On Linux, exo currently runs on CPU. We are working

 ## Contributing

-See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to contribute to exo.
+See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to contribute to exo.
--- a/app/EXO/EXO/ExoProcessController.swift
+++ b/app/EXO/EXO/ExoProcessController.swift
@@ -82,6 +82,7 @@ final class ExoProcessController: ObservableObject {

            let child = Process()
            child.executableURL = executableURL
+            child.arguments = ["--namespace", computeNamespace()]
            let exoHomeURL = Self.exoDirectoryURL
            try? FileManager.default.createDirectory(
                at: exoHomeURL, withIntermediateDirectories: true
@@ -126,37 +127,11 @@ final class ExoProcessController: ObservableObject {
            return
        }
        process.terminationHandler = nil
-        status = .stopped
-
-        guard process.isRunning else {
-            self.process = nil
-            return
+        if process.isRunning {
+            process.terminate()
        }
-
-        let proc = process
        self.process = nil
-
-        Task.detached {
-            proc.interrupt()
-
-            for _ in 0..<50 {
-                if !proc.isRunning { return }
-                try? await Task.sleep(nanoseconds: 100_000_000)
-            }
-
-            if proc.isRunning {
-                proc.terminate()
-            }
-
-            for _ in 0..<30 {
-                if !proc.isRunning { return }
-                try? await Task.sleep(nanoseconds: 100_000_000)
-            }
-
-            if proc.isRunning {
-                kill(proc.processIdentifier, SIGKILL)
-            }
-        }
+        status = .stopped
    }

    func restart() {
@@ -242,7 +217,6 @@ final class ExoProcessController: ObservableObject {
    private func makeEnvironment(for runtimeURL: URL) -> [String: String] {
        var environment = ProcessInfo.processInfo.environment
        environment["EXO_RUNTIME_DIR"] = runtimeURL.path
-        environment["EXO_LIBP2P_NAMESPACE"] = computeNamespace()
        if !hfToken.isEmpty {
            environment["HF_TOKEN"] = hfToken
        }
--- a/dashboard/src/lib/components/ChatForm.svelte
+++ b/dashboard/src/lib/components/ChatForm.svelte
@@ -103,7 +103,7 @@
  const modelSupportsThinking = $derived(() => {
    if (!currentModel) return false;
    const caps = modelCapabilities[currentModel] || [];
-    return caps.includes("thinking_toggle") && caps.includes("text");
+    return caps.includes("thinking") && caps.includes("text");
  });

  const isEditOnlyWithoutImage = $derived(
--- a/dashboard/src/lib/components/ImageParamsPanel.svelte
+++ b/dashboard/src/lib/components/ImageParamsPanel.svelte
@@ -59,14 +59,13 @@
  }

  const sizeOptions: ImageGenerationParams["size"][] = [
-    "auto",
    "512x512",
    "768x768",
    "1024x1024",
    "1024x768",
    "768x1024",
-    "1024x1536",
-    "1536x1024",
+    "1024x1365",
+    "1365x1024",
  ];

  const qualityOptions: ImageGenerationParams["quality"][] = [
@@ -177,90 +176,92 @@
 <div class="border-b border-exo-medium-gray/30 px-3 py-2">
  <!-- Basic params row -->
  <div class="flex items-center gap-3 flex-wrap">
-    <!-- Size -->
-    <div class="flex items-center gap-1.5">
-      <span class="text-xs text-exo-light-gray uppercase tracking-wider"
-        >SIZE:</span
-      >
-      <div class="relative">
-        <button
-          bind:this={sizeButtonRef}
-          type="button"
-          onclick={() => (isSizeDropdownOpen = !isSizeDropdownOpen)}
-          class="bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-2 pr-6 py-1 text-xs font-mono text-exo-yellow cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 {isSizeDropdownOpen
-            ? 'border-exo-yellow/70'
-            : ''}"
+    <!-- Size (hidden in edit mode - output size comes from input image) -->
+    {#if !isEditMode}
+      <div class="flex items-center gap-1.5">
+        <span class="text-xs text-exo-light-gray uppercase tracking-wider"
+          >SIZE:</span
        >
-          {params.size.toUpperCase()}
-        </button>
-        <div
-          class="absolute right-1.5 top-1/2 -translate-y-1/2 pointer-events-none transition-transform duration-200 {isSizeDropdownOpen
-            ? 'rotate-180'
-            : ''}"
-        >
-          <svg
-            class="w-3 h-3 text-exo-yellow/60"
-            fill="none"
-            viewBox="0 0 24 24"
-            stroke="currentColor"
+        <div class="relative">
+          <button
+            bind:this={sizeButtonRef}
+            type="button"
+            onclick={() => (isSizeDropdownOpen = !isSizeDropdownOpen)}
+            class="bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-2 pr-6 py-1 text-xs font-mono text-exo-yellow cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 {isSizeDropdownOpen
+              ? 'border-exo-yellow/70'
+              : ''}"
          >
-            <path
-              stroke-linecap="round"
-              stroke-linejoin="round"
-              stroke-width="2"
-              d="M19 9l-7 7-7-7"
-            />
-          </svg>
-        </div>
-      </div>
-
-      {#if isSizeDropdownOpen}
-        <!-- Backdrop to close dropdown -->
-        <button
-          type="button"
-          class="fixed inset-0 z-[9998] cursor-default"
-          onclick={() => (isSizeDropdownOpen = false)}
-          aria-label="Close dropdown"
-        ></button>
-
-        <!-- Dropdown Panel - fixed positioning to escape overflow:hidden -->
-        <div
-          class="fixed bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-[9999] max-h-48 overflow-y-auto overflow-x-hidden min-w-max"
-          style="bottom: calc(100vh - {sizeDropdownPosition()
-            .top}px + 4px); left: {sizeDropdownPosition().left}px;"
-        >
-          <div class="py-1">
-            {#each sizeOptions as size}
-              <button
-                type="button"
-                onclick={() => selectSize(size)}
-                class="w-full px-3 py-1.5 text-left text-xs font-mono tracking-wide transition-colors duration-100 flex items-center gap-2 {params.size ===
-                size
-                  ? 'bg-transparent text-exo-yellow'
-                  : 'text-exo-light-gray hover:text-exo-yellow'}"
-              >
-                {#if params.size === size}
-                  <svg
-                    class="w-3 h-3 flex-shrink-0"
-                    fill="currentColor"
-                    viewBox="0 0 20 20"
-                  >
-                    <path
-                      fill-rule="evenodd"
-                      d="M16.707 5.293a1 1 0 010 1.414l-8 8a1 1 0 01-1.414 0l-4-4a1 1 0 011.414-1.414L8 12.586l7.293-7.293a1 1 0 011.414 0z"
-                      clip-rule="evenodd"
-                    />
-                  </svg>
-                {:else}
-                  <span class="w-3"></span>
-                {/if}
-                <span>{size.toUpperCase()}</span>
-              </button>
-            {/each}
+            {params.size}
+          </button>
+          <div
+            class="absolute right-1.5 top-1/2 -translate-y-1/2 pointer-events-none transition-transform duration-200 {isSizeDropdownOpen
+              ? 'rotate-180'
+              : ''}"
+          >
+            <svg
+              class="w-3 h-3 text-exo-yellow/60"
+              fill="none"
+              viewBox="0 0 24 24"
+              stroke="currentColor"
+            >
+              <path
+                stroke-linecap="round"
+                stroke-linejoin="round"
+                stroke-width="2"
+                d="M19 9l-7 7-7-7"
+              />
+            </svg>
          </div>
        </div>
-      {/if}
-    </div>
+
+        {#if isSizeDropdownOpen}
+          <!-- Backdrop to close dropdown -->
+          <button
+            type="button"
+            class="fixed inset-0 z-[9998] cursor-default"
+            onclick={() => (isSizeDropdownOpen = false)}
+            aria-label="Close dropdown"
+          ></button>
+
+          <!-- Dropdown Panel - fixed positioning to escape overflow:hidden -->
+          <div
+            class="fixed bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-[9999] max-h-48 overflow-y-auto min-w-max"
+            style="bottom: calc(100vh - {sizeDropdownPosition()
+              .top}px + 4px); left: {sizeDropdownPosition().left}px;"
+          >
+            <div class="py-1">
+              {#each sizeOptions as size}
+                <button
+                  type="button"
+                  onclick={() => selectSize(size)}
+                  class="w-full px-3 py-1.5 text-left text-xs font-mono tracking-wide transition-colors duration-100 flex items-center gap-2 {params.size ===
+                  size
+                    ? 'bg-transparent text-exo-yellow'
+                    : 'text-exo-light-gray hover:text-exo-yellow'}"
+                >
+                  {#if params.size === size}
+                    <svg
+                      class="w-3 h-3 flex-shrink-0"
+                      fill="currentColor"
+                      viewBox="0 0 20 20"
+                    >
+                      <path
+                        fill-rule="evenodd"
+                        d="M16.707 5.293a1 1 0 010 1.414l-8 8a1 1 0 01-1.414 0l-4-4a1 1 0 011.414-1.414L8 12.586l7.293-7.293a1 1 0 011.414 0z"
+                        clip-rule="evenodd"
+                      />
+                    </svg>
+                  {:else}
+                    <span class="w-3"></span>
+                  {/if}
+                  <span>{size}</span>
+                </button>
+              {/each}
+            </div>
+          </div>
+        {/if}
+      </div>
+    {/if}

    <!-- Quality -->
    <div class="flex items-center gap-1.5">
@@ -310,7 +311,7 @@

        <!-- Dropdown Panel - fixed positioning to escape overflow:hidden -->
        <div
-          class="fixed bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-[9999] max-h-48 overflow-y-auto overflow-x-hidden min-w-max"
+          class="fixed bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-[9999] max-h-48 overflow-y-auto min-w-max"
          style="bottom: calc(100vh - {qualityDropdownPosition()
            .top}px + 4px); left: {qualityDropdownPosition().left}px;"
        >
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
@@ -306,14 +306,13 @@ const IMAGE_PARAMS_STORAGE_KEY = "exo-image-generation-params";
 export interface ImageGenerationParams {
  // Basic params
  size:
-    | "auto"
    | "512x512"
    | "768x768"
    | "1024x1024"
    | "1024x768"
    | "768x1024"
-    | "1024x1536"
-    | "1536x1024";
+    | "1024x1365"
+    | "1365x1024";
  quality: "low" | "medium" | "high";
  outputFormat: "png" | "jpeg";
  numImages: number;
@@ -337,7 +336,7 @@ export interface EditingImage {
 }

 const DEFAULT_IMAGE_PARAMS: ImageGenerationParams = {
-  size: "auto",
+  size: "1024x1024",
  quality: "medium",
  outputFormat: "png",
  numImages: 1,
--- a/dashboard/src/routes/downloads/+page.svelte
+++ b/dashboard/src/routes/downloads/+page.svelte
--- a/flake.nix
+++ b/flake.nix
@@ -74,6 +74,7 @@
      perSystem =
        { config, self', inputs', pkgs, lib, system, ... }:
        let
+          fenixToolchain = inputs'.fenix.packages.complete;
          # Use pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
          pkgsSwift = import inputs.nixpkgs-swift { inherit system; };
        in
@@ -114,7 +115,7 @@
          packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin (
            let
              uvLock = builtins.fromTOML (builtins.readFile ./uv.lock);
-              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx" && p.source ? git) uvLock.package);
+              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx") uvLock.package);
              uvLockMlxVersion = mlxPackage.version;
            in
            {
--- a/nix/mlx.nix
+++ b/nix/mlx.nix
@@ -41,16 +41,16 @@ let

  mlx = stdenv.mkDerivation rec {
    pname = "mlx";
-    version = let v = "0.30.7.dev20260218+14841977"; in
+    version = let v = "0.30.6"; in
      assert v == uvLockMlxVersion || throw "MLX version mismatch: nix/mlx.nix has ${v} but uv.lock has ${uvLockMlxVersion}. Update both the version and hash in nix/mlx.nix.";
      v;
    pyproject = true;

    src = fetchFromGitHub {
-      owner = "rltakashige";
-      repo = "mlx-jaccl-fix-small-recv";
-      rev = "1484197707f35186ad3bd614357c7c47fdf86ebc";
-      hash = "sha256-FupCMoK/SF/ldfKuvMSAKECcOP8c+ANgkQlPZttDsLk=";
+      owner = "ml-explore";
+      repo = "mlx";
+      tag = "v${version}";
+      hash = "sha256-avD5EGhwgmPdXLAyQSqTO6AXk/W3ziH+f6AetjK3Sdo=";
    };

    patches = [
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,9 +17,9 @@ dependencies = [
    "loguru>=0.7.3",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
-    "mlx; sys_platform == 'darwin'",
+    "mlx==0.30.6; sys_platform == 'darwin'",
    "mlx[cpu]==0.30.6; sys_platform == 'linux'",
-    "mlx-lm==0.30.7",
+    "mlx-lm==0.30.6",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
    "hypercorn>=0.18.0",
    "openai-harmony>=0.0.8",
@@ -64,7 +64,6 @@ members = [

 [tool.uv.sources]
 exo_pyo3_bindings = { workspace = true }
-mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" }
 #mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm", branch = "stable" }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
@@ -133,7 +132,7 @@ markers = [
 env = [
  "EXO_TESTS=1"
 ]
-addopts = "-m 'not slow' --ignore=tests/start_distributed_test.py"
+addopts = "-m 'not slow'"
 filterwarnings = [
    "ignore:builtin type Swig:DeprecationWarning",
 ]
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -58,21 +58,6 @@
        lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux (
          (lib.mapAttrs (_: ignoreMissing) nvidiaPackages) // {
            mlx = ignoreMissing prev.mlx;
-            mlx-cuda-13 = prev.mlx-cuda-13.overrideAttrs (old: {
-              buildInputs = (old.buildInputs or [ ]) ++ [
-                final.nvidia-cublas
-                final.nvidia-cuda-nvrtc
-                final.nvidia-cudnn-cu13
-                final.nvidia-nccl-cu13
-              ];
-              preFixup = ''
-                addAutoPatchelfSearchPath ${final.nvidia-cublas}
-                addAutoPatchelfSearchPath ${final.nvidia-cuda-nvrtc}
-                addAutoPatchelfSearchPath ${final.nvidia-cudnn-cu13}
-                addAutoPatchelfSearchPath ${final.nvidia-nccl-cu13}
-              '';
-              autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
-            });
            torch = ignoreMissing prev.torch;
            triton = ignoreMissing prev.triton;
          }
@@ -89,25 +74,14 @@
          linuxOverlay
        ]
      );
-      # mlx-cpu and mlx-cuda-13 both ship mlx/ site-packages files; keep first.
-      # mlx-cpu/mlx-cuda-13 and nvidia-cudnn-cu12/cu13 ship overlapping files.
-      venvCollisionPaths = lib.optionals pkgs.stdenv.hostPlatform.isLinux [
-        "lib/python3.13/site-packages/mlx*"
-        "lib/python3.13/site-packages/nvidia*"
-      ];
-
-      exoVenv = (pythonSet.mkVirtualEnv "exo-env" workspace.deps.default).overrideAttrs {
-        venvIgnoreCollisions = venvCollisionPaths;
-      };
+      exoVenv = pythonSet.mkVirtualEnv "exo-env" workspace.deps.default;

      # Virtual environment with dev dependencies for testing
-      testVenv = (pythonSet.mkVirtualEnv "exo-test-env" (
+      testVenv = pythonSet.mkVirtualEnv "exo-test-env" (
        workspace.deps.default // {
          exo = [ "dev" ]; # Include pytest, pytest-asyncio, pytest-env
        }
-      )).overrideAttrs {
-        venvIgnoreCollisions = venvCollisionPaths;
-      };
+      );

      mkPythonScript = name: path: pkgs.writeShellApplication {
        inherit name;
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "deepseek"
 quantization = "4bit"
 base_model = "DeepSeek V3.1"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 405874409472
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "deepseek"
 quantization = "8bit"
 base_model = "DeepSeek V3.1"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 765577920512
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "8bit"
 base_model = "GLM 4.5 Air"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 122406567936
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "bf16"
 base_model = "GLM 4.5 Air"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 229780750336
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "4bit"
 base_model = "GLM 4.7"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 198556925568
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "6bit"
 base_model = "GLM 4.7"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 286737579648
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "8bit"
 base_model = "GLM 4.7"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 396963397248
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "4bit"
 base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 19327352832
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "5bit"
 base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 22548578304
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "6bit"
 base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 26843545600
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "glm"
 quantization = "8bit"
 base_model = "GLM 4.7 Flash"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 34359738368
--- a/resources/inference_model_cards/mlx-community--GLM-5-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-8bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-5-8bit-MXFP8"
-n_layers = 78
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM-5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 790517400864
--- a/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-5-MXFP4-Q8"
-n_layers = 78
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "MXFP4-Q8"
-base_model = "GLM-5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 405478939008
--- a/resources/inference_model_cards/mlx-community--GLM-5-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-bf16.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-5"
-n_layers = 78
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "bf16"
-base_model = "GLM-5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 1487822475264
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "kimi"
 quantization = ""
 base_model = "Kimi K2"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 706522120192
--- a/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "kimi"
 quantization = ""
 base_model = "Kimi K2.5"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 662498705408
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "minimax"
 quantization = "3bit"
 base_model = "MiniMax M2.1"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 100086644736
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "minimax"
 quantization = "8bit"
 base_model = "MiniMax M2.1"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 242986745856
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3 0.6B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 342884352
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3 0.6B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 698351616
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3 235B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 141733920768
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3 235B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 268435456000
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3 30B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 17612931072
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3 30B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 33279705088
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3 Next 80B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 47080074240
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3 Next 80B"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 88814387200
--- a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-4bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "step"
 quantization = "4bit"
 base_model = "Step 3.5 Flash"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 114572190076
--- a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-6bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "step"
 quantization = "6bit"
 base_model = "Step 3.5 Flash"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 159039627774
--- a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-8Bit.toml
+++ b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-8Bit.toml
@@ -6,7 +6,7 @@ tasks = ["TextGeneration"]
 family = "step"
 quantization = "8bit"
 base_model = "Step 3.5 Flash"
-capabilities = ["text", "thinking", "thinking_toggle"]
+capabilities = ["text", "thinking"]

 [storage_size]
 in_bytes = 209082699847
--- a/rust/clippy.toml
+++ b/rust/clippy.toml
@@ -1,2 +0,0 @@
-# we can manually exclude false-positive lint errors for dual packages (if in dependencies)
-#allowed-duplicate-crates = ["hashbrown"]
--- a/rust/exo_pyo3_bindings/Cargo.toml
+++ b/rust/exo_pyo3_bindings/Cargo.toml
@@ -26,11 +26,11 @@ networking = { workspace = true }

 # interop
 pyo3 = { version = "0.27.2", features = [
-    # "abi3-py313", # tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.13
+    "abi3-py313", # tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.13
    # "nightly", # enables better-supported GIL integration
-    "experimental-async", # async support in #[pyfunction] & #[pymethods]
-    #"experimental-inspect", # inspection of generated binary => easier to automate type-hint generation
-    #"py-clone", # adding Clone-ing of `Py<T>` without GIL (may cause panics - remove if panics happen)
+    "experimental-async" # async support in #[pyfunction] & #[pymethods]
+    # "experimental-inspect", # inspection of generated binary => easier to automate type-hint generation
+    # "py-clone", # adding Clone-ing of `Py<T>` without GIL (may cause panics - remove if panics happen)
    # "multiple-pymethods", # allows multiple #[pymethods] sections per class

    # integrations with other libraries
@@ -42,21 +42,18 @@ pyo3-stub-gen = { version = "0.17.2" }
 pyo3-async-runtimes = { version = "0.27.0", features = ["attributes", "tokio-runtime", "testing"] }
 pyo3-log = "0.13.2"

-# macro dependencies
-extend = { workspace = true }
-delegate = { workspace = true }
-
 # async runtime
 tokio = { workspace = true, features = ["full", "tracing"] }
-futures-lite = { workspace = true }
-
-# utility dependencies
-util = { workspace = true }
+futures-lite = "2.6.1"

 # Tracing
+#tracing = "0.1"
+#tracing-subscriber = "0.3"
+#console-subscriber = "0.1.5"
+#tracing-log = "0.2.0"
 log = { workspace = true }
 env_logger = "0.11"

+
 # Networking
 libp2p = { workspace = true, features = ["full"] }
-pin-project = "1.1.10"
--- a/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi
+++ b/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi
@@ -2,220 +2,39 @@
 # ruff: noqa: E501, F401

 import builtins
-import enum
 import typing

-@typing.final
-class AllQueuesFullError(builtins.Exception):
-    def __new__(cls, *args: typing.Any) -> AllQueuesFullError: ...
-    def __repr__(self) -> builtins.str: ...
-    def __str__(self) -> builtins.str: ...
-
-@typing.final
-class ConnectionUpdate:
-    @property
-    def update_type(self) -> ConnectionUpdateType:
-        r"""
-        Whether this is a connection or disconnection event
-        """
-    @property
-    def peer_id(self) -> PeerId:
-        r"""
-        Identity of the peer that we have connected to or disconnected from.
-        """
-    @property
-    def remote_ipv4(self) -> builtins.str:
-        r"""
-        Remote connection's IPv4 address.
-        """
-    @property
-    def remote_tcp_port(self) -> builtins.int:
-        r"""
-        Remote connection's TCP port.
-        """
-
@typing.final
 class Keypair:
-    r"""
-    Identity keypair of a node.
-    """
    @staticmethod
-    def generate_ed25519() -> Keypair:
+    def generate() -> Keypair:
        r"""
-        Generate a new Ed25519 keypair.
-        """
-    @staticmethod
-    def generate_ecdsa() -> Keypair:
-        r"""
-        Generate a new ECDSA keypair.
-        """
-    @staticmethod
-    def generate_secp256k1() -> Keypair:
-        r"""
-        Generate a new Secp256k1 keypair.
+        Generate a new ed25519 keypair
        """
    @staticmethod
    def from_protobuf_encoding(bytes: bytes) -> Keypair:
        r"""
        Decode a private key from a protobuf structure and parse it as a `Keypair`.
        """
-    @staticmethod
-    def rsa_from_pkcs8(bytes: bytes) -> Keypair:
-        r"""
-        Decode an keypair from a DER-encoded secret key in PKCS#8 `PrivateKeyInfo`
-        format (i.e. unencrypted) as defined in [RFC5208].
-        
-        [RFC5208]: https://tools.ietf.org/html/rfc5208#section-5
-        """
-    @staticmethod
-    def secp256k1_from_der(bytes: bytes) -> Keypair:
-        r"""
-        Decode a keypair from a DER-encoded Secp256k1 secret key in an `ECPrivateKey`
-        structure as defined in [RFC5915].
-        
-        [RFC5915]: https://tools.ietf.org/html/rfc5915
-        """
-    @staticmethod
-    def ed25519_from_bytes(bytes: bytes) -> Keypair: ...
    def to_protobuf_encoding(self) -> bytes:
        r"""
-        Encode a private key as protobuf structure.
-        """
-    def to_peer_id(self) -> PeerId:
-        r"""
-        Convert the `Keypair` into the corresponding `PeerId`.
+        Encode a private key to a protobuf structure.
        """
+    def to_string(self) -> builtins.str: ...

@typing.final
-class Multiaddr:
-    r"""
-    Representation of a Multiaddr.
-    """
+class PyPeer:
    @staticmethod
-    def empty() -> Multiaddr:
-        r"""
-        Create a new, empty multiaddress.
-        """
-    @staticmethod
-    def with_capacity(n: builtins.int) -> Multiaddr:
-        r"""
-        Create a new, empty multiaddress with the given capacity.
-        """
-    @staticmethod
-    def from_bytes(bytes: bytes) -> Multiaddr:
-        r"""
-        Parse a `Multiaddr` value from its byte slice representation.
-        """
-    @staticmethod
-    def from_string(string: builtins.str) -> Multiaddr:
-        r"""
-        Parse a `Multiaddr` value from its string representation.
-        """
-    def len(self) -> builtins.int:
-        r"""
-        Return the length in bytes of this multiaddress.
-        """
-    def is_empty(self) -> builtins.bool:
-        r"""
-        Returns true if the length of this multiaddress is 0.
-        """
-    def to_bytes(self) -> bytes:
-        r"""
-        Return a copy of this [`Multiaddr`]'s byte representation.
-        """
-    def to_string(self) -> builtins.str:
-        r"""
-        Convert a Multiaddr to a string.
-        """
+    def new(kp: Keypair, namespace: builtins.str) -> PyPeer: ...
+    async def subscribe(self, topic: builtins.str) -> None: ...
+    async def unsubscribe(self, topic: builtins.str) -> None: ...
+    async def send(self, topic: builtins.str, payload: bytes) -> None: ...
+    async def run(self) -> None: ...
+    async def recv(self) -> PySwarmEvent: ...

@typing.final
-class NetworkingHandle:
-    def __new__(cls, identity: Keypair) -> NetworkingHandle: ...
-    async def connection_update_recv(self) -> ConnectionUpdate:
-        r"""
-        Receives the next `ConnectionUpdate` from networking.
-        """
-    async def connection_update_recv_many(self, limit: builtins.int) -> builtins.list[ConnectionUpdate]:
-        r"""
-        Receives at most `limit` `ConnectionUpdate`s from networking and returns them.
-        
-        For `limit = 0`, an empty collection of `ConnectionUpdate`s will be returned immediately.
-        For `limit > 0`, if there are no `ConnectionUpdate`s in the channel's queue this method
-        will sleep until a `ConnectionUpdate`s is sent.
-        """
-    async def gossipsub_subscribe(self, topic: builtins.str) -> builtins.bool:
-        r"""
-        Subscribe to a `GossipSub` topic.
-        
-        Returns `True` if the subscription worked. Returns `False` if we were already subscribed.
-        """
-    async def gossipsub_unsubscribe(self, topic: builtins.str) -> builtins.bool:
-        r"""
-        Unsubscribes from a `GossipSub` topic.
-        
-        Returns `True` if we were subscribed to this topic. Returns `False` if we were not subscribed.
-        """
-    async def gossipsub_publish(self, topic: builtins.str, data: bytes) -> None:
-        r"""
-        Publishes a message with multiple topics to the `GossipSub` network.
-        
-        If no peers are found that subscribe to this topic, throws `NoPeersSubscribedToTopicError` exception.
-        """
-    async def gossipsub_recv(self) -> tuple[builtins.str, bytes]:
-        r"""
-        Receives the next message from the `GossipSub` network.
-        """
-    async def gossipsub_recv_many(self, limit: builtins.int) -> builtins.list[tuple[builtins.str, bytes]]:
-        r"""
-        Receives at most `limit` messages from the `GossipSub` network and returns them.
-        
-        For `limit = 0`, an empty collection of messages will be returned immediately.
-        For `limit > 0`, if there are no messages in the channel's queue this method
-        will sleep until a message is sent.
-        """
-
-@typing.final
-class NoPeersSubscribedToTopicError(builtins.Exception):
-    def __new__(cls, *args: typing.Any) -> NoPeersSubscribedToTopicError: ...
-    def __repr__(self) -> builtins.str: ...
-    def __str__(self) -> builtins.str: ...
-
-@typing.final
-class PeerId:
-    r"""
-    Identifier of a peer of the network.
-    
-    The data is a `CIDv0` compatible multihash of the protobuf encoded public key of the peer
-    as specified in [specs/peer-ids](https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md).
-    """
-    @staticmethod
-    def random() -> PeerId:
-        r"""
-        Generates a random peer ID from a cryptographically secure PRNG.
-        
-        This is useful for randomly walking on a DHT, or for testing purposes.
-        """
-    @staticmethod
-    def from_bytes(bytes: bytes) -> PeerId:
-        r"""
-        Parses a `PeerId` from bytes.
-        """
-    def to_bytes(self) -> bytes:
-        r"""
-        Returns a raw bytes representation of this `PeerId`.
-        """
-    def to_base58(self) -> builtins.str:
-        r"""
-        Returns a base-58 encoded string of this `PeerId`.
-        """
-    def __repr__(self) -> builtins.str: ...
-    def __str__(self) -> builtins.str: ...
-
-@typing.final
-class ConnectionUpdateType(enum.Enum):
-    r"""
-    Connection or disconnection event discriminant type.
-    """
-    Connected = ...
-    Disconnected = ...
+class PySwarmEvent:
+    def downcast_discovered(self) -> typing.Optional[builtins.str]: ...
+    def downcast_expired(self) -> typing.Optional[builtins.str]: ...
+    def downcast_message(self) -> typing.Optional[tuple[builtins.str, builtins.str, bytes]]: ...

--- a/rust/exo_pyo3_bindings/src/allow_threading.rs
+++ b/rust/exo_pyo3_bindings/src/allow_threading.rs
@@ -1,37 +1,22 @@
-//! SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-//!
-
-use pin_project::pin_project;
+//! See: <https://pyo3.rs/v0.27.2/async-await.html#detaching-from-the-interpreter-across-await>
 use pyo3::prelude::*;
 use std::{
    future::Future,
-    pin::Pin,
+    pin::{Pin, pin},
    task::{Context, Poll},
 };

-/// SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-#[pin_project]
-#[repr(transparent)]
-pub(crate) struct AllowThreads<F>(#[pin] F);
-
-impl<F> AllowThreads<F>
-where
-    Self: Future,
-{
-    pub fn new(f: F) -> Self {
-        Self(f)
-    }
-}
+pub struct AllowThreads<F>(pub(crate) F);

 impl<F> Future for AllowThreads<F>
 where
-    F: Future + Send,
+    F: Future + Unpin + Send,
    F::Output: Send,
 {
    type Output = F::Output;

-    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let waker = cx.waker();
-        Python::attach(|py| py.detach(|| self.project().0.poll(&mut Context::from_waker(waker))))
+        Python::attach(|py| py.detach(|| pin!(&mut self.0).poll(&mut Context::from_waker(waker))))
    }
 }
--- a/rust/exo_pyo3_bindings/src/ident.rs
+++ b/rust/exo_pyo3_bindings/src/ident.rs
@@ -1,159 +0,0 @@
-use crate::ext::ResultExt as _;
-use libp2p::PeerId;
-use libp2p::identity::Keypair;
-use pyo3::prelude::{PyBytesMethods as _, PyModule, PyModuleMethods as _};
-use pyo3::types::PyBytes;
-use pyo3::{Bound, PyResult, Python, pyclass, pymethods};
-use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
-
-/// Identity keypair of a node.
-#[gen_stub_pyclass]
-#[pyclass(name = "Keypair", frozen)]
-#[repr(transparent)]
-pub struct PyKeypair(pub Keypair);
-
-#[gen_stub_pymethods]
-#[pymethods]
-#[allow(clippy::needless_pass_by_value)]
-impl PyKeypair {
-    /// Generate a new Ed25519 keypair.
-    #[staticmethod]
-    fn generate_ed25519() -> Self {
-        Self(Keypair::generate_ed25519())
-    }
-
-    /// Generate a new ECDSA keypair.
-    #[staticmethod]
-    fn generate_ecdsa() -> Self {
-        Self(Keypair::generate_ecdsa())
-    }
-
-    /// Generate a new Secp256k1 keypair.
-    #[staticmethod]
-    fn generate_secp256k1() -> Self {
-        Self(Keypair::generate_secp256k1())
-    }
-
-    /// Decode a private key from a protobuf structure and parse it as a `Keypair`.
-    #[staticmethod]
-    fn from_protobuf_encoding(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
-        let bytes = Vec::from(bytes.as_bytes());
-        Ok(Self(Keypair::from_protobuf_encoding(&bytes).pyerr()?))
-    }
-
-    /// Decode an keypair from a DER-encoded secret key in PKCS#8 `PrivateKeyInfo`
-    /// format (i.e. unencrypted) as defined in [RFC5208].
-    ///
-    /// [RFC5208]: https://tools.ietf.org/html/rfc5208#section-5
-    #[staticmethod]
-    fn rsa_from_pkcs8(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
-        let mut bytes = Vec::from(bytes.as_bytes());
-        Ok(Self(Keypair::rsa_from_pkcs8(&mut bytes).pyerr()?))
-    }
-
-    /// Decode a keypair from a DER-encoded Secp256k1 secret key in an `ECPrivateKey`
-    /// structure as defined in [RFC5915].
-    ///
-    /// [RFC5915]: https://tools.ietf.org/html/rfc5915
-    #[staticmethod]
-    fn secp256k1_from_der(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
-        let mut bytes = Vec::from(bytes.as_bytes());
-        Ok(Self(Keypair::secp256k1_from_der(&mut bytes).pyerr()?))
-    }
-
-    #[staticmethod]
-    fn ed25519_from_bytes(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
-        let mut bytes = Vec::from(bytes.as_bytes());
-        Ok(Self(Keypair::ed25519_from_bytes(&mut bytes).pyerr()?))
-    }
-
-    /// Encode a private key as protobuf structure.
-    fn to_protobuf_encoding<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
-        let bytes = self.0.to_protobuf_encoding().pyerr()?;
-        Ok(PyBytes::new(py, &bytes))
-    }
-
-    /// Convert the `Keypair` into the corresponding `PeerId`.
-    fn to_peer_id(&self) -> PyPeerId {
-        PyPeerId(self.0.public().to_peer_id())
-    }
-
-    // /// Hidden constructor for pickling support. TODO: figure out how to do pickling...
-    // #[gen_stub(skip)]
-    // #[new]
-    // fn py_new(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
-    //     Self::from_protobuf_encoding(bytes)
-    // }
-    //
-    // #[gen_stub(skip)]
-    // fn __setstate__(&mut self, state: Bound<'_, PyBytes>) -> PyResult<()> {
-    //     *self = Self::from_protobuf_encoding(state)?;
-    //     Ok(())
-    // }
-    //
-    // #[gen_stub(skip)]
-    // fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
-    //     self.to_protobuf_encoding(py)
-    // }
-    //
-    // #[gen_stub(skip)]
-    // pub fn __getnewargs__<'py>(&self, py: Python<'py>) -> PyResult<(Bound<'py, PyBytes>,)> {
-    //     Ok((self.to_protobuf_encoding(py)?,))
-    // }
-}
-
-/// Identifier of a peer of the network.
-///
-/// The data is a `CIDv0` compatible multihash of the protobuf encoded public key of the peer
-/// as specified in [specs/peer-ids](https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md).
-#[gen_stub_pyclass]
-#[pyclass(name = "PeerId", frozen)]
-#[derive(Debug, Clone)]
-#[repr(transparent)]
-pub struct PyPeerId(pub PeerId);
-
-#[gen_stub_pymethods]
-#[pymethods]
-#[allow(clippy::needless_pass_by_value)]
-impl PyPeerId {
-    /// Generates a random peer ID from a cryptographically secure PRNG.
-    ///
-    /// This is useful for randomly walking on a DHT, or for testing purposes.
-    #[staticmethod]
-    fn random() -> Self {
-        Self(PeerId::random())
-    }
-
-    /// Parses a `PeerId` from bytes.
-    #[staticmethod]
-    fn from_bytes(bytes: Bound<'_, PyBytes>) -> PyResult<Self> {
-        let bytes = Vec::from(bytes.as_bytes());
-        Ok(Self(PeerId::from_bytes(&bytes).pyerr()?))
-    }
-
-    /// Returns a raw bytes representation of this `PeerId`.
-    fn to_bytes<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> {
-        let bytes = self.0.to_bytes();
-        PyBytes::new(py, &bytes)
-    }
-
-    /// Returns a base-58 encoded string of this `PeerId`.
-    fn to_base58(&self) -> String {
-        self.0.to_base58()
-    }
-
-    fn __repr__(&self) -> String {
-        format!("PeerId({})", self.to_base58())
-    }
-
-    fn __str__(&self) -> String {
-        self.to_base58()
-    }
-}
-
-pub fn ident_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_class::<PyKeypair>()?;
-    m.add_class::<PyPeerId>()?;
-
-    Ok(())
-}
--- a/rust/exo_pyo3_bindings/src/lib.rs
+++ b/rust/exo_pyo3_bindings/src/lib.rs
@@ -1,169 +1,42 @@
 //! TODO: crate documentation
-//!
-//! this is here as a placeholder documentation
-//!
-//!
+pub(crate) mod allow_threading;

-mod allow_threading;
-mod ident;
-mod networking;
+pub(crate) mod networking;
+pub(crate) mod take_once {
+    use std::sync::Mutex;
+
+    pub struct TakeOnce<T>(Mutex<Option<T>>);
+    impl<T> TakeOnce<T> {
+        pub fn new(t: T) -> Self {
+            Self(Mutex::new(Some(t)))
+        }
+        pub fn take(&self) -> Option<T> {
+            match self.0.try_lock() {
+                Ok(mut o) => o.take(),
+                Err(_) => None,
+            }
+        }
+    }
+}
+
+use pyo3::prelude::*;

-use crate::ident::ident_submodule;
-use crate::networking::networking_submodule;
-use pyo3::prelude::PyModule;
-use pyo3::{Bound, PyResult, pyclass, pymodule};
 use pyo3_stub_gen::define_stub_info_gatherer;

-/// Namespace for all the constants used by this crate.
-pub(crate) mod r#const {
-    pub const MPSC_CHANNEL_SIZE: usize = 1024;
-}
-
-/// Namespace for crate-wide extension traits/methods
-pub(crate) mod ext {
-    use crate::allow_threading::AllowThreads;
-    use extend::ext;
-    use pyo3::exceptions::{PyConnectionError, PyRuntimeError};
-    use pyo3::types::PyBytes;
-    use pyo3::{Py, PyErr, PyResult, Python};
-    use tokio::runtime::Runtime;
-    use tokio::sync::mpsc;
-    use tokio::sync::mpsc::error::TryRecvError;
-    use tokio::task::JoinHandle;
-
-    #[ext(pub, name = ByteArrayExt)]
-    impl [u8] {
-        fn pybytes(&self) -> Py<PyBytes> {
-            Python::attach(|py| PyBytes::new(py, self).unbind())
-        }
-    }
-
-    #[ext(pub, name = ResultExt)]
-    impl<T, E> Result<T, E>
-    where
-        E: ToString,
-    {
-        fn pyerr(self) -> PyResult<T> {
-            self.map_err(|e| PyRuntimeError::new_err(e.to_string()))
-        }
-    }
-
-    pub trait FutureExt: Future + Sized {
-        /// SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-        fn allow_threads_py(self) -> AllowThreads<Self>
-        where
-            AllowThreads<Self>: Future,
-        {
-            AllowThreads::new(self)
-        }
-    }
-
-    impl<T: Future> FutureExt for T {}
-
-    #[ext(pub, name = PyErrExt)]
-    impl PyErr {
-        fn receiver_channel_closed() -> Self {
-            PyConnectionError::new_err("Receiver channel closed unexpectedly")
-        }
-    }
-
-    #[ext(pub, name = PyResultExt)]
-    impl<T> PyResult<T> {
-        fn write_unraisable(self) -> Option<T> {
-            Python::attach(|py| self.write_unraisable_with(py))
-        }
-
-        fn write_unraisable_with(self, py: Python<'_>) -> Option<T> {
-            match self {
-                Ok(v) => Some(v),
-                Err(e) => {
-                    // write error back to python
-                    e.write_unraisable(py, None);
-                    None
-                }
-            }
-        }
-    }
-
-    #[ext(pub, name = TokioRuntimeExt)]
-    impl Runtime {
-        fn spawn_with_scope<F>(&self, py: Python<'_>, future: F) -> PyResult<JoinHandle<F::Output>>
-        where
-            F: Future + Send + 'static,
-            F::Output: Send + 'static,
-        {
-            let locals = pyo3_async_runtimes::tokio::get_current_locals(py)?;
-            Ok(self.spawn(pyo3_async_runtimes::tokio::scope(locals, future)))
-        }
-    }
-
-    #[ext(pub, name = TokioMpscSenderExt)]
-    impl<T> mpsc::Sender<T> {
-        /// Sends a value, waiting until there is capacity.
-        ///
-        /// A successful send occurs when it is determined that the other end of the
-        /// channel has not hung up already. An unsuccessful send would be one where
-        /// the corresponding receiver has already been closed.
-        async fn send_py(&self, value: T) -> PyResult<()> {
-            self.send(value)
-                .await
-                .map_err(|_| PyErr::receiver_channel_closed())
-        }
-    }
-
-    #[ext(pub, name = TokioMpscReceiverExt)]
-    impl<T> mpsc::Receiver<T> {
-        /// Receives the next value for this receiver.
-        async fn recv_py(&mut self) -> PyResult<T> {
-            self.recv().await.ok_or_else(PyErr::receiver_channel_closed)
-        }
-
-        /// Receives at most `limit` values for this receiver and returns them.
-        ///
-        /// For `limit = 0`, an empty collection of messages will be returned immediately.
-        /// For `limit > 0`, if there are no messages in the channel's queue this method
-        /// will sleep until a message is sent.
-        async fn recv_many_py(&mut self, limit: usize) -> PyResult<Vec<T>> {
-            // get updates from receiver channel
-            let mut updates = Vec::with_capacity(limit);
-            let received = self.recv_many(&mut updates, limit).await;
-
-            // if we received zero items, then the channel was unexpectedly closed
-            if limit != 0 && received == 0 {
-                return Err(PyErr::receiver_channel_closed());
-            }
-
-            Ok(updates)
-        }
-
-        /// Tries to receive the next value for this receiver.
-        fn try_recv_py(&mut self) -> PyResult<Option<T>> {
-            match self.try_recv() {
-                Ok(v) => Ok(Some(v)),
-                Err(TryRecvError::Empty) => Ok(None),
-                Err(TryRecvError::Disconnected) => Err(PyErr::receiver_channel_closed()),
-            }
-        }
-    }
-}
-
 /// A Python module implemented in Rust. The name of this function must match
 /// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
 /// import the module.
 #[pymodule(name = "exo_pyo3_bindings")]
-fn main_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
+pub fn networking_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // install logger
    pyo3_log::init();
+    // set up the multi-threaded Tokio runtime builder handed to pyo3_async_runtimes
+    let mut builder = tokio::runtime::Builder::new_multi_thread();
+    builder.enable_all();
+    pyo3_async_runtimes::tokio::init(builder);

-    // TODO: for now this is all NOT a submodule, but figure out how to make the submodule system
-    //       work with maturin, where the types generate correctly, in the right folder, without
-    //       too many importing issues...
-    ident_submodule(m)?;
-    networking_submodule(m)?;
-
-    // top-level constructs
-    // TODO: ...
-
+    m.add_class::<networking::PyPeer>()?;
+    m.add_class::<networking::PyKeypair>()?;
    Ok(())
 }

--- a/rust/exo_pyo3_bindings/src/networking.rs
+++ b/rust/exo_pyo3_bindings/src/networking.rs
@@ -1,571 +1,214 @@
-#![allow(
-    clippy::multiple_inherent_impl,
-    clippy::unnecessary_wraps,
-    clippy::unused_self,
-    clippy::needless_pass_by_value
-)]
+use crate::allow_threading::AllowThreads;
+use crate::take_once::TakeOnce;

-use crate::r#const::MPSC_CHANNEL_SIZE;
-use crate::ext::{ByteArrayExt as _, FutureExt, PyErrExt as _};
-use crate::ext::{ResultExt as _, TokioMpscReceiverExt as _, TokioMpscSenderExt as _};
-use crate::ident::{PyKeypair, PyPeerId};
-use crate::pyclass;
-use libp2p::futures::StreamExt as _;
-use libp2p::gossipsub;
-use libp2p::gossipsub::{IdentTopic, Message, MessageId, PublishError};
-use libp2p::swarm::SwarmEvent;
-use networking::discovery;
-use networking::swarm::create_swarm;
-use pyo3::prelude::{PyModule, PyModuleMethods as _};
-use pyo3::types::PyBytes;
-use pyo3::{Bound, Py, PyErr, PyResult, PyTraverseError, PyVisit, Python, pymethods};
-use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum, gen_stub_pymethods};
-use std::net::IpAddr;
-use tokio::sync::{Mutex, mpsc, oneshot};
+use std::pin::pin;

-mod exception {
-    use pyo3::types::PyTuple;
-    use pyo3::{exceptions::PyException, prelude::*};
-    use pyo3_stub_gen::derive::*;
-
-    #[gen_stub_pyclass]
-    #[pyclass(frozen, extends=PyException, name="NoPeersSubscribedToTopicError")]
-    pub struct PyNoPeersSubscribedToTopicError {}
-
-    impl PyNoPeersSubscribedToTopicError {
-        const MSG: &'static str = "\
-        No peers are currently subscribed to receive messages on this topic. \
-        Wait for peers to subscribe or check your network connectivity.";
-
-        ///   Creates a new  [ `PyErr` ]  of this type.
-        ///
-        ///   [`PyErr`] :  https://docs.rs/pyo3/latest/pyo3/struct.PyErr.html   "PyErr in pyo3"
-        pub(crate) fn new_err() -> PyErr {
-            PyErr::new::<Self, _>(()) // TODO: check if this needs to be replaced???
-        }
-    }
-
-    #[gen_stub_pymethods]
-    #[pymethods]
-    impl PyNoPeersSubscribedToTopicError {
-        #[new]
-        #[pyo3(signature = (*args))]
-        #[allow(unused_variables)]
-        pub(crate) fn new(args: &Bound<'_, PyTuple>) -> Self {
-            Self {}
-        }
-
-        fn __repr__(&self) -> String {
-            format!("PeerId(\"{}\")", Self::MSG)
-        }
-
-        fn __str__(&self) -> String {
-            Self::MSG.to_string()
-        }
-    }
-
-    #[gen_stub_pyclass]
-    #[pyclass(frozen, extends=PyException, name="AllQueuesFullError")]
-    pub struct PyAllQueuesFullError {}
-
-    impl PyAllQueuesFullError {
-        const MSG: &'static str =
-            "All libp2p peers are unresponsive, resend the message or reconnect.";
-
-        ///   Creates a new  [ `PyErr` ]  of this type.
-        ///
-        ///   [`PyErr`] :  https://docs.rs/pyo3/latest/pyo3/struct.PyErr.html   "PyErr in pyo3"
-        pub(crate) fn new_err() -> PyErr {
-            PyErr::new::<Self, _>(()) // TODO: check if this needs to be replaced???
-        }
-    }
-
-    #[gen_stub_pymethods]
-    #[pymethods]
-    impl PyAllQueuesFullError {
-        #[new]
-        #[pyo3(signature = (*args))]
-        #[allow(unused_variables)]
-        pub(crate) fn new(args: &Bound<'_, PyTuple>) -> Self {
-            Self {}
-        }
-
-        fn __repr__(&self) -> String {
-            format!("PeerId(\"{}\")", Self::MSG)
-        }
-
-        fn __str__(&self) -> String {
-            Self::MSG.to_string()
-        }
-    }
-}
-
-/// Connection or disconnection event discriminant type.
-#[gen_stub_pyclass_enum]
-#[pyclass(eq, eq_int, name = "ConnectionUpdateType")]
-#[derive(Debug, Clone, PartialEq)]
-enum PyConnectionUpdateType {
-    Connected = 0,
-    Disconnected,
-}
+use futures_lite::FutureExt;
+use libp2p::{gossipsub::PublishError, identity::Keypair};
+use networking::{FromSwarm, Peer, ToSwarm};
+use pyo3::{
+    coroutine::CancelHandle,
+    exceptions::{PyConnectionError, PyRuntimeError, PyValueError},
+    prelude::*,
+    types::PyBytes,
+};
+use pyo3_stub_gen::{
+    derive::{gen_methods_from_python, gen_stub_pyclass, gen_stub_pymethods},
+    inventory::submit,
+};
+use tokio::sync::{Mutex, mpsc};

 #[gen_stub_pyclass]
-#[pyclass(frozen, name = "ConnectionUpdate")]
-#[derive(Debug, Clone)]
-struct PyConnectionUpdate {
-    /// Whether this is a connection or disconnection event
-    #[pyo3(get)]
-    update_type: PyConnectionUpdateType,
+#[pyclass(name = "Keypair", frozen)]
+#[derive(Clone)]
+pub struct PyKeypair(Keypair);

-    /// Identity of the peer that we have connected to or disconnected from.
-    #[pyo3(get)]
-    peer_id: PyPeerId,
+#[gen_stub_pymethods]
+#[pymethods]
+impl PyKeypair {
+    /// Creates a freshly generated ed25519 keypair.
+    #[staticmethod]
+    fn generate() -> Self {
+        let keypair = Keypair::generate_ed25519();
+        Self(keypair)
+    }

-    /// Remote connection's IPv4 address.
-    #[pyo3(get)]
-    remote_ipv4: String,
+    /// Decode a private key from a protobuf structure and parse it as a `Keypair`.
+    ///
+    /// Raises `ValueError` when `bytes` cannot be decoded as a keypair, instead of panicking
+    /// inside the extension module.
+    #[staticmethod]
+    fn from_protobuf_encoding(bytes: &Bound<'_, PyBytes>) -> PyResult<Self> {
+        // `as_bytes` borrows the Python buffer directly, so no intermediate `Vec` is needed
+        Keypair::from_protobuf_encoding(bytes.as_bytes())
+            .map(Self)
+            .map_err(|e| PyValueError::new_err(e.to_string()))
+    }

-    /// Remote connection's TCP port.
-    #[pyo3(get)]
-    remote_tcp_port: u16,
-}
-
-enum ToTask {
-    GossipsubSubscribe {
-        topic: String,
-        result_tx: oneshot::Sender<PyResult<bool>>,
-    },
-    GossipsubUnsubscribe {
-        topic: String,
-        result_tx: oneshot::Sender<bool>,
-    },
-    GossipsubPublish {
-        topic: String,
-        data: Vec<u8>,
-        result_tx: oneshot::Sender<PyResult<MessageId>>,
-    },
-}
-
-#[allow(clippy::enum_glob_use)]
-async fn networking_task(
-    mut swarm: networking::swarm::Swarm,
-    mut to_task_rx: mpsc::Receiver<ToTask>,
-    connection_update_tx: mpsc::Sender<PyConnectionUpdate>,
-    gossipsub_message_tx: mpsc::Sender<(String, Vec<u8>)>,
-) {
-    use SwarmEvent::*;
-    use ToTask::*;
-    use networking::swarm::BehaviourEvent::*;
-
-    log::info!("RUST: networking task started");
-
-    loop {
-        tokio::select! {
-            message = to_task_rx.recv() => {
-                // handle closed channel
-                let Some(message) = message else {
-                    log::info!("RUST: channel closed");
-                    break;
-                };
-
-                // dispatch incoming messages
-                match message {
-                    GossipsubSubscribe { topic, result_tx } => {
-                        // try to subscribe
-                        let result = swarm.behaviour_mut()
-                            .gossipsub.subscribe(&IdentTopic::new(topic));
-
-                        // send response oneshot
-                        if let Err(e) = result_tx.send(result.pyerr()) {
-                            log::error!("RUST: could not subscribe to gossipsub topic since channel already closed: {e:?}");
-                            continue;
-                        }
-                    }
-                    GossipsubUnsubscribe { topic, result_tx } => {
-                        // try to unsubscribe from the topic
-                        let result = swarm.behaviour_mut()
-                            .gossipsub.unsubscribe(&IdentTopic::new(topic));
-
-                        // send response oneshot (or exit if connection closed)
-                        if let Err(e) = result_tx.send(result) {
-                            log::error!("RUST: could not unsubscribe from gossipsub topic since channel already closed: {e:?}");
-                            continue;
-                        }
-                    }
-                    GossipsubPublish { topic, data, result_tx } => {
-                        // try to publish the data -> catch NoPeersSubscribedToTopic error & convert to correct exception
-                        let result = swarm.behaviour_mut().gossipsub.publish(
-                            IdentTopic::new(topic), data);
-                        let pyresult: PyResult<MessageId> = if let Err(PublishError::NoPeersSubscribedToTopic) = result {
-                            Err(exception::PyNoPeersSubscribedToTopicError::new_err())
-                        } else if let Err(PublishError::AllQueuesFull(_)) = result {
-                            Err(exception::PyAllQueuesFullError::new_err())
-                        } else {
-                            result.pyerr()
-                        };
-
-                        // send response oneshot (or exit if connection closed)
-                        if let Err(e) = result_tx.send(pyresult) {
-                            log::error!("RUST: could not publish gossipsub message since channel already closed: {e:?}");
-                            continue;
-                        }
-                    }
-                }
-            }
-
-            // architectural solution to this problem:
-            // create keep_alive behavior who's job it is to dial peers discovered by mDNS (and drop when expired)
-            //   -> it will emmit TRUE connected/disconnected events consumable elsewhere
-            //
-            // gossipsub will feed off-of dial attempts created by networking, and that will bootstrap its' peers list
-            // then for actual communication it will dial those peers if need-be
-            swarm_event = swarm.select_next_some() => {
-                match swarm_event {
-                    Behaviour(Gossipsub(gossipsub::Event::Message {
-                        message: Message {
-                            topic,
-                            data,
-                            ..
-                        },
-                        ..
-                    })) => {
-                        // topic-ID is just the topic hash!!! (since we used identity hasher)
-                        let message = (topic.into_string(), data);
-
-                        // send incoming message to channel (or exit if connection closed)
-                        if let Err(e) = gossipsub_message_tx.send(message).await {
-                            log::error!("RUST: could not send incoming gossipsub message since channel already closed: {e}");
-                            continue;
-                        }
-                    },
-                    Behaviour(Discovery(discovery::Event::ConnectionEstablished { peer_id, remote_ip, remote_tcp_port, .. })) => {
-                        // grab IPv4 string
-                        let remote_ipv4 = match remote_ip {
-                            IpAddr::V4(ip) => ip.to_string(),
-                            IpAddr::V6(ip) => {
-                                log::warn!("RUST: ignoring connection to IPv6 address: {ip}");
-                                continue;
-                            }
-                        };
-
-                        // send connection event to channel (or exit if connection closed)
-                        if let Err(e) = connection_update_tx.send(PyConnectionUpdate {
-                            update_type: PyConnectionUpdateType::Connected,
-                            peer_id: PyPeerId(peer_id),
-                            remote_ipv4,
-                            remote_tcp_port,
-                        }).await {
-                            log::error!("RUST: could not send connection update since channel already closed: {e}");
-                            continue;
-                        }
-                    },
-                    Behaviour(Discovery(discovery::Event::ConnectionClosed { peer_id, remote_ip, remote_tcp_port, .. })) => {
-                        // grab IPv4 string
-                        let remote_ipv4 = match remote_ip {
-                            IpAddr::V4(ip) => ip.to_string(),
-                            IpAddr::V6(ip) => {
-                                log::warn!("RUST: ignoring disconnection from IPv6 address: {ip}");
-                                continue;
-                            }
-                        };
-
-                        // send disconnection event to channel (or exit if connection closed)
-                        if let Err(e) = connection_update_tx.send(PyConnectionUpdate {
-                            update_type: PyConnectionUpdateType::Disconnected,
-                            peer_id: PyPeerId(peer_id),
-                            remote_ipv4,
-                            remote_tcp_port,
-                        }).await {
-                            log::error!("RUST: could not send connection update since channel already closed: {e}");
-                            continue;
-                        }
-                    },
-                    e => {
-                        log::info!("RUST: other event {e:?}");
-                    }
-                }
-            }
+    /// Encode a private key to a protobuf structure.
+    ///
+    /// Returns the encoded key as Python `bytes`. An encoding failure is raised as `ValueError`
+    /// carrying the underlying error's message.
+    fn to_protobuf_encoding<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
+        match self.0.to_protobuf_encoding() {
+            Ok(bytes) => Ok(PyBytes::new(py, &bytes)),
+            Err(e) => Err(PyValueError::new_err(e.to_string())),
        }
    }

-    log::info!("RUST: networking task stopped");
+    /// Returns the base58 form of the peer id derived from this keypair's public key.
+    fn to_string(&self) -> String {
+        let peer_id = self.0.public().to_peer_id();
+        peer_id.to_base58()
+    }
 }

+/// Arguments for `Peer::new`, stored until `PyPeer::run` consumes them.
+///
+/// Fields by position, as filled in by `PyPeer::new`:
+/// 0. the namespace string
+/// 1. the libp2p keypair
+/// 2. sender of `FromSwarm` events (the matching receiver is kept by `PyPeer`)
+/// 3. receiver of `ToSwarm` commands (the matching sender is kept by `PyPeer`)
+struct PeerBuilder(
+    String,
+    Keypair,
+    mpsc::Sender<FromSwarm>,
+    mpsc::Receiver<ToSwarm>,
+);
+
 #[gen_stub_pyclass]
-#[pyclass(name = "NetworkingHandle")]
-#[derive(Debug)]
-struct PyNetworkingHandle {
-    // channels
-    to_task_tx: Option<mpsc::Sender<ToTask>>,
-    connection_update_rx: Mutex<mpsc::Receiver<PyConnectionUpdate>>,
-    gossipsub_message_rx: Mutex<mpsc::Receiver<(String, Vec<u8>)>>,
-}
-
-impl Drop for PyNetworkingHandle {
-    fn drop(&mut self) {
-        // TODO: may or may not need to await a "kill-signal" oneshot channel message,
-        //       to ensure that the networking task is done BEFORE exiting the clear function...
-        //       but this may require GIL?? and it may not be safe to call GIL here??
-        self.to_task_tx = None; // Using Option<T> as a trick to force channel to be dropped
-    }
-}
-
-#[allow(clippy::expect_used)]
-impl PyNetworkingHandle {
-    fn new(
-        to_task_tx: mpsc::Sender<ToTask>,
-        connection_update_rx: mpsc::Receiver<PyConnectionUpdate>,
-        gossipsub_message_rx: mpsc::Receiver<(String, Vec<u8>)>,
-    ) -> Self {
-        Self {
-            to_task_tx: Some(to_task_tx),
-            connection_update_rx: Mutex::new(connection_update_rx),
-            gossipsub_message_rx: Mutex::new(gossipsub_message_rx),
-        }
-    }
-
-    const fn to_task_tx(&self) -> &mpsc::Sender<ToTask> {
-        self.to_task_tx
-            .as_ref()
-            .expect("The sender should only be None after de-initialization.")
-    }
+/// Python-facing handle to a networking peer.
+///
+/// Commands are sent to the swarm through `to_swarm` and events are read back from `from_swarm`;
+/// the peer itself is only constructed and driven once `run` is awaited.
+#[pyclass]
+pub struct PyPeer {
+    // constructor arguments for `Peer::new`; taken exactly once by `run`
+    peer: TakeOnce<PeerBuilder>,
+    // sending half of the `ToSwarm` command channel
+    to_swarm: mpsc::Sender<ToSwarm>,
+    // receiving half of the `FromSwarm` event channel; `recv` uses `try_lock` on it
+    from_swarm: Mutex<mpsc::Receiver<FromSwarm>>,
 }

 #[gen_stub_pymethods]
 #[pymethods]
-impl PyNetworkingHandle {
-    // NOTE: `async fn`s here that use `.await` will wrap the future in `.allow_threads_py()`
-    //       immediately beforehand to release the interpreter.
-    //       SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-
-    // ---- Lifecycle management methods ----
-
-    #[new]
-    fn py_new(identity: Bound<'_, PyKeypair>) -> PyResult<Self> {
-        use pyo3_async_runtimes::tokio::get_runtime;
-
-        // create communication channels
-        let (to_task_tx, to_task_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
-        let (connection_update_tx, connection_update_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
-        let (gossipsub_message_tx, gossipsub_message_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
-
-        // get identity
-        let identity = identity.borrow().0.clone();
-
-        // create networking swarm (within tokio context!! or it crashes)
-        let swarm = get_runtime()
-            .block_on(async { create_swarm(identity) })
-            .pyerr()?;
-
-        // spawn tokio task running the networking logic
-        get_runtime().spawn(async move {
-            networking_task(
-                swarm,
-                to_task_rx,
-                connection_update_tx,
-                gossipsub_message_tx,
-            )
-            .await;
-        });
-        Ok(Self::new(
-            to_task_tx,
-            connection_update_rx,
-            gossipsub_message_rx,
-        ))
+impl PyPeer {
+    /// Prepares a peer for `namespace` identified by `kp`.
+    ///
+    /// Only the channels are created here; the stored `PeerBuilder` is turned into a running
+    /// `Peer` by `run`.
+    #[staticmethod]
+    fn new(kp: PyKeypair, namespace: String) -> PyResult<Self> {
+        // swarm -> client events
+        let (event_tx, event_rx) = mpsc::channel(1024);
+        // client -> swarm commands
+        let (command_tx, command_rx) = mpsc::channel(1024);
+        let builder = PeerBuilder(namespace, kp.0, event_tx, command_rx);
+        Ok(Self {
+            peer: TakeOnce::new(builder),
+            to_swarm: command_tx,
+            from_swarm: Mutex::new(event_rx),
+        })
    }

    #[gen_stub(skip)]
-    const fn __traverse__(&self, _visit: PyVisit<'_>) -> Result<(), PyTraverseError> {
-        Ok(()) // This is needed purely so `__clear__` can work
+    /// Builds the peer and drives it until it stops or this coroutine is cancelled.
+    ///
+    /// The stored `PeerBuilder` is consumed, so a second call raises
+    /// `RuntimeError("tried to run peer twice")`. `Peer::new` and `Peer::run` execute inside a
+    /// task spawned on the runtime returned by `get_runtime()`; their failures are raised as
+    /// `ConnectionError`. A panic inside that task is raised as `RuntimeError`.
+    /// Cancellation through the cancel handle resolves to `Ok(())`.
+    async fn run(&self, #[pyo3(cancel_handle)] mut cancel: CancelHandle) -> PyResult<()> {
+        let builder = self
+            .peer
+            .take()
+            .ok_or_else(|| PyRuntimeError::new_err("tried to run peer twice"))?;
+        let jh = pyo3_async_runtimes::tokio::get_runtime()
+            .spawn(async move {
+                let mut peer =
+                    Peer::new(builder.0, builder.1, builder.2, builder.3).map_err(|_| {
+                        PyConnectionError::new_err("peer failed to listen on default address")
+                    })?;
+                peer.run()
+                    .await
+                    .map_err(|()| PyConnectionError::new_err("peer communication closed"))
+            })
+            // race the join handle against cancellation coming from the Python side
+            // NOTE(review): the spawned task is not explicitly aborted when the cancel branch
+            // wins; presumably it keeps running after the join handle is dropped. Confirm intent.
+            .or(async {
+                cancel.cancelled().await;
+                Ok(Ok(()))
+            });
+        match AllowThreads(pin!(jh)).await {
+            Err(e) if e.is_cancelled() => Ok(()),
+            Err(e) if e.is_panic() => Err(PyRuntimeError::new_err(format!("tokio panic {e}"))),
+            // assumes a join error is always either a cancellation or a panic
+            Err(_) => unreachable!(),
+            Ok(res) => res,
+        }
+    }
+
+    /// Queues a `ToSwarm::Subscribe` command for `topic`.
+    ///
+    /// Raises `RuntimeError` if the command channel is closed.
+    async fn subscribe(&self, topic: String) -> PyResult<()> {
+        let command = ToSwarm::Subscribe(topic);
+        match self.to_swarm.send(command).await {
+            Ok(()) => Ok(()),
+            Err(_) => Err(PyRuntimeError::new_err("swarm communication closed")),
+        }
+    }
+    /// Queues a `ToSwarm::Unsubscribe` command for `topic`.
+    ///
+    /// Raises `RuntimeError` if the command channel is closed.
+    async fn unsubscribe(&self, topic: String) -> PyResult<()> {
+        let command = ToSwarm::Unsubscribe(topic);
+        match self.to_swarm.send(command).await {
+            Ok(()) => Ok(()),
+            Err(_) => Err(PyRuntimeError::new_err("swarm communication closed")),
+        }
+    }
+    /// Queues a `ToSwarm::Message` command carrying `payload` for `topic`.
+    ///
+    /// Raises `RuntimeError` if the command channel is closed.
+    async fn send(&self, topic: String, payload: Py<PyBytes>) -> PyResult<()> {
+        // copy the payload out while briefly attached to the interpreter, so the interpreter
+        // is not needed while awaiting the channel below
+        let bytes = Python::attach(|py| payload.bind(py).as_bytes().to_vec());
+        let command = ToSwarm::Message(topic, bytes);
+        match self.to_swarm.send(command).await {
+            Ok(()) => Ok(()),
+            Err(_) => Err(PyRuntimeError::new_err("swarm communication closed")),
+        }
    }

    #[gen_stub(skip)]
-    fn __clear__(&mut self) {
-        // TODO: may or may not need to await a "kill-signal" oneshot channel message,
-        //       to ensure that the networking task is done BEFORE exiting the clear function...
-        //       but this may require GIL?? and it may not be safe to call GIL here??
-        self.to_task_tx = None; // Using Option<T> as a trick to force channel to be dropped
+    /// Waits for the next event from the swarm and returns it wrapped in `PySwarmEvent`.
+    ///
+    /// Publish failures arrive on the same channel and are handled here: `AllQueuesFull` raises
+    /// `ConnectionError`, `MessageTooLarge` raises `ValueError`, and every other `PublishError`
+    /// is skipped so the wait continues with the next event.
+    ///
+    /// Raises `RuntimeError("tried to recv twice")` when the receiver lock is already held, and
+    /// `RuntimeError("swarm communication closed")` when the wait yields `None`.
+    async fn recv(
+        &self,
+        #[pyo3(cancel_handle)] mut cancel: CancelHandle,
+    ) -> PyResult<PySwarmEvent> {
+        // loop so that skipped publish errors fall through to the next channel item
+        loop {
+            return match AllowThreads(pin!(
+                self.from_swarm
+                    // non-blocking lock: a concurrent `recv` fails fast instead of queueing
+                    .try_lock()
+                    .map_err(|_| PyRuntimeError::new_err("tried to recv twice"))?
+                    .recv()
+                    // cancellation resolves the race to `None`, which is reported below the
+                    // same way as a closed channel
+                    // NOTE(review): confirm that conflating the two cases is intended
+                    .or(async {
+                        cancel.cancelled().await;
+                        None
+                    })
+            ))
+            .await
+            {
+                Some(FromSwarm::PublishError(p)) => match p {
+                    PublishError::AllQueuesFull(_) => {
+                        Err(PyConnectionError::new_err("swarm overloaded"))
+                    }
+                    PublishError::MessageTooLarge => {
+                        Err(PyValueError::new_err("message too large"))
+                    }
+                    PublishError::NoPeersSubscribedToTopic => {
+                        continue;
+                    }
+                    // TODO(evan): logs here
+                    _ => continue,
+                },
+                None => Err(PyRuntimeError::new_err("swarm communication closed")),
+                Some(fs) => Ok(PySwarmEvent(fs)),
+            };
+        }
    }
-
-    // ---- Connection update receiver methods ----
-
-    /// Receives the next `ConnectionUpdate` from networking.
-    async fn connection_update_recv(&self) -> PyResult<PyConnectionUpdate> {
-        self.connection_update_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_py()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-    }
-
-    /// Receives at most `limit` `ConnectionUpdate`s from networking and returns them.
-    ///
-    /// For `limit = 0`, an empty collection of `ConnectionUpdate`s will be returned immediately.
-    /// For `limit > 0`, if there are no `ConnectionUpdate`s in the channel's queue this method
-    /// will sleep until a `ConnectionUpdate`s is sent.
-    async fn connection_update_recv_many(&self, limit: usize) -> PyResult<Vec<PyConnectionUpdate>> {
-        self.connection_update_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_many_py(limit)
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-    }
-
-    // TODO: rn this blocks main thread if anything else is awaiting the channel (bc its a mutex)
-    //       so its too dangerous to expose just yet. figure out a better semantics for handling this,
-    //       so things don't randomly block
-    // /// Tries to receive the next `ConnectionUpdate` from networking.
-    // fn connection_update_try_recv(&self) -> PyResult<Option<PyConnectionUpdate>> {
-    //     self.connection_update_rx.blocking_lock().try_recv_py()
-    // }
-    //
-    // /// Checks if the `ConnectionUpdate` channel is empty.
-    // fn connection_update_is_empty(&self) -> bool {
-    //     self.connection_update_rx.blocking_lock().is_empty()
-    // }
-    //
-    // /// Returns the number of `ConnectionUpdate`s in the channel.
-    // fn connection_update_len(&self) -> usize {
-    //     self.connection_update_rx.blocking_lock().len()
-    // }
-
-    // ---- Gossipsub management methods ----
-
-    /// Subscribe to a `GossipSub` topic.
-    ///
-    /// Returns `True` if the subscription worked. Returns `False` if we were already subscribed.
-    async fn gossipsub_subscribe(&self, topic: String) -> PyResult<bool> {
-        let (tx, rx) = oneshot::channel();
-
-        // send off request to subscribe
-        self.to_task_tx()
-            .send_py(ToTask::GossipsubSubscribe {
-                topic,
-                result_tx: tx,
-            })
-            .allow_threads_py() // allow-threads-aware async call
-            .await?;
-
-        // wait for response & return any errors
-        rx.allow_threads_py() // allow-threads-aware async call
-            .await
-            .map_err(|_| PyErr::receiver_channel_closed())?
-    }
-
-    /// Unsubscribes from a `GossipSub` topic.
-    ///
-    /// Returns `True` if we were subscribed to this topic. Returns `False` if we were not subscribed.
-    async fn gossipsub_unsubscribe(&self, topic: String) -> PyResult<bool> {
-        let (tx, rx) = oneshot::channel();
-
-        // send off request to unsubscribe
-        self.to_task_tx()
-            .send_py(ToTask::GossipsubUnsubscribe {
-                topic,
-                result_tx: tx,
-            })
-            .allow_threads_py() // allow-threads-aware async call
-            .await?;
-
-        // wait for response & convert any errors
-        rx.allow_threads_py() // allow-threads-aware async call
-            .await
-            .map_err(|_| PyErr::receiver_channel_closed())
-    }
-
-    /// Publishes a message with multiple topics to the `GossipSub` network.
-    ///
-    /// If no peers are found that subscribe to this topic, throws `NoPeersSubscribedToTopicError` exception.
-    async fn gossipsub_publish(&self, topic: String, data: Py<PyBytes>) -> PyResult<()> {
-        let (tx, rx) = oneshot::channel();
-
-        // send off request to subscribe
-        let data = Python::attach(|py| Vec::from(data.as_bytes(py)));
-        self.to_task_tx()
-            .send_py(ToTask::GossipsubPublish {
-                topic,
-                data,
-                result_tx: tx,
-            })
-            .allow_threads_py() // allow-threads-aware async call
-            .await?;
-
-        // wait for response & return any errors => ignore messageID for now!!!
-        let _ = rx
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .map_err(|_| PyErr::receiver_channel_closed())??;
-        Ok(())
-    }
-
-    // ---- Gossipsub message receiver methods ----
-
-    /// Receives the next message from the `GossipSub` network.
-    async fn gossipsub_recv(&self) -> PyResult<(String, Py<PyBytes>)> {
-        self.gossipsub_message_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_py()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .map(|(t, d)| (t, d.pybytes()))
-    }
-
-    /// Receives at most `limit` messages from the `GossipSub` network and returns them.
-    ///
-    /// For `limit = 0`, an empty collection of messages will be returned immediately.
-    /// For `limit > 0`, if there are no messages in the channel's queue this method
-    /// will sleep until a message is sent.
-    async fn gossipsub_recv_many(&self, limit: usize) -> PyResult<Vec<(String, Py<PyBytes>)>> {
-        Ok(self
-            .gossipsub_message_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_many_py(limit)
-            .allow_threads_py() // allow-threads-aware async call
-            .await?
-            .into_iter()
-            .map(|(t, d)| (t, d.pybytes()))
-            .collect())
-    }
-
-    // TODO: rn this blocks main thread if anything else is awaiting the channel (bc its a mutex)
-    //       so its too dangerous to expose just yet. figure out a better semantics for handling this,
-    //       so things don't randomly block
-    // /// Tries to receive the next message from the `GossipSub` network.
-    // fn gossipsub_try_recv(&self) -> PyResult<Option<(String, Py<PyBytes>)>> {
-    //     Ok(self
-    //         .gossipsub_message_rx
-    //         .blocking_lock()
-    //         .try_recv_py()?
-    //         .map(|(t, d)| (t, d.pybytes())))
-    // }
-    //
-    // /// Checks if the `GossipSub` message channel is empty.
-    // fn gossipsub_is_empty(&self) -> bool {
-    //     self.gossipsub_message_rx.blocking_lock().is_empty()
-    // }
-    //
-    // /// Returns the number of `GossipSub` messages in the channel.
-    // fn gossipsub_len(&self) -> usize {
-    //     self.gossipsub_message_rx.blocking_lock().len()
-    // }
 }

-pub fn networking_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_class::<exception::PyNoPeersSubscribedToTopicError>()?;
-    m.add_class::<exception::PyAllQueuesFullError>()?;
-
-    m.add_class::<PyConnectionUpdateType>()?;
-    m.add_class::<PyConnectionUpdate>()?;
-    m.add_class::<PyConnectionUpdateType>()?;
-    m.add_class::<PyNetworkingHandle>()?;
-
-    Ok(())
+// `PyPeer::run` and `PyPeer::recv` are marked `#[gen_stub(skip)]`, so their Python stubs are
+// submitted by hand here without the `cancel_handle` argument (presumably because the stub
+// generator cannot express that argument -- confirm).
+submit! {
+    gen_methods_from_python! {
+        r#"
+        class PyPeer:
+            async def run(self): ...
+            async def recv(self) -> PySwarmEvent: ...
+        "#
+    }
+}
+
+/// Python-visible wrapper around a single `FromSwarm` event, as returned by `PyPeer::recv`.
+#[gen_stub_pyclass]
+#[pyclass]
+pub struct PySwarmEvent(FromSwarm);
+
+#[gen_stub_pymethods]
+#[pymethods]
+impl PySwarmEvent {
+    // One accessor per event variant; each yields `None` when the wrapped event is a
+    // different variant.
+
+    /// Base58 peer id when this is a `Discovered` event.
+    fn downcast_discovered(&self) -> Option<String> {
+        match &self.0 {
+            FromSwarm::Discovered(peer_id) => Some(peer_id.to_base58()),
+            _ => None,
+        }
+    }
+
+    /// Base58 peer id when this is an `Expired` event.
+    fn downcast_expired(&self) -> Option<String> {
+        match &self.0 {
+            FromSwarm::Expired(peer_id) => Some(peer_id.to_base58()),
+            _ => None,
+        }
+    }
+
+    /// `(base58 peer id, topic, payload bytes)` when this is a `Message` event.
+    fn downcast_message<'py>(
+        &self,
+        py: Python<'py>,
+    ) -> Option<(String, String, Bound<'py, PyBytes>)> {
+        match &self.0 {
+            FromSwarm::Message(peer_id, topic, data) => {
+                let payload = PyBytes::new(py, data);
+                Some((peer_id.to_base58(), topic.clone(), payload))
+            }
+            _ => None,
+        }
+    }
 }
--- a/rust/exo_pyo3_bindings/tests/dummy.rs
+++ b/rust/exo_pyo3_bindings/tests/dummy.rs
@@ -1,54 +0,0 @@
-#[cfg(test)]
-mod tests {
-    use core::mem::drop;
-    use core::option::Option::Some;
-    use core::time::Duration;
-    use tokio;
-    use tokio::sync::mpsc;
-
-    #[tokio::test]
-    async fn test_drop_channel() {
-        struct Ping;
-
-        let (tx, mut rx) = mpsc::channel::<Ping>(10);
-
-        let _ = tokio::spawn(async move {
-            println!("TASK: entered");
-
-            loop {
-                tokio::select! {
-                    result = rx.recv() => {
-                        match result {
-                            Some(_) => {
-                                println!("TASK: pinged");
-                            }
-                            None => {
-                                println!("TASK: closing channel");
-                                break;
-                            }
-                        }
-                    }
-                    _ = tokio::time::sleep(Duration::from_secs_f32(0.1)) => {
-                        println!("TASK: heartbeat");
-                    }
-                }
-            }
-
-            println!("TASK: exited");
-        });
-
-        let tx2 = tx.clone();
-
-        tokio::time::sleep(Duration::from_secs_f32(0.11)).await;
-
-        tx.send(Ping).await.expect("Should not fail");
-        drop(tx);
-
-        tokio::time::sleep(Duration::from_secs_f32(0.11)).await;
-
-        tx2.send(Ping).await.expect("Should not fail");
-        drop(tx2);
-
-        tokio::time::sleep(Duration::from_secs_f32(0.11)).await;
-    }
-}
--- a/rust/networking/Cargo.toml
+++ b/rust/networking/Cargo.toml
@@ -13,22 +13,11 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-# datastructures
-either = { workspace = true }
-
-# macro dependencies
-extend = { workspace = true }
-delegate = { workspace = true }
-
 # async
 tokio = { workspace = true, features = ["full"] }
-futures-lite = { workspace = true }
-futures-timer = { workspace = true }

 # utility dependencies
-util = { workspace = true }
 tracing-subscriber = { version = "0.3.19", features = ["default", "env-filter"] }
-keccak-const = { workspace = true }

 # tracing/logging
 log = { workspace = true }
--- a/rust/networking/examples/chatroom.rs
+++ b/rust/networking/examples/chatroom.rs
@@ -1,6 +1,6 @@
-use futures_lite::StreamExt;
-use libp2p::{gossipsub, identity, swarm::SwarmEvent};
-use networking::{discovery, swarm};
+use libp2p::identity;
+use networking::{self, FromSwarm, ToSwarm};
+use tokio::sync::mpsc;
 use tokio::{io, io::AsyncBufReadExt as _, select};
 use tracing_subscriber::EnvFilter;
 use tracing_subscriber::filter::LevelFilter;
@@ -12,63 +12,51 @@ async fn main() {
        .try_init();

    // Configure swarm
-    let mut swarm =
-        swarm::create_swarm(identity::Keypair::generate_ed25519()).expect("Swarm creation failed");
+    let (to_client, mut from_swarm) = mpsc::channel(20);
+    let (to_swarm, from_client) = mpsc::channel(20);
+    let mut peer = networking::Peer::new(
+        "chatroom!".to_string(),
+        identity::Keypair::generate_ed25519(),
+        to_client,
+        from_client,
+    )
+    .expect("listen error");

    // Create a Gossipsub topic & subscribe
-    let topic = gossipsub::IdentTopic::new("test-net");
-    swarm
-        .behaviour_mut()
-        .gossipsub
-        .subscribe(&topic)
-        .expect("Subscribing to topic failed");
-
    // Read full lines from stdin
    let mut stdin = io::BufReader::new(io::stdin()).lines();
    println!("Enter messages via STDIN and they will be sent to connected peers using Gossipsub");

+    let jh = tokio::spawn(async move { peer.run().await });
+    _ = to_swarm
+        .send(ToSwarm::Subscribe("chatting".to_string()))
+        .await;
+
    // Kick it off
    loop {
        select! {
            // on gossipsub outgoing
            Ok(Some(line)) = stdin.next_line() => {
-                if let Err(e) = swarm
-                    .behaviour_mut().gossipsub
-                    .publish(topic.clone(), line.as_bytes()) {
-                    println!("Publish error: {e:?}");
-                }
+                _ = to_swarm.send(ToSwarm::Message("chatting".to_string(), line.into_bytes())).await;
            }
-            event = swarm.next() => match event {
+            event = from_swarm.recv() => match event {
                // on gossipsub incoming
-                Some(SwarmEvent::Behaviour(swarm::BehaviourEvent::Gossipsub(gossipsub::Event::Message {
-                    propagation_source: peer_id,
-                    message_id: id,
-                    message,
-                }))) => println!(
-                        "\n\nGot message: '{}' with id: {id} from peer: {peer_id}\n\n",
-                        String::from_utf8_lossy(&message.data),
+                Some(FromSwarm::Message(peer_id,_, data)) => println!(
+                        "\n\nGot message: '{}' from peer: {peer_id}\n\n",
+                        String::from_utf8_lossy(&data),
                    ),

                // on discovery
-                Some(SwarmEvent::Behaviour(swarm::BehaviourEvent::Discovery(e)) )=> match e {
-                    discovery::Event::ConnectionEstablished {
-                        peer_id, connection_id, remote_ip, remote_tcp_port
-                    } => {
-                        println!("\n\nConnected to: {peer_id}; connection ID: {connection_id}; remote IP: {remote_ip}; remote TCP port: {remote_tcp_port}\n\n");
-                    }
-                    discovery::Event::ConnectionClosed {
-                        peer_id, connection_id, remote_ip, remote_tcp_port
-                    } => {
-                        eprintln!("\n\nDisconnected from: {peer_id}; connection ID: {connection_id}; remote IP: {remote_ip}; remote TCP port: {remote_tcp_port}\n\n");
-                    }
+                Some(FromSwarm::Discovered(peer_id)) => {
+                    println!("\n\nConnected to: {peer_id}\n\n");
                }
-
-                // ignore outgoing errors: those are normal
-                e@Some(SwarmEvent::OutgoingConnectionError { .. }) => { log::debug!("Outgoing connection error: {e:?}"); }
-
-                // otherwise log any other event
-                e => { log::info!("Other event {e:?}"); }
+                Some(FromSwarm::Expired(peer_id)) => {
+                    println!("\n\nDisconnected from: {peer_id}\n\n");
+                }
+                Some(FromSwarm::PublishError(e)) => eprintln!("\n\nError {e:?}\n\n"),
+                None => break,
            }
        }
    }
+    _ = jh.await;
 }
--- a/rust/networking/src/RESEARCH_NOTES.txt
+++ b/rust/networking/src/RESEARCH_NOTES.txt
@@ -1,44 +0,0 @@
-https://github.com/ml-explore/mlx/commit/3fe98bacc7640d857acf3539f1d21b47a32e5609
-^raw sockets distributed -> `<net/ndrv.h>` -> https://newosxbook.com/code/xnu-3247.1.106/bsd/net/ndrv.h.auto.html
--> header file for a networking component found in the macOS kernel (XNU) that defines structures for network device driver registration, specifically the ndrv_demux_desc and ndrv_protocol_desc structures used for demultiplexing protocol data at the network interface level. It specifies how to describe protocol data, such as an Ethernet type or a SNAP header, and how to associate these descriptions with a specific protocol family to receive matching packets.
--> Used to bind an NDRV socket so that packets that match given protocol demux descriptions can be received.
--> An NDRV socket is a special kind of socket in the Darwin/macOS operating system's XNU kernel, used for low-level network packet manipulation and binding to specific protocols for packet processing. It allows user-space applications or drivers to directly write Layer 2 (L2) network packets or interact with the network stack at a lower level, often by binding to protocol descriptors like the ndrv_protocol_desc. This type of socket is used for functions such as capturing and injecting packets, especially in network infrastructure software like routers or for kernel-level network monitoring and security tools.
--> also called PF_NDRV sockets --> https://newosxbook.com/bonus/vol1ch16.html
----> they are conceptually similar to https://scapy.disruptivelabs.in/networking/socket-interface PF_RAW or PF_PACKET
-
-https://stackoverflow.com/questions/17169298/af-packet-on-osx
-^AF_PACKET duplicates the packets as soon as it receives them from the physical layer (for incoming packets) or just before sending them out to the physical layer (for outgoing packets). -> this is on Linux only
-^it doesn't exist on OS X so you can use /dev/bpfX (Berkeley Packet Filter) for sniffing
-
-https://www.unix.com/man_page/mojave/4/ip/
-^OS X manpages for IP
-
-https://developer.apple.com/documentation/kernel/implementing_drivers_system_extensions_and_kexts
-^driver kit, system extensions & kexts for macOS
-
----
-
-To set up a Linux system to use a Thunderbolt connection as a network device, connect the two computers with a Thunderbolt cable, load the thunderbolt-net kernel module (usually automatic but modprobe is an option for manual loading), and then the operating system will create virtual Ethernet interfaces (e.g., thunderbolt0) for networking. You can then use standard tools like ifconfig or your desktop environment's network manager to configure these new interfaces for a link-local network.
--> https://gist.github.com/geosp/80fbd39e617b7d1d9421683df4ea224a
----> here is a guide on how to set up thunderbolt-ethernet on linux
----> I may be able to steal the thunderbolt-net code ideas to implement a kernel module for MacOS
-
-https://chatgpt.com/s/t_68af8e41a8548191993281a014f846a7
-^GPT discussion about making socket interface
-
-https://chatgpt.com/s/t_68afb798a85c8191973c02a0fa7a48a3 --> link-local address,,??
-https://chatgpt.com/s/t_68afb02987e08191b2b0044d3667ece2
-^GPT discussion about accessing TB on MacOS low level interactions
-
--------------------------------
-
-https://www.intel.com/content/www/us/en/support/articles/000098893/software.html
-^Thunderbolt Share & Thunderbolt Networking Mode => intel's equivalent of thunderbolt bridge
-
-
---------------------------------
-
-https://www.zerotier.com/blog/how-zerotier-eliminated-kernel-extensions-on-macos/
-->fake ethernet devices on MacOS -> omg??? we can detect thunderbolt bridge, then bind to it, then re-expose it as fake ethernet??
-->ps: https://chatgpt.com/s/t_68afb2b25fb881919526763fb5d7359c, AF/PF_NDRV are one and the same!!!
-->https://github.com/zerotier/ZeroTierOne/blob/dev/osdep/MacEthernetTapAgent.c
--- a/rust/networking/src/discovery.rs
+++ b/rust/networking/src/discovery.rs
@@ -1,382 +0,0 @@
-use crate::ext::MultiaddrExt;
-use delegate::delegate;
-use either::Either;
-use futures_lite::FutureExt;
-use futures_timer::Delay;
-use libp2p::core::transport::PortUse;
-use libp2p::core::{ConnectedPoint, Endpoint};
-use libp2p::swarm::behaviour::ConnectionEstablished;
-use libp2p::swarm::dial_opts::DialOpts;
-use libp2p::swarm::{
-    CloseConnection, ConnectionClosed, ConnectionDenied, ConnectionHandler,
-    ConnectionHandlerSelect, ConnectionId, FromSwarm, NetworkBehaviour, THandler, THandlerInEvent,
-    THandlerOutEvent, ToSwarm, dummy,
-};
-use libp2p::{Multiaddr, PeerId, identity, mdns};
-use std::collections::{BTreeSet, HashMap};
-use std::convert::Infallible;
-use std::io;
-use std::net::IpAddr;
-use std::task::{Context, Poll};
-use std::time::Duration;
-use util::wakerdeque::WakerDeque;
-
-const RETRY_CONNECT_INTERVAL: Duration = Duration::from_secs(5);
-
-mod managed {
-    use libp2p::swarm::NetworkBehaviour;
-    use libp2p::{identity, mdns, ping};
-    use std::io;
-    use std::time::Duration;
-
-    const MDNS_RECORD_TTL: Duration = Duration::from_secs(2_500);
-    const MDNS_QUERY_INTERVAL: Duration = Duration::from_secs(1_500);
-    const PING_TIMEOUT: Duration = Duration::from_millis(2_500);
-    const PING_INTERVAL: Duration = Duration::from_millis(2_500);
-
-    #[derive(NetworkBehaviour)]
-    pub struct Behaviour {
-        mdns: mdns::tokio::Behaviour,
-        ping: ping::Behaviour,
-    }
-
-    impl Behaviour {
-        pub fn new(keypair: &identity::Keypair) -> io::Result<Self> {
-            Ok(Self {
-                mdns: mdns_behaviour(keypair)?,
-                ping: ping_behaviour(),
-            })
-        }
-    }
-
-    fn mdns_behaviour(keypair: &identity::Keypair) -> io::Result<mdns::tokio::Behaviour> {
-        use mdns::{Config, tokio};
-
-        // mDNS config => enable IPv6
-        let mdns_config = Config {
-            ttl: MDNS_RECORD_TTL,
-            query_interval: MDNS_QUERY_INTERVAL,
-
-            // enable_ipv6: true, // TODO: for some reason, TCP+mDNS don't work well with ipv6?? figure out how to make work
-            ..Default::default()
-        };
-
-        let mdns_behaviour = tokio::Behaviour::new(mdns_config, keypair.public().to_peer_id());
-        Ok(mdns_behaviour?)
-    }
-
-    fn ping_behaviour() -> ping::Behaviour {
-        ping::Behaviour::new(
-            ping::Config::new()
-                .with_timeout(PING_TIMEOUT)
-                .with_interval(PING_INTERVAL),
-        )
-    }
-}
-
-/// Events for when a listening connection is truly established and truly closed.
-#[derive(Debug, Clone)]
-pub enum Event {
-    ConnectionEstablished {
-        peer_id: PeerId,
-        connection_id: ConnectionId,
-        remote_ip: IpAddr,
-        remote_tcp_port: u16,
-    },
-    ConnectionClosed {
-        peer_id: PeerId,
-        connection_id: ConnectionId,
-        remote_ip: IpAddr,
-        remote_tcp_port: u16,
-    },
-}
-
-/// Discovery behavior that wraps mDNS to produce truly discovered durable peer-connections.
-///
-/// The behaviour operates as such:
-///  1) All true (listening) connections/disconnections are tracked, emitting corresponding events
-///     to the swarm.
-///  1) mDNS discovered/expired peers are tracked; discovered but not connected peers are dialed
-///     immediately, and expired but connected peers are disconnected from immediately.
-///  2) Every fixed interval: discovered but not connected peers are dialed, and expired but
-///     connected peers are disconnected from.
-pub struct Behaviour {
-    // state-tracking for managed behaviors & mDNS-discovered peers
-    managed: managed::Behaviour,
-    mdns_discovered: HashMap<PeerId, BTreeSet<Multiaddr>>,
-
-    retry_delay: Delay, // retry interval
-
-    // pending events to emmit => waker-backed Deque to control polling
-    pending_events: WakerDeque<ToSwarm<Event, Infallible>>,
-}
-
-impl Behaviour {
-    pub fn new(keypair: &identity::Keypair) -> io::Result<Self> {
-        Ok(Self {
-            managed: managed::Behaviour::new(keypair)?,
-            mdns_discovered: HashMap::new(),
-            retry_delay: Delay::new(RETRY_CONNECT_INTERVAL),
-            pending_events: WakerDeque::new(),
-        })
-    }
-
-    fn dial(&mut self, peer_id: PeerId, addr: Multiaddr) {
-        self.pending_events.push_back(ToSwarm::Dial {
-            opts: DialOpts::peer_id(peer_id).addresses(vec![addr]).build(),
-        })
-    }
-
-    fn close_connection(&mut self, peer_id: PeerId, connection: ConnectionId) {
-        // push front to make this IMMEDIATE
-        self.pending_events.push_front(ToSwarm::CloseConnection {
-            peer_id,
-            connection: CloseConnection::One(connection),
-        })
-    }
-
-    fn handle_mdns_discovered(&mut self, peers: Vec<(PeerId, Multiaddr)>) {
-        for (p, ma) in peers {
-            self.dial(p, ma.clone()); // always connect
-
-            // get peer's multi-addresses or insert if missing
-            let Some(mas) = self.mdns_discovered.get_mut(&p) else {
-                self.mdns_discovered.insert(p, BTreeSet::from([ma]));
-                continue;
-            };
-
-            // multiaddress should never already be present - else something has gone wrong
-            let is_new_addr = mas.insert(ma);
-            assert!(is_new_addr, "cannot discover a discovered peer");
-        }
-    }
-
-    fn handle_mdns_expired(&mut self, peers: Vec<(PeerId, Multiaddr)>) {
-        for (p, ma) in peers {
-            // at this point, we *must* have the peer
-            let mas = self
-                .mdns_discovered
-                .get_mut(&p)
-                .expect("nonexistent peer cannot expire");
-
-            // at this point, we *must* have the multiaddress
-            let was_present = mas.remove(&ma);
-            assert!(was_present, "nonexistent multiaddress cannot expire");
-
-            // if empty, remove the peer-id entirely
-            if mas.is_empty() {
-                self.mdns_discovered.remove(&p);
-            }
-        }
-    }
-
-    fn on_connection_established(
-        &mut self,
-        peer_id: PeerId,
-        connection_id: ConnectionId,
-        remote_ip: IpAddr,
-        remote_tcp_port: u16,
-    ) {
-        // send out connected event
-        self.pending_events
-            .push_back(ToSwarm::GenerateEvent(Event::ConnectionEstablished {
-                peer_id,
-                connection_id,
-                remote_ip,
-                remote_tcp_port,
-            }));
-    }
-
-    fn on_connection_closed(
-        &mut self,
-        peer_id: PeerId,
-        connection_id: ConnectionId,
-        remote_ip: IpAddr,
-        remote_tcp_port: u16,
-    ) {
-        // send out disconnected event
-        self.pending_events
-            .push_back(ToSwarm::GenerateEvent(Event::ConnectionClosed {
-                peer_id,
-                connection_id,
-                remote_ip,
-                remote_tcp_port,
-            }));
-    }
-}
-
-impl NetworkBehaviour for Behaviour {
-    type ConnectionHandler =
-        ConnectionHandlerSelect<dummy::ConnectionHandler, THandler<managed::Behaviour>>;
-    type ToSwarm = Event;
-
-    // simply delegate to underlying mDNS behaviour
-
-    delegate! {
-        to self.managed {
-            fn handle_pending_inbound_connection(&mut self, connection_id: ConnectionId, local_addr: &Multiaddr, remote_addr: &Multiaddr) -> Result<(), ConnectionDenied>;
-            fn handle_pending_outbound_connection(&mut self, connection_id: ConnectionId, maybe_peer: Option<PeerId>, addresses: &[Multiaddr], effective_role: Endpoint) -> Result<Vec<Multiaddr>, ConnectionDenied>;
-        }
-    }
-
-    fn handle_established_inbound_connection(
-        &mut self,
-        connection_id: ConnectionId,
-        peer: PeerId,
-        local_addr: &Multiaddr,
-        remote_addr: &Multiaddr,
-    ) -> Result<THandler<Self>, ConnectionDenied> {
-        Ok(ConnectionHandler::select(
-            dummy::ConnectionHandler,
-            self.managed.handle_established_inbound_connection(
-                connection_id,
-                peer,
-                local_addr,
-                remote_addr,
-            )?,
-        ))
-    }
-
-    #[allow(clippy::needless_question_mark)]
-    fn handle_established_outbound_connection(
-        &mut self,
-        connection_id: ConnectionId,
-        peer: PeerId,
-        addr: &Multiaddr,
-        role_override: Endpoint,
-        port_use: PortUse,
-    ) -> Result<THandler<Self>, ConnectionDenied> {
-        Ok(ConnectionHandler::select(
-            dummy::ConnectionHandler,
-            self.managed.handle_established_outbound_connection(
-                connection_id,
-                peer,
-                addr,
-                role_override,
-                port_use,
-            )?,
-        ))
-    }
-
-    fn on_connection_handler_event(
-        &mut self,
-        peer_id: PeerId,
-        connection_id: ConnectionId,
-        event: THandlerOutEvent<Self>,
-    ) {
-        match event {
-            Either::Left(ev) => libp2p::core::util::unreachable(ev),
-            Either::Right(ev) => {
-                self.managed
-                    .on_connection_handler_event(peer_id, connection_id, ev)
-            }
-        }
-    }
-
-    // hook into these methods to drive behavior
-
-    fn on_swarm_event(&mut self, event: FromSwarm) {
-        self.managed.on_swarm_event(event); // let mDNS handle swarm events
-
-        // handle swarm events to update internal state:
-        match event {
-            FromSwarm::ConnectionEstablished(ConnectionEstablished {
-                peer_id,
-                connection_id,
-                endpoint,
-                ..
-            }) => {
-                let remote_address = match endpoint {
-                    ConnectedPoint::Dialer { address, .. } => address,
-                    ConnectedPoint::Listener { send_back_addr, .. } => send_back_addr,
-                };
-
-                if let Some((ip, port)) = remote_address.try_to_tcp_addr() {
-                    // handle connection established event which is filtered correctly
-                    self.on_connection_established(peer_id, connection_id, ip, port)
-                }
-            }
-            FromSwarm::ConnectionClosed(ConnectionClosed {
-                peer_id,
-                connection_id,
-                endpoint,
-                ..
-            }) => {
-                let remote_address = match endpoint {
-                    ConnectedPoint::Dialer { address, .. } => address,
-                    ConnectedPoint::Listener { send_back_addr, .. } => send_back_addr,
-                };
-
-                if let Some((ip, port)) = remote_address.try_to_tcp_addr() {
-                    // handle connection closed event which is filtered correctly
-                    self.on_connection_closed(peer_id, connection_id, ip, port)
-                }
-            }
-
-            // since we are running TCP/IP transport layer, we are assuming that
-            // no address changes can occur, hence encountering one is a fatal error
-            FromSwarm::AddressChange(a) => {
-                unreachable!("unhandlable: address change encountered: {:?}", a)
-            }
-            _ => {}
-        }
-    }
-
-    fn poll(&mut self, cx: &mut Context) -> Poll<ToSwarm<Self::ToSwarm, THandlerInEvent<Self>>> {
-        // delegate to managed behaviors for any behaviors they need to perform
-        match self.managed.poll(cx) {
-            Poll::Ready(ToSwarm::GenerateEvent(e)) => {
-                match e {
-                    // handle discovered and expired events from mDNS
-                    managed::BehaviourEvent::Mdns(e) => match e.clone() {
-                        mdns::Event::Discovered(peers) => {
-                            self.handle_mdns_discovered(peers);
-                        }
-                        mdns::Event::Expired(peers) => {
-                            self.handle_mdns_expired(peers);
-                        }
-                    },
-
-                    // handle ping events => if error then disconnect
-                    managed::BehaviourEvent::Ping(e) => {
-                        if let Err(_) = e.result {
-                            self.close_connection(e.peer, e.connection.clone())
-                        }
-                    }
-                }
-
-                // since we just consumed an event, we should immediately wake just in case
-                // there are more events to come where that came from
-                cx.waker().wake_by_ref();
-            }
-
-            // forward any other mDNS event to the swarm or its connection handler(s)
-            Poll::Ready(e) => {
-                return Poll::Ready(
-                    e.map_out(|_| unreachable!("events returning to swarm already handled"))
-                        .map_in(Either::Right),
-                );
-            }
-
-            Poll::Pending => {}
-        }
-
-        // retry connecting to all mDNS peers periodically (fails safely if already connected)
-        if self.retry_delay.poll(cx).is_ready() {
-            for (p, mas) in self.mdns_discovered.clone() {
-                for ma in mas {
-                    self.dial(p, ma)
-                }
-            }
-            self.retry_delay.reset(RETRY_CONNECT_INTERVAL) // reset timeout
-        }
-
-        // send out any pending events from our own service
-        if let Some(e) = self.pending_events.pop_front(cx) {
-            return Poll::Ready(e.map_in(Either::Left));
-        }
-
-        // wait for pending events
-        Poll::Pending
-    }
-}
--- a/rust/networking/src/lib.rs
+++ b/rust/networking/src/lib.rs
@@ -1,44 +1,299 @@
-//! TODO: crate documentation
-//!
-//! this is here as a placeholder documentation
-//!
-//!
-pub mod discovery;
-pub mod swarm;
+use libp2p::{
+    Multiaddr, PeerId,
+    futures::StreamExt,
+    gossipsub::{self, TopicHash},
+    identify,
+    identity::Keypair,
+    mdns,
+    swarm::{NetworkBehaviour, SwarmEvent, dial_opts::DialOpts},
+};
+use std::collections::HashMap;
+use tokio::sync::mpsc;

-/// Namespace for all the type/trait aliases used by this crate.
-pub(crate) mod alias {
-    use std::error::Error;
+#[derive(Debug)]
+pub struct ListenError;

-    pub type AnyError = Box<dyn Error + Send + Sync + 'static>;
-    pub type AnyResult<T> = Result<T, AnyError>;
+pub enum FromSwarm {
+    PublishError(gossipsub::PublishError),
+    Discovered(PeerId),
+    Expired(PeerId),
+    Message(PeerId, String, Vec<u8>),
+}
+pub enum ToSwarm {
+    Message(String, Vec<u8>),
+    Subscribe(String),
+    Unsubscribe(String),
 }

-/// Namespace for crate-wide extension traits/methods
-pub(crate) mod ext {
-    use extend::ext;
-    use libp2p::Multiaddr;
-    use libp2p::multiaddr::Protocol;
-    use std::net::IpAddr;
+pub struct Peer {
+    pub swarm: libp2p::Swarm<Behaviour>,
+    to_client: mpsc::Sender<FromSwarm>,
+    from_client: mpsc::Receiver<ToSwarm>,
+    namespace: String,
+    known_peers: HashMap<PeerId, Vec<Multiaddr>>,
+}
+impl Peer {
+    pub fn new(
+        namespace: String,
+        kp: Keypair,
+        to_client: mpsc::Sender<FromSwarm>,
+        from_client: mpsc::Receiver<ToSwarm>,
+    ) -> Result<Self, ListenError> {
+        let mut swarm = libp2p::SwarmBuilder::with_existing_identity(kp)
+            .with_tokio()
+            .with_quic()
+            // TODO(evan) .with_bandwidth_metrics()
+            .with_behaviour(|kp| Behaviour::new(namespace.clone(), kp))
+            .expect("invalid swarm behaviour")
+            .build();

-    #[ext(pub, name = MultiaddrExt)]
-    impl Multiaddr {
-        /// If the multiaddress corresponds to a TCP address, extracts it
-        fn try_to_tcp_addr(&self) -> Option<(IpAddr, u16)> {
-            let mut ps = self.into_iter();
-            let ip = if let Some(p) = ps.next() {
-                match p {
-                    Protocol::Ip4(ip) => IpAddr::V4(ip),
-                    Protocol::Ip6(ip) => IpAddr::V6(ip),
-                    _ => return None,
+        swarm
+            .listen_on("/ip6/::/udp/0/quic-v1".parse().expect("invalid multiaddr"))
+            .map_err(|_| ListenError)?;
+        swarm
+            .listen_on(
+                "/ip4/0.0.0.0/udp/0/quic-v1"
+                    .parse()
+                    .expect("invalid multiaddr"),
+            )
+            .map_err(|_| ListenError)?;
+        Ok(Self {
+            swarm,
+            to_client,
+            from_client,
+            namespace,
+            known_peers: HashMap::default(),
+        })
+    }
+    pub async fn run(&mut self) -> Result<(), ()> {
+        loop {
+            tokio::select! {
+                event = self.swarm.next() => self.handle_event(event.ok_or(())?).await?,
+                msg = self.from_client.recv() => self.handle_message(msg.ok_or(())?).await?,
+            }
+        }
+    }
+    async fn handle_message(&mut self, message: ToSwarm) -> Result<(), ()> {
+        match message {
+            ToSwarm::Message(topic, data) => {
+                if let Err(e) = self
+                    .swarm
+                    .behaviour_mut()
+                    .gossipsub
+                    .publish(TopicHash::from_raw(topic), data)
+                {
+                    self.to_client
+                        .send(FromSwarm::PublishError(e))
+                        .await
+                        .map_err(|_| ())?;
                }
-            } else {
-                return None;
-            };
-            let Some(Protocol::Tcp(port)) = ps.next() else {
-                return None;
-            };
-            Some((ip, port))
+            }
+            ToSwarm::Subscribe(topic) => {
+                match self
+                    .swarm
+                    .behaviour_mut()
+                    .gossipsub
+                    .subscribe(&gossipsub::IdentTopic::new(topic))
+                {
+                    Ok(_) => {}
+                    Err(gossipsub::SubscriptionError::NotAllowed) => {
+                        unreachable!("subscription filter hit")
+                    }
+                    Err(gossipsub::SubscriptionError::PublishError(e)) => self
+                        .to_client
+                        .send(FromSwarm::PublishError(e))
+                        .await
+                        .map_err(|_| ())?,
+                }
+            }
+            ToSwarm::Unsubscribe(topic) => {
+                self.swarm
+                    .behaviour_mut()
+                    .gossipsub
+                    .unsubscribe(&gossipsub::IdentTopic::new(topic));
+            }
+        }
+        Ok(())
+    }
+    async fn handle_event(&mut self, event: SwarmEvent<BehaviourEvent>) -> Result<(), ()> {
+        let SwarmEvent::Behaviour(event) = event else {
+            return Ok(());
+        };
+        match event {
+            BehaviourEvent::Gossipsub(gossipsub::Event::Message { message, .. }) => {
+                if let Some(source) = message.source {
+                    self.to_client
+                        .send(FromSwarm::Message(
+                            source,
+                            message.topic.into_string(),
+                            message.data,
+                        ))
+                        .await
+                        .map_err(|_| ())?;
+                }
+            }
+            BehaviourEvent::Identify(identify::Event::Received { peer_id, info, .. }) => {
+                log::debug!(
+                    "identify from {peer_id}: protocol_version='{}' agent_version='{}' (local namespace='{}')",
+                    info.protocol_version,
+                    info.agent_version,
+                    self.namespace
+                );
+                if info.protocol_version == self.namespace {
+                    self.passed_namespace(peer_id);
+                    self.to_client
+                        .send(FromSwarm::Discovered(peer_id))
+                        .await
+                        .map_err(|_| ())?;
+                } else {
+                    self.failed_namespace(peer_id);
+                }
+            }
+            BehaviourEvent::Mdns(mdns::Event::Discovered(v)) => {
+                for (peer_id, addr) in v {
+                    self.known_peers.entry(peer_id).or_default().push(addr);
+                }
+                for (peer_id, addrs) in &self.known_peers {
+                    // dialopts handles rate limiting, we should check errors if we want to blacklist earlier
+                    let _ = self
+                        .swarm
+                        .dial(DialOpts::peer_id(*peer_id).addresses(addrs.clone()).build());
+                }
+            }
+            BehaviourEvent::Mdns(mdns::Event::Expired(v)) => {
+                for (peer_id, addr) in v {
+                    let addrs = self.known_peers.entry(peer_id).or_default();
+                    addrs.retain(|a| *a != addr);
+                    if addrs.is_empty() {
+                        self.known_peers.remove(&peer_id);
+                        self.swarm
+                            .behaviour_mut()
+                            .gossipsub
+                            .remove_explicit_peer(&peer_id);
+                        self.to_client
+                            .send(FromSwarm::Expired(peer_id))
+                            .await
+                            .map_err(|_| ())?;
+                    }
+                }
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+    fn passed_namespace(&mut self, peer_id: PeerId) {
+        self.swarm
+            .behaviour_mut()
+            .gossipsub
+            .remove_blacklisted_peer(&peer_id);
+        self.swarm
+            .behaviour_mut()
+            .gossipsub
+            .add_explicit_peer(&peer_id);
+    }
+    fn failed_namespace(&mut self, peer_id: PeerId) {
+        self.swarm
+            .behaviour_mut()
+            .gossipsub
+            .blacklist_peer(&peer_id);
+        self.swarm
+            .behaviour_mut()
+            .gossipsub
+            .remove_explicit_peer(&peer_id);
+    }
+}
+
+#[derive(NetworkBehaviour)]
+pub struct Behaviour {
+    gossipsub: gossipsub::Behaviour,
+    mdns: mdns::tokio::Behaviour,
+    identify: identify::Behaviour,
+}
+
+impl Behaviour {
+    fn new(namespace: String, kp: &Keypair) -> Self {
+        let mdns = mdns::Behaviour::new(mdns::Config::default(), kp.public().to_peer_id())
+            .expect("mdns behaviour failed to build");
+        // The namespace is sent as the identify protocol version; peers whose value differs get blacklisted.
+        let identify =
+            identify::Behaviour::new(identify::Config::new_with_signed_peer_record(namespace, kp));
+        // Gossipsub messages are signed with the local key and strictly validated; 1 MiB transmit cap.
+        let gossipsub = gossipsub::Behaviour::new(
+            gossipsub::MessageAuthenticity::Signed(kp.clone()),
+            gossipsub::ConfigBuilder::default()
+                .max_transmit_size(1024 * 1024)
+                .validation_mode(gossipsub::ValidationMode::Strict)
+                .build()
+                .expect("invalid gossipsub configuration"),
+        )
+        .expect("gossipsub behaviour failed to build");
+
+        Self {
+            gossipsub,
+            mdns,
+            identify,
        }
    }
 }
+
+// TODO: more tests
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tokio::time::{Duration, timeout};
+
+    fn make_peer(namespace: &str) -> (Peer, mpsc::Receiver<FromSwarm>, mpsc::Sender<ToSwarm>) {
+        let kp = Keypair::generate_ed25519();
+
+        let (to_client_tx, to_client_rx) = mpsc::channel(64);
+        let (to_peer_tx, to_peer_rx) = mpsc::channel(64);
+
+        let peer = Peer::new(namespace.to_string(), kp, to_client_tx, to_peer_rx)
+            .expect("Peer::new should succeed in tests");
+
+        (peer, to_client_rx, to_peer_tx)
+    }
+
+    async fn next_listen_addr(peer: &mut Peer) -> Multiaddr {
+        loop {
+            match peer.swarm.next().await {
+                Some(SwarmEvent::NewListenAddr { address, .. }) => return address,
+                Some(_) => {}
+                None => panic!("swarm stream ended unexpectedly"),
+            }
+        }
+    }
+
+    #[tokio::test]
+    async fn subscribe_and_unsubscribe_do_not_error() {
+        let (mut peer, mut events_rx, commands_tx) = make_peer("ns-test");
+
+        // Drive the swarm just enough to get at least one listen address event,
+        // so the background run loop has something initialized.
+        let _addr = next_listen_addr(&mut peer).await;
+
+        // Run the peer loop in the background.
+        let handle = tokio::spawn(async move {
+            let _ = peer.run().await;
+        });
+
+        commands_tx
+            .send(ToSwarm::Subscribe("topic-a".to_string()))
+            .await
+            .unwrap();
+
+        commands_tx
+            .send(ToSwarm::Unsubscribe("topic-a".to_string()))
+            .await
+            .unwrap();
+
+        // We don't *require* any FromSwarm events here; this is mainly a
+        // smoke test that the message-handling path doesn't panic/hang.
+        // Still, poll briefly to ensure the task is alive.
+        let _ = timeout(Duration::from_millis(200), events_rx.recv()).await;
+
+        // Shut down: dropping the command sender closes the channel, causing run() to return Err.
+        drop(commands_tx);
+        let _ = handle.await;
+    }
+}
--- a/rust/networking/src/swarm.rs
+++ b/rust/networking/src/swarm.rs
@@ -1,143 +0,0 @@
-use crate::alias;
-use crate::swarm::transport::tcp_transport;
-pub use behaviour::{Behaviour, BehaviourEvent};
-use libp2p::{SwarmBuilder, identity};
-
-pub type Swarm = libp2p::Swarm<Behaviour>;
-
-/// The current version of the network: this prevents devices running different versions of the
-/// software from interacting with each other.
-///
-/// TODO: right now this is a hardcoded constant; figure out what the versioning semantics should
-///       even be, and how to inject the right version into this config/initialization. E.g. should
-///       this be passed in as a parameter? What about rapidly changing versions in debug builds?
-///       this is all VERY very hard to figure out and needs to be mulled over as a team.
-pub const NETWORK_VERSION: &[u8] = b"v0.0.1";
-pub const OVERRIDE_VERSION_ENV_VAR: &str = "EXO_LIBP2P_NAMESPACE";
-
-/// Create and configure a swarm which listens to all ports on OS
-pub fn create_swarm(keypair: identity::Keypair) -> alias::AnyResult<Swarm> {
-    let mut swarm = SwarmBuilder::with_existing_identity(keypair)
-        .with_tokio()
-        .with_other_transport(tcp_transport)?
-        .with_behaviour(Behaviour::new)?
-        .build();
-
-    // Listen on all interfaces and whatever port the OS assigns
-    swarm.listen_on("/ip4/0.0.0.0/tcp/0".parse()?)?;
-    Ok(swarm)
-}
-
-mod transport {
-    use crate::alias;
-    use crate::swarm::{NETWORK_VERSION, OVERRIDE_VERSION_ENV_VAR};
-    use futures_lite::{AsyncRead, AsyncWrite};
-    use keccak_const::Sha3_256;
-    use libp2p::core::muxing;
-    use libp2p::core::transport::Boxed;
-    use libp2p::pnet::{PnetError, PnetOutput};
-    use libp2p::{PeerId, Transport, identity, noise, pnet, yamux};
-    use std::{env, sync::LazyLock};
-
-    /// Key used for networking's private network; parametrized on the [`NETWORK_VERSION`].
-    /// See [`pnet_upgrade`] for more.
-    static PNET_PRESHARED_KEY: LazyLock<[u8; 32]> = LazyLock::new(|| {
-        let builder = Sha3_256::new().update(b"exo_discovery_network");
-
-        if let Ok(var) = env::var(OVERRIDE_VERSION_ENV_VAR) {
-            let bytes = var.into_bytes();
-            builder.update(&bytes)
-        } else {
-            builder.update(NETWORK_VERSION)
-        }
-        .finalize()
-    });
-
-    /// Make the Swarm run on a private network, as to not clash with public libp2p nodes and
-    /// also different-versioned instances of this same network.
-    /// This is implemented as an additional "upgrade" ontop of existing [`libp2p::Transport`] layers.
-    async fn pnet_upgrade<TSocket>(
-        socket: TSocket,
-        _: impl Sized,
-    ) -> Result<PnetOutput<TSocket>, PnetError>
-    where
-        TSocket: AsyncRead + AsyncWrite + Send + Unpin + 'static,
-    {
-        use pnet::{PnetConfig, PreSharedKey};
-        PnetConfig::new(PreSharedKey::new(*PNET_PRESHARED_KEY))
-            .handshake(socket)
-            .await
-    }
-
-    /// TCP/IP transport layer configuration.
-    pub fn tcp_transport(
-        keypair: &identity::Keypair,
-    ) -> alias::AnyResult<Boxed<(PeerId, muxing::StreamMuxerBox)>> {
-        use libp2p::{
-            core::upgrade::Version,
-            tcp::{Config, tokio},
-        };
-
-        // `TCP_NODELAY` enabled => avoid latency
-        let tcp_config = Config::default().nodelay(true);
-
-        // V1 + lazy flushing => 0-RTT negotiation
-        let upgrade_version = Version::V1Lazy;
-
-        // Noise is faster than TLS + we don't care much for security
-        let noise_config = noise::Config::new(keypair)?;
-
-        // Use default Yamux config for multiplexing
-        let yamux_config = yamux::Config::default();
-
-        // Create new Tokio-driven TCP/IP transport layer
-        let base_transport = tokio::Transport::new(tcp_config)
-            .and_then(pnet_upgrade)
-            .upgrade(upgrade_version)
-            .authenticate(noise_config)
-            .multiplex(yamux_config);
-
-        // Return boxed transport (to flatten complex type)
-        Ok(base_transport.boxed())
-    }
-}
-
-mod behaviour {
-    use crate::{alias, discovery};
-    use libp2p::swarm::NetworkBehaviour;
-    use libp2p::{gossipsub, identity};
-
-    /// Behavior of the Swarm which composes all desired behaviors:
-    /// Right now its just [`discovery::Behaviour`] and [`gossipsub::Behaviour`].
-    #[derive(NetworkBehaviour)]
-    pub struct Behaviour {
-        pub discovery: discovery::Behaviour,
-        pub gossipsub: gossipsub::Behaviour,
-    }
-
-    impl Behaviour {
-        pub fn new(keypair: &identity::Keypair) -> alias::AnyResult<Self> {
-            Ok(Self {
-                discovery: discovery::Behaviour::new(keypair)?,
-                gossipsub: gossipsub_behaviour(keypair),
-            })
-        }
-    }
-
-    fn gossipsub_behaviour(keypair: &identity::Keypair) -> gossipsub::Behaviour {
-        use gossipsub::{ConfigBuilder, MessageAuthenticity, ValidationMode};
-
-        // build a gossipsub network behaviour
-        //  => signed message authenticity + strict validation mode means the message-ID is
-        //     automatically provided by gossipsub w/out needing to provide custom message-ID function
-        gossipsub::Behaviour::new(
-            MessageAuthenticity::Signed(keypair.clone()),
-            ConfigBuilder::default()
-                .max_transmit_size(1024 * 1024)
-                .validation_mode(ValidationMode::Strict)
-                .build()
-                .expect("the configuration should always be valid"),
-        )
-        .expect("creating gossipsub behavior should always work")
-    }
-}
--- a/rust/networking/tests/dummy.rs
+++ b/rust/networking/tests/dummy.rs
@@ -1,7 +0,0 @@
-// maybe this will hold test in the future...??
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn does_nothing() {}
-}
--- a/rust/parts.nix
+++ b/rust/parts.nix
@@ -1,10 +1,11 @@
 { inputs, ... }:
 {
  perSystem =
-    { inputs', pkgs, lib, ... }:
+    { config, self', inputs', pkgs, lib, ... }:
    let
      # Fenix nightly toolchain with all components
-      rustToolchain = inputs'.fenix.packages.stable.withComponents [
+      fenixPkgs = inputs'.fenix.packages;
+      rustToolchain = fenixPkgs.complete.withComponents [
        "cargo"
        "rustc"
        "clippy"
--- a/rust/util/Cargo.toml
+++ b/rust/util/Cargo.toml
@@ -1,15 +0,0 @@
-[package]
-name = "util"
-version = { workspace = true }
-edition = { workspace = true }
-publish = false
-
-[lib]
-doctest = false
-name = "util"
-path = "src/lib.rs"
-
-[lints]
-workspace = true
-
-[dependencies]
--- a/rust/util/src/lib.rs
+++ b/rust/util/src/lib.rs
@@ -1 +0,0 @@
-pub mod wakerdeque;
--- a/rust/util/src/wakerdeque.rs
+++ b/rust/util/src/wakerdeque.rs
@@ -1,55 +0,0 @@
-use std::collections::VecDeque;
-use std::fmt::{Debug, Formatter};
-use std::task::{Context, Waker};
-
-/// A wrapper around [`VecDeque`] which wakes (if it can) on any `push_*` methods,
-/// and updates the internally stored waker by consuming [`Context`] on any `pop_*` methods.
-pub struct WakerDeque<T> {
-    waker: Option<Waker>,
-    deque: VecDeque<T>,
-}
-
-impl<T: Debug> Debug for WakerDeque<T> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        self.deque.fmt(f)
-    }
-}
-
-impl<T> WakerDeque<T> {
-    pub fn new() -> Self {
-        Self {
-            waker: None,
-            deque: VecDeque::new(),
-        }
-    }
-
-    fn update(&mut self, cx: &mut Context<'_>) {
-        self.waker = Some(cx.waker().clone());
-    }
-
-    fn wake(&mut self) {
-        let Some(ref mut w) = self.waker else { return };
-        w.wake_by_ref();
-        self.waker = None;
-    }
-
-    pub fn pop_front(&mut self, cx: &mut Context<'_>) -> Option<T> {
-        self.update(cx);
-        self.deque.pop_front()
-    }
-
-    pub fn pop_back(&mut self, cx: &mut Context<'_>) -> Option<T> {
-        self.update(cx);
-        self.deque.pop_back()
-    }
-
-    pub fn push_front(&mut self, value: T) {
-        self.wake();
-        self.deque.push_front(value);
-    }
-
-    pub fn push_back(&mut self, value: T) {
-        self.wake();
-        self.deque.push_back(value);
-    }
-}
--- a/src/exo/download/coordinator.py
+++ b/src/exo/download/coordinator.py
@@ -14,7 +14,6 @@ from exo.download.download_utils import (
    map_repo_download_progress_to_download_progress_data,
 )
 from exo.download.shard_downloader import ShardDownloader
-from exo.shared.constants import EXO_MODELS_DIR
 from exo.shared.models.model_cards import ModelId
 from exo.shared.types.commands import (
    CancelDownload,
@@ -47,7 +46,6 @@ class DownloadCoordinator:
    download_command_receiver: Receiver[ForwarderDownloadCommand]
    local_event_sender: Sender[ForwarderEvent]
    event_index_counter: Iterator[int]
-    offline: bool = False

    # Local state
    download_status: dict[ModelId, DownloadProgress] = field(default_factory=dict)
@@ -63,13 +61,8 @@ class DownloadCoordinator:

    def __post_init__(self) -> None:
        self.event_sender, self.event_receiver = channel[Event]()
-        if self.offline:
-            self.shard_downloader.set_internet_connection(False)
        self.shard_downloader.on_progress(self._download_progress_callback)

-    def _model_dir(self, model_id: ModelId) -> str:
-        return str(EXO_MODELS_DIR / model_id.normalize())
-
    async def _download_progress_callback(
        self, callback_shard: ShardMetadata, progress: RepoDownloadProgress
    ) -> None:
@@ -81,7 +74,6 @@ class DownloadCoordinator:
                shard_metadata=callback_shard,
                node_id=self.node_id,
                total_bytes=progress.total_bytes,
-                model_directory=self._model_dir(model_id),
            )
            self.download_status[model_id] = completed
            await self.event_sender.send(
@@ -101,7 +93,6 @@ class DownloadCoordinator:
                download_progress=map_repo_download_progress_to_download_progress_data(
                    progress
                ),
-                model_directory=self._model_dir(model_id),
            )
            self.download_status[model_id] = ongoing
            await self.event_sender.send(
@@ -110,17 +101,13 @@ class DownloadCoordinator:
            self._last_progress_time[model_id] = current_time()

    async def run(self) -> None:
-        logger.info(
-            f"Starting DownloadCoordinator{' (offline mode)' if self.offline else ''}"
-        )
-        if not self.offline:
-            self._test_internet_connection()
+        logger.info("Starting DownloadCoordinator")
+        self._test_internet_connection()
        async with self._tg as tg:
            tg.start_soon(self._command_processor)
            tg.start_soon(self._forward_events)
            tg.start_soon(self._emit_existing_download_progress)
-            if not self.offline:
-                tg.start_soon(self._check_internet_connection)
+            tg.start_soon(self._check_internet_connection)

    def _test_internet_connection(self) -> None:
        try:
@@ -183,11 +170,7 @@ class DownloadCoordinator:
                return

        # Emit pending status
-        progress = DownloadPending(
-            shard_metadata=shard,
-            node_id=self.node_id,
-            model_directory=self._model_dir(model_id),
-        )
+        progress = DownloadPending(shard_metadata=shard, node_id=self.node_id)
        self.download_status[model_id] = progress
        await self.event_sender.send(NodeDownloadProgress(download_progress=progress))

@@ -201,7 +184,6 @@ class DownloadCoordinator:
                shard_metadata=shard,
                node_id=self.node_id,
                total_bytes=initial_progress.total_bytes,
-                model_directory=self._model_dir(model_id),
            )
            self.download_status[model_id] = completed
            await self.event_sender.send(
@@ -209,20 +191,6 @@ class DownloadCoordinator:
            )
            return

-        if self.offline:
-            logger.warning(
-                f"Offline mode: model {model_id} is not fully available locally, cannot download"
-            )
-            failed = DownloadFailed(
-                shard_metadata=shard,
-                node_id=self.node_id,
-                error_message=f"Model files not found locally in offline mode: {model_id}",
-                model_directory=self._model_dir(model_id),
-            )
-            self.download_status[model_id] = failed
-            await self.event_sender.send(NodeDownloadProgress(download_progress=failed))
-            return
-
        # Start actual download
        self._start_download_task(shard, initial_progress)

@@ -238,7 +206,6 @@ class DownloadCoordinator:
            download_progress=map_repo_download_progress_to_download_progress_data(
                initial_progress
            ),
-            model_directory=self._model_dir(model_id),
        )
        self.download_status[model_id] = status
        self.event_sender.send_nowait(NodeDownloadProgress(download_progress=status))
@@ -252,7 +219,6 @@ class DownloadCoordinator:
                    shard_metadata=shard,
                    node_id=self.node_id,
                    error_message=str(e),
-                    model_directory=self._model_dir(model_id),
                )
                self.download_status[model_id] = failed
                await self.event_sender.send(
@@ -287,7 +253,6 @@ class DownloadCoordinator:
            pending = DownloadPending(
                shard_metadata=current_status.shard_metadata,
                node_id=self.node_id,
-                model_directory=self._model_dir(model_id),
            )
            await self.event_sender.send(
                NodeDownloadProgress(download_progress=pending)
@@ -330,18 +295,11 @@ class DownloadCoordinator:
                            node_id=self.node_id,
                            shard_metadata=progress.shard,
                            total_bytes=progress.total_bytes,
-                            model_directory=self._model_dir(
-                                progress.shard.model_card.model_id
-                            ),
                        )
                    elif progress.status in ["in_progress", "not_started"]:
                        if progress.downloaded_bytes_this_session.in_bytes == 0:
                            status = DownloadPending(
-                                node_id=self.node_id,
-                                shard_metadata=progress.shard,
-                                model_directory=self._model_dir(
-                                    progress.shard.model_card.model_id
-                                ),
+                                node_id=self.node_id, shard_metadata=progress.shard
                            )
                        else:
                            status = DownloadOngoing(
@@ -350,9 +308,6 @@ class DownloadCoordinator:
                                download_progress=map_repo_download_progress_to_download_progress_data(
                                    progress
                                ),
-                                model_directory=self._model_dir(
-                                    progress.shard.model_card.model_id
-                                ),
                            )
                    else:
                        continue
--- a/src/exo/download/download_utils.py
+++ b/src/exo/download/download_utils.py
@@ -448,13 +448,12 @@ async def download_file_with_retry(
    target_dir: Path,
    on_progress: Callable[[int, int, bool], None] = lambda _, __, ___: None,
    on_connection_lost: Callable[[], None] = lambda: None,
-    skip_internet: bool = False,
 ) -> Path:
    n_attempts = 3
    for attempt in range(n_attempts):
        try:
            return await _download_file(
-                model_id, revision, path, target_dir, on_progress, skip_internet
+                model_id, revision, path, target_dir, on_progress
            )
        except HuggingFaceAuthenticationError:
            raise
@@ -488,14 +487,10 @@ async def _download_file(
    path: str,
    target_dir: Path,
    on_progress: Callable[[int, int, bool], None] = lambda _, __, ___: None,
-    skip_internet: bool = False,
 ) -> Path:
    target_path = target_dir / path

    if await aios.path.exists(target_path):
-        if skip_internet:
-            return target_path
-
        local_size = (await aios.stat(target_path)).st_size

        # Try to verify against remote, but allow offline operation
@@ -515,11 +510,6 @@ async def _download_file(
            )
            return target_path

-    if skip_internet:
-        raise FileNotFoundError(
-            f"File {path} not found locally and cannot download in offline mode"
-        )
-
    await aios.makedirs((target_dir / path).parent, exist_ok=True)
    length, etag = await file_meta(model_id, revision, path)
    remote_hash = etag[:-5] if etag.endswith("-gzip") else etag
@@ -824,7 +814,6 @@ async def download_shard(
                    file, curr_bytes, total_bytes, is_renamed
                ),
                on_connection_lost=on_connection_lost,
-                skip_internet=skip_internet,
            )

    if not skip_download:
--- a/src/exo/download/tests/test_offline_mode.py
+++ b/src/exo/download/tests/test_offline_mode.py
@@ -1,230 +0,0 @@
-"""Tests for offline/air-gapped mode."""
-
-from collections.abc import AsyncIterator
-from pathlib import Path
-from unittest.mock import AsyncMock, patch
-
-import aiofiles
-import aiofiles.os as aios
-import pytest
-
-from exo.download.download_utils import (
-    _download_file,  # pyright: ignore[reportPrivateUsage]
-    download_file_with_retry,
-    fetch_file_list_with_cache,
-)
-from exo.shared.types.common import ModelId
-from exo.shared.types.worker.downloads import FileListEntry
-
-
-@pytest.fixture
-def model_id() -> ModelId:
-    return ModelId("test-org/test-model")
-
-
-@pytest.fixture
-async def temp_models_dir(tmp_path: Path) -> AsyncIterator[Path]:
-    models_dir = tmp_path / "models"
-    await aios.makedirs(models_dir, exist_ok=True)
-    with patch("exo.download.download_utils.EXO_MODELS_DIR", models_dir):
-        yield models_dir
-
-
-class TestDownloadFileOffline:
-    """Tests for _download_file with skip_internet=True."""
-
-    async def test_returns_local_file_without_http_verification(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """When skip_internet=True and file exists locally, return it immediately
-        without making any HTTP calls (no file_meta verification)."""
-        target_dir = tmp_path / "downloads"
-        await aios.makedirs(target_dir, exist_ok=True)
-
-        local_file = target_dir / "model.safetensors"
-        async with aiofiles.open(local_file, "wb") as f:
-            await f.write(b"model weights data")
-
-        with patch(
-            "exo.download.download_utils.file_meta",
-            new_callable=AsyncMock,
-        ) as mock_file_meta:
-            result = await _download_file(
-                model_id,
-                "main",
-                "model.safetensors",
-                target_dir,
-                skip_internet=True,
-            )
-
-            assert result == local_file
-            mock_file_meta.assert_not_called()
-
-    async def test_raises_file_not_found_for_missing_file(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """When skip_internet=True and file does NOT exist locally,
-        raise FileNotFoundError instead of attempting download."""
-        target_dir = tmp_path / "downloads"
-        await aios.makedirs(target_dir, exist_ok=True)
-
-        with pytest.raises(FileNotFoundError, match="offline mode"):
-            await _download_file(
-                model_id,
-                "main",
-                "missing_model.safetensors",
-                target_dir,
-                skip_internet=True,
-            )
-
-    async def test_returns_local_file_in_subdirectory(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """When skip_internet=True and file exists in a subdirectory,
-        return it without HTTP calls."""
-        target_dir = tmp_path / "downloads"
-        subdir = target_dir / "transformer"
-        await aios.makedirs(subdir, exist_ok=True)
-
-        local_file = subdir / "diffusion_pytorch_model.safetensors"
-        async with aiofiles.open(local_file, "wb") as f:
-            await f.write(b"weights")
-
-        with patch(
-            "exo.download.download_utils.file_meta",
-            new_callable=AsyncMock,
-        ) as mock_file_meta:
-            result = await _download_file(
-                model_id,
-                "main",
-                "transformer/diffusion_pytorch_model.safetensors",
-                target_dir,
-                skip_internet=True,
-            )
-
-            assert result == local_file
-            mock_file_meta.assert_not_called()
-
-
-class TestDownloadFileWithRetryOffline:
-    """Tests for download_file_with_retry with skip_internet=True."""
-
-    async def test_propagates_skip_internet_to_download_file(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """Verify skip_internet is passed through to _download_file."""
-        target_dir = tmp_path / "downloads"
-        await aios.makedirs(target_dir, exist_ok=True)
-
-        local_file = target_dir / "config.json"
-        async with aiofiles.open(local_file, "wb") as f:
-            await f.write(b'{"model_type": "qwen2"}')
-
-        with patch(
-            "exo.download.download_utils.file_meta",
-            new_callable=AsyncMock,
-        ) as mock_file_meta:
-            result = await download_file_with_retry(
-                model_id,
-                "main",
-                "config.json",
-                target_dir,
-                skip_internet=True,
-            )
-
-            assert result == local_file
-            mock_file_meta.assert_not_called()
-
-    async def test_file_not_found_does_not_retry(
-        self, model_id: ModelId, tmp_path: Path
-    ) -> None:
-        """FileNotFoundError from offline mode should not trigger retries."""
-        target_dir = tmp_path / "downloads"
-        await aios.makedirs(target_dir, exist_ok=True)
-
-        with pytest.raises(FileNotFoundError):
-            await download_file_with_retry(
-                model_id,
-                "main",
-                "nonexistent.safetensors",
-                target_dir,
-                skip_internet=True,
-            )
-
-
-class TestFetchFileListOffline:
-    """Tests for fetch_file_list_with_cache with skip_internet=True."""
-
-    async def test_uses_cached_file_list(
-        self, model_id: ModelId, temp_models_dir: Path
-    ) -> None:
-        """When skip_internet=True and cache file exists, use it without network."""
-        from pydantic import TypeAdapter
-
-        cache_dir = temp_models_dir / "caches" / model_id.normalize()
-        await aios.makedirs(cache_dir, exist_ok=True)
-
-        cached_list = [
-            FileListEntry(type="file", path="model.safetensors", size=1000),
-            FileListEntry(type="file", path="config.json", size=200),
-        ]
-        cache_file = cache_dir / f"{model_id.normalize()}--main--file_list.json"
-        async with aiofiles.open(cache_file, "w") as f:
-            await f.write(
-                TypeAdapter(list[FileListEntry]).dump_json(cached_list).decode()
-            )
-
-        with patch(
-            "exo.download.download_utils.fetch_file_list_with_retry",
-            new_callable=AsyncMock,
-        ) as mock_fetch:
-            result = await fetch_file_list_with_cache(
-                model_id, "main", skip_internet=True
-            )
-
-            assert result == cached_list
-            mock_fetch.assert_not_called()
-
-    async def test_falls_back_to_local_directory_scan(
-        self, model_id: ModelId, temp_models_dir: Path
-    ) -> None:
-        """When skip_internet=True and no cache but local files exist,
-        build file list from local directory."""
-        import json
-
-        model_dir = temp_models_dir / model_id.normalize()
-        await aios.makedirs(model_dir, exist_ok=True)
-
-        async with aiofiles.open(model_dir / "config.json", "w") as f:
-            await f.write('{"model_type": "qwen2"}')
-
-        index_data = {
-            "metadata": {},
-            "weight_map": {"model.layers.0.weight": "model.safetensors"},
-        }
-        async with aiofiles.open(model_dir / "model.safetensors.index.json", "w") as f:
-            await f.write(json.dumps(index_data))
-
-        async with aiofiles.open(model_dir / "model.safetensors", "wb") as f:
-            await f.write(b"x" * 500)
-
-        with patch(
-            "exo.download.download_utils.fetch_file_list_with_retry",
-            new_callable=AsyncMock,
-        ) as mock_fetch:
-            result = await fetch_file_list_with_cache(
-                model_id, "main", skip_internet=True
-            )
-
-            mock_fetch.assert_not_called()
-            paths = {entry.path for entry in result}
-            assert "config.json" in paths
-            assert "model.safetensors" in paths
-
-    async def test_raises_when_no_cache_and_no_local_files(
-        self, model_id: ModelId, temp_models_dir: Path
-    ) -> None:
-        """When skip_internet=True and neither cache nor local files exist,
-        raise FileNotFoundError."""
-        with pytest.raises(FileNotFoundError, match="No internet"):
-            await fetch_file_list_with_cache(model_id, "main", skip_internet=True)
--- a/src/exo/main.py
+++ b/src/exo/main.py
@@ -1,4 +1,5 @@
 import argparse
+import importlib.metadata
 import itertools
 import multiprocessing as mp
 import os
@@ -39,15 +40,14 @@ class Node:

    node_id: NodeId
    event_index_counter: Iterator[int]
-    offline: bool
    _tg: TaskGroup = field(init=False, default_factory=anyio.create_task_group)

    @classmethod
    async def create(cls, args: "Args") -> "Self":
        keypair = get_node_id_keypair()
-        node_id = NodeId(keypair.to_peer_id().to_base58())
+        node_id = NodeId(keypair.to_string())
        session_id = SessionId(master_node_id=node_id, election_clock=0)
-        router = Router.create(keypair)
+        router = Router.create(keypair, namespace=args.namespace)
        await router.register_topic(topics.GLOBAL_EVENTS)
        await router.register_topic(topics.LOCAL_EVENTS)
        await router.register_topic(topics.COMMANDS)
@@ -69,12 +69,11 @@ class Node:
                download_command_receiver=router.receiver(topics.DOWNLOAD_COMMANDS),
                local_event_sender=router.sender(topics.LOCAL_EVENTS),
                event_index_counter=event_index_counter,
-                offline=args.offline,
            )
        else:
            download_coordinator = None

-        if args.spawn_api:
+        if not args.no_api:
            api = API(
                node_id,
                session_id,
@@ -134,13 +133,10 @@ class Node:
            api,
            node_id,
            event_index_counter,
-            args.offline,
        )

    async def run(self):
        async with self._tg as tg:
-            signal.signal(signal.SIGINT, lambda _, __: self.shutdown())
-            signal.signal(signal.SIGTERM, lambda _, __: self.shutdown())
            tg.start_soon(self.router.run)
            tg.start_soon(self.election.run)
            if self.download_coordinator:
@@ -152,6 +148,8 @@ class Node:
            if self.api:
                tg.start_soon(self.api.run)
            tg.start_soon(self._elect_loop)
+            signal.signal(signal.SIGINT, lambda _, __: self.shutdown())
+            signal.signal(signal.SIGTERM, lambda _, __: self.shutdown())

    def shutdown(self):
        # if this is our second call to shutdown, just sys.exit
@@ -225,7 +223,6 @@ class Node:
                            ),
                            local_event_sender=self.router.sender(topics.LOCAL_EVENTS),
                            event_index_counter=self.event_index_counter,
-                            offline=self.offline,
                        )
                        self._tg.start_soon(self.download_coordinator.run)
                    if self.worker:
@@ -262,10 +259,7 @@ def main():
    # TODO: Refactor the current verbosity system
    logger_setup(EXO_LOG, args.verbosity)
    logger.info("Starting EXO")
-    logger.info(f"EXO_LIBP2P_NAMESPACE: {os.getenv('EXO_LIBP2P_NAMESPACE')}")
-
-    if args.offline:
-        logger.info("Running in OFFLINE mode — no internet checks, local models only")
+    logger.info(f"Namespace: {args.namespace}")

    # Set FAST_SYNCH override env var for runner subprocesses
    if args.fast_synch is True:
@@ -282,14 +276,13 @@ def main():


 class Args(CamelCaseModel):
-    verbosity: int = 0
-    force_master: bool = False
-    spawn_api: bool = False
-    api_port: PositiveInt = 52415
-    tb_only: bool = False
+    verbosity: int
+    force_master: bool
+    no_api: bool
+    api_port: PositiveInt
    no_worker: bool = False
    no_downloads: bool = False
-    offline: bool = False
+    namespace: str
    fast_synch: bool | None = None  # None = auto, True = force on, False = force off

    @classmethod
@@ -319,14 +312,15 @@ class Args(CamelCaseModel):
        )
        parser.add_argument(
            "--no-api",
-            action="store_false",
-            dest="spawn_api",
+            action="store_true",
+            help="Disable the API server for this node",
        )
        parser.add_argument(
            "--api-port",
            type=int,
            dest="api_port",
            default=52415,
+            help="Which port the API server will be available on",
        )
        parser.add_argument(
            "--no-worker",
@@ -338,9 +332,9 @@ class Args(CamelCaseModel):
            help="Disable the download coordinator (node won't download models)",
        )
        parser.add_argument(
-            "--offline",
-            action="store_true",
-            help="Run in offline/air-gapped mode: skip internet checks, use only pre-staged local models",
+            "--namespace",
+            default=importlib.metadata.version("exo"),
+            help="Set the EXO namespace to run multiple isolated clusters",
        )
        fast_synch_group = parser.add_mutually_exclusive_group()
        fast_synch_group.add_argument(
--- a/src/exo/master/api.py
+++ b/src/exo/master/api.py
@@ -85,7 +85,6 @@ from exo.shared.types.api import (
    ImageGenerationTaskParams,
    ImageListItem,
    ImageListResponse,
-    ImageSize,
    ModelList,
    ModelListModel,
    PlaceInstanceParams,
@@ -101,7 +100,6 @@ from exo.shared.types.api import (
    TraceRankStats,
    TraceResponse,
    TraceStatsResponse,
-    normalize_image_size,
 )
 from exo.shared.types.chunks import (
    ErrorChunk,
@@ -753,11 +751,9 @@ class API:
        When stream=True and partial_images > 0, returns a StreamingResponse
        with SSE-formatted events for partial and final images.
        """
+        payload.model = await self._validate_image_model(ModelId(payload.model))
        payload = payload.model_copy(
-            update={
-                "model": await self._validate_image_model(ModelId(payload.model)),
-                "advanced_params": _ensure_seed(payload.advanced_params),
-            }
+            update={"advanced_params": _ensure_seed(payload.advanced_params)}
        )

        command = ImageGeneration(
@@ -1013,13 +1009,12 @@ class API:
    async def bench_image_generations(
        self, request: Request, payload: BenchImageGenerationTaskParams
    ) -> BenchImageGenerationResponse:
+        payload.model = await self._validate_image_model(ModelId(payload.model))
+
+        payload.stream = False
+        payload.partial_images = 0
        payload = payload.model_copy(
-            update={
-                "model": await self._validate_image_model(ModelId(payload.model)),
-                "stream": False,
-                "partial_images": 0,
-                "advanced_params": _ensure_seed(payload.advanced_params),
-            }
+            update={"advanced_params": _ensure_seed(payload.advanced_params)}
        )

        command = ImageGeneration(
@@ -1040,7 +1035,7 @@ class API:
        prompt: str,
        model: ModelId,
        n: int,
-        size: ImageSize,
+        size: str,
        response_format: Literal["url", "b64_json"],
        input_fidelity: Literal["low", "high"],
        stream: bool,
@@ -1110,7 +1105,7 @@ class API:
        prompt: str = Form(...),
        model: str = Form(...),
        n: int = Form(1),
-        size: str | None = Form(None),
+        size: str = Form("1024x1024"),
        response_format: Literal["url", "b64_json"] = Form("b64_json"),
        input_fidelity: Literal["low", "high"] = Form("low"),
        stream: str = Form("false"),
@@ -1136,7 +1131,7 @@ class API:
            prompt=prompt,
            model=ModelId(model),
            n=n,
-            size=normalize_image_size(size),
+            size=size,
            response_format=response_format,
            input_fidelity=input_fidelity,
            stream=stream_bool,
@@ -1172,7 +1167,7 @@ class API:
        prompt: str = Form(...),
        model: str = Form(...),
        n: int = Form(1),
-        size: str | None = Form(None),
+        size: str = Form("1024x1024"),
        response_format: Literal["url", "b64_json"] = Form("b64_json"),
        input_fidelity: Literal["low", "high"] = Form("low"),
        quality: Literal["high", "medium", "low"] = Form("medium"),
@@ -1192,7 +1187,7 @@ class API:
            prompt=prompt,
            model=ModelId(model),
            n=n,
-            size=normalize_image_size(size),
+            size=size,
            response_format=response_format,
            input_fidelity=input_fidelity,
            stream=False,
--- a/src/exo/master/main.py
+++ b/src/exo/master/main.py
@@ -396,7 +396,7 @@ class Master:
                        await self._handle_traces_collected(event)
                        continue

-                    logger.debug(f"Master indexing event: {str(event)[:100]}")
+                    logger.trace(f"Master indexing event: {str(event)[:100]}")
                    indexed = IndexedEvent(event=event, idx=len(self._event_log))
                    self.state = apply(self.state, indexed)

--- a/src/exo/master/tests/test_master.py
+++ b/src/exo/master/tests/test_master.py
@@ -42,7 +42,7 @@ from exo.utils.channels import channel
@pytest.mark.asyncio
 async def test_master():
    keypair = get_node_id_keypair()
-    node_id = NodeId(keypair.to_peer_id().to_base58())
+    node_id = NodeId(keypair.to_string())
    session_id = SessionId(master_node_id=node_id, election_clock=0)

    ge_sender, global_event_receiver = channel[ForwarderEvent]()
@@ -75,7 +75,7 @@ async def test_master():
    async with anyio.create_task_group() as tg:
        tg.start_soon(master.run)

-        sender_node_id = NodeId(f"{keypair.to_peer_id().to_base58()}_sender")
+        sender_node_id = NodeId(f"{keypair.to_string()}_sender")
        # inject a NodeGatheredInfo event
        logger.info("inject a NodeGatheredInfo event")
        await local_event_sender.send(
--- a/src/exo/routing/connection_message.py
+++ b/src/exo/routing/connection_message.py
@@ -1,37 +1,9 @@
-from enum import Enum
-
-from exo_pyo3_bindings import ConnectionUpdate, ConnectionUpdateType
-
 from exo.shared.types.common import NodeId
 from exo.utils.pydantic_ext import CamelCaseModel

 """Serialisable types for Connection Updates/Messages"""


-class ConnectionMessageType(Enum):
-    Connected = 0
-    Disconnected = 1
-
-    @staticmethod
-    def from_update_type(update_type: ConnectionUpdateType):
-        match update_type:
-            case ConnectionUpdateType.Connected:
-                return ConnectionMessageType.Connected
-            case ConnectionUpdateType.Disconnected:
-                return ConnectionMessageType.Disconnected
-
-
 class ConnectionMessage(CamelCaseModel):
    node_id: NodeId
-    connection_type: ConnectionMessageType
-    remote_ipv4: str
-    remote_tcp_port: int
-
-    @classmethod
-    def from_update(cls, update: ConnectionUpdate) -> "ConnectionMessage":
-        return cls(
-            node_id=NodeId(update.peer_id.to_base58()),
-            connection_type=ConnectionMessageType.from_update_type(update.update_type),
-            remote_ipv4=update.remote_ipv4,
-            remote_tcp_port=update.remote_tcp_port,
-        )
+    expired: bool
--- a/src/exo/routing/router.py
+++ b/src/exo/routing/router.py
@@ -1,5 +1,5 @@
 from copy import copy
-from itertools import count
+from dataclasses import dataclass, field
 from math import inf
 from os import PathLike
 from pathlib import Path
@@ -14,15 +14,14 @@ from anyio import (
 )
 from anyio.abc import TaskGroup
 from exo_pyo3_bindings import (
-    AllQueuesFullError,
    Keypair,
-    NetworkingHandle,
-    NoPeersSubscribedToTopicError,
+    PyPeer,
 )
 from filelock import FileLock
 from loguru import logger

 from exo.shared.constants import EXO_NODE_ID_KEYPAIR
+from exo.shared.types.common import NodeId
 from exo.utils.channels import Receiver, Sender, channel
 from exo.utils.pydantic_ext import CamelCaseModel

@@ -99,28 +98,32 @@ class TopicRouter[T: CamelCaseModel]:
        )


+@dataclass
 class Router:
-    @classmethod
-    def create(cls, identity: Keypair) -> "Router":
-        return cls(handle=NetworkingHandle(identity))
+    _peer: PyPeer
+    topic_routers: dict[str, TopicRouter[CamelCaseModel]] = field(
+        init=False, default_factory=dict
+    )
+    networking_receiver: Receiver[tuple[str, bytes]] = field(init=False)
+    _tmp_networking_sender: Sender[tuple[str, bytes]] | None = field(init=False)
+    _tg: TaskGroup | None = None

-    def __init__(self, handle: NetworkingHandle):
-        self.topic_routers: dict[str, TopicRouter[CamelCaseModel]] = {}
-        send, recv = channel[tuple[str, bytes]]()
-        self.networking_receiver: Receiver[tuple[str, bytes]] = recv
-        self._net: NetworkingHandle = handle
-        self._tmp_networking_sender: Sender[tuple[str, bytes]] | None = send
-        self._id_count = count()
-        self._tg: TaskGroup | None = None
+    def __post_init__(self):
+        self._tmp_networking_sender, self.networking_receiver = channel()
+
+    @classmethod
+    def create(cls, identity: Keypair, namespace: str) -> "Router":
+        return cls(_peer=PyPeer.new(identity, namespace))

    async def register_topic[T: CamelCaseModel](self, topic: TypedTopic[T]):
-        assert self._tg is None, "Attempted to register topic after setup time"
        send = self._tmp_networking_sender
        if send:
            self._tmp_networking_sender = None
        else:
            send = self.networking_receiver.clone_sender()
        router = TopicRouter[T](topic, send)
+        if self._tg is not None:
+            self._tg.start_soon(router.run)
        self.topic_routers[topic.topic] = cast(TopicRouter[CamelCaseModel], router)
        await self._networking_subscribe(str(topic.topic))

@@ -148,14 +151,18 @@ class Router:
    async def run(self):
        logger.debug("Starting Router")
        try:
+
+            async def _peer_run():
+                await self._peer.run()
+
            async with create_task_group() as tg:
                self._tg = tg
                for topic in self.topic_routers:
                    router = self.topic_routers[topic]
                    tg.start_soon(router.run)
                tg.start_soon(self._networking_recv)
-                tg.start_soon(self._networking_recv_connection_messages)
                tg.start_soon(self._networking_publish)
+                tg.start_soon(_peer_run)
                # Router only shuts down if you cancel it.
                await sleep_forever()
        finally:
@@ -170,47 +177,58 @@ class Router:
        self._tg.cancel_scope.cancel()

    async def _networking_subscribe(self, topic: str):
-        await self._net.gossipsub_subscribe(topic)
+        await self._peer.subscribe(topic)
        logger.info(f"Subscribed to {topic}")

    async def _networking_unsubscribe(self, topic: str):
-        await self._net.gossipsub_unsubscribe(topic)
+        await self._peer.unsubscribe(topic)
        logger.info(f"Unsubscribed from {topic}")

    async def _networking_recv(self):
        while True:
-            topic, data = await self._net.gossipsub_recv()
-            logger.trace(f"Received message on {topic} with payload {data}")
+            try:
+                swarm_event = await self._peer.recv()
+            except ValueError:
+                logger.error("Message too large for gossipsub, dropped")
+                continue
+            except ConnectionError:
+                logger.error("All peer queues full, network overloaded")
+                continue
+            except RuntimeError:
+                break
+
+            cm = None
+            if (peer_id := swarm_event.downcast_discovered()) is not None:
+                cm = ConnectionMessage(node_id=NodeId(peer_id), expired=False)
+            if (peer_id := swarm_event.downcast_expired()) is not None:
+                cm = ConnectionMessage(node_id=NodeId(peer_id), expired=True)
+
+            if cm is not None:
+                if CONNECTION_MESSAGES.topic in self.topic_routers:
+                    router = self.topic_routers[CONNECTION_MESSAGES.topic]
+                    assert router.topic.model_type == ConnectionMessage
+                    router = cast(TopicRouter[ConnectionMessage], router)
+                    await router.publish(cm)
+                continue
+
+            assert (msg := swarm_event.downcast_message()) is not None
+            _origin, topic, payload = msg
+            logger.debug(f"Received message on {topic} with payload {payload}")
            if topic not in self.topic_routers:
                logger.warning(f"Received message on unknown or inactive topic {topic}")
                continue

            router = self.topic_routers[topic]
-            await router.publish_bytes(data)
-
-    async def _networking_recv_connection_messages(self):
-        while True:
-            update = await self._net.connection_update_recv()
-            message = ConnectionMessage.from_update(update)
-            logger.trace(
-                f"Received message on connection_messages with payload {message}"
-            )
-            if CONNECTION_MESSAGES.topic in self.topic_routers:
-                router = self.topic_routers[CONNECTION_MESSAGES.topic]
-                assert router.topic.model_type == ConnectionMessage
-                router = cast(TopicRouter[ConnectionMessage], router)
-                await router.publish(message)
+            await router.publish_bytes(payload)

    async def _networking_publish(self):
        with self.networking_receiver as networked_items:
            async for topic, data in networked_items:
                try:
                    logger.trace(f"Sending message on {topic} with payload {data}")
-                    await self._net.gossipsub_publish(topic, data)
-                except NoPeersSubscribedToTopicError:
-                    pass
-                except AllQueuesFullError:
-                    logger.warning(f"All peer queues full, dropping message on {topic}")
+                    await self._peer.send(topic, data)
+                except RuntimeError:
+                    break


 def get_node_id_keypair(
@@ -221,7 +239,7 @@ def get_node_id_keypair(
    Obtain the :class:`PeerId` by from it.
    """
    # TODO(evan): bring back node id persistence once we figure out how to deal with duplicates
-    return Keypair.generate_ed25519()
+    return Keypair.generate()

    def lock_path(path: str | bytes | PathLike[str] | PathLike[bytes]) -> Path:
        return Path(str(path) + ".lock")
--- a/src/exo/shared/apply.py
+++ b/src/exo/shared/apply.py
@@ -218,6 +218,11 @@ def apply_node_timed_out(event: NodeTimedOut, state: State) -> State:
        key: value for key, value in state.downloads.items() if key != event.node_id
    }
    # Clean up all granular node mappings
+    node_identities = {
+        key: value
+        for key, value in state.node_identities.items()
+        if key != event.node_id
+    }
    node_memory = {
        key: value for key, value in state.node_memory.items() if key != event.node_id
    }
@@ -258,6 +263,7 @@ def apply_node_timed_out(event: NodeTimedOut, state: State) -> State:
            "downloads": downloads,
            "topology": topology,
            "last_seen": last_seen,
+            "node_identities": node_identities,
            "node_memory": node_memory,
            "node_disk": node_disk,
            "node_system": node_system,
--- a/src/exo/shared/models/model_cards.py
+++ b/src/exo/shared/models/model_cards.py
@@ -44,8 +44,7 @@ async def _refresh_card_cache():
        async for toml_file in path.rglob("*.toml"):
            try:
                card = await ModelCard.load_from_path(toml_file)
-                if card.model_id not in _card_cache:
-                    _card_cache[card.model_id] = card
+                _card_cache[card.model_id] = card
            except (ValidationError, TOMLKitError):
                pass

@@ -183,7 +182,6 @@ class ConfigData(BaseModel):
    def supports_tensor(self) -> bool:
        return self.architectures in [
            ["Glm4MoeLiteForCausalLM"],
-            ["GlmMoeDsaForCausalLM"],
            ["DeepseekV32ForCausalLM"],
            ["DeepseekV3ForCausalLM"],
            ["Qwen3NextForCausalLM"],
--- a/src/exo/shared/tests/test_election.py
+++ b/src/exo/shared/tests/test_election.py
@@ -1,7 +1,7 @@
 import pytest
 from anyio import create_task_group, fail_after, move_on_after

-from exo.routing.connection_message import ConnectionMessage, ConnectionMessageType
+from exo.routing.connection_message import ConnectionMessage
 from exo.shared.election import Election, ElectionMessage, ElectionResult
 from exo.shared.types.commands import ForwarderCommand, TestCommand
 from exo.shared.types.common import NodeId, SessionId
@@ -330,9 +330,7 @@ async def test_connection_message_triggers_new_round_broadcast() -> None:
            await cm_tx.send(
                ConnectionMessage(
                    node_id=NodeId(),
-                    connection_type=ConnectionMessageType.Connected,
-                    remote_ipv4="",
-                    remote_tcp_port=0,
+                    expired=False,
                )
            )

--- a/src/exo/shared/types/api.py
+++ b/src/exo/shared/types/api.py
@@ -1,9 +1,9 @@
 import time
 from collections.abc import Generator
-from typing import Annotated, Any, Literal, get_args
+from typing import Annotated, Any, Literal
 from uuid import uuid4

-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field

 from exo.shared.models.model_cards import ModelCard, ModelId
 from exo.shared.types.common import CommandId, NodeId
@@ -262,27 +262,6 @@ class DeleteInstanceResponse(BaseModel):
    instance_id: InstanceId


-ImageSize = Literal[
-    "auto",
-    "512x512",
-    "768x768",
-    "1024x768",
-    "768x1024",
-    "1024x1024",
-    "1024x1536",
-    "1536x1024",
-]
-
-
-def normalize_image_size(v: object) -> ImageSize:
-    """Shared validator for ImageSize fields: maps None → "auto" and rejects invalid values."""
-    if v is None:
-        return "auto"
-    if v not in get_args(ImageSize):
-        raise ValueError(f"Invalid size: {v!r}. Must be one of {get_args(ImageSize)}")
-    return v  # pyright: ignore[reportReturnType]
-
-
 class AdvancedImageParams(BaseModel):
    seed: Annotated[int, Field(ge=0)] | None = None
    num_inference_steps: Annotated[int, Field(ge=1, le=100)] | None = None
@@ -302,7 +281,7 @@ class ImageGenerationTaskParams(BaseModel):
    partial_images: int | None = 0
    quality: Literal["high", "medium", "low"] | None = "medium"
    response_format: Literal["url", "b64_json"] | None = "b64_json"
-    size: ImageSize = "auto"
+    size: str | None = "1024x1024"
    stream: bool | None = False
    style: str | None = "vivid"
    user: str | None = None
@@ -310,11 +289,6 @@ class ImageGenerationTaskParams(BaseModel):
    # Internal flag for benchmark mode - set by API, preserved through serialization
    bench: bool = False

-    @field_validator("size", mode="before")
-    @classmethod
-    def normalize_size(cls, v: object) -> ImageSize:
-        return normalize_image_size(v)
-

 class BenchImageGenerationTaskParams(ImageGenerationTaskParams):
    bench: bool = True
@@ -331,18 +305,13 @@ class ImageEditsTaskParams(BaseModel):
    quality: Literal["high", "medium", "low"] | None = "medium"
    output_format: Literal["png", "jpeg", "webp"] = "png"
    response_format: Literal["url", "b64_json"] | None = "b64_json"
-    size: ImageSize = "auto"
+    size: str | None = "1024x1024"
    image_strength: float | None = 0.7
    stream: bool = False
    partial_images: int | None = 0
    advanced_params: AdvancedImageParams | None = None
    bench: bool = False

-    @field_validator("size", mode="before")
-    @classmethod
-    def normalize_size(cls, v: object) -> ImageSize:
-        return normalize_image_size(v)
-
    def __repr_args__(self) -> Generator[tuple[str, Any], None, None]:
        for name, value in super().__repr_args__():  # pyright: ignore[reportAny]
            if name == "image_data":
--- a/src/exo/shared/types/worker/downloads.py
+++ b/src/exo/shared/types/worker/downloads.py
@@ -26,7 +26,6 @@ class DownloadProgressData(CamelCaseModel):
 class BaseDownloadProgress(TaggedModel):
    node_id: NodeId
    shard_metadata: ShardMetadata
-    model_directory: str = ""


 class DownloadPending(BaseDownloadProgress):
--- a/src/exo/worker/engines/image/generate.py
+++ b/src/exo/worker/engines/image/generate.py
@@ -14,7 +14,6 @@ from exo.shared.types.api import (
    ImageEditsTaskParams,
    ImageGenerationStats,
    ImageGenerationTaskParams,
-    ImageSize,
 )
 from exo.shared.types.memory import Memory
 from exo.shared.types.worker.runner_response import (
@@ -24,9 +23,9 @@ from exo.shared.types.worker.runner_response import (
 from exo.worker.engines.image.distributed_model import DistributedImageModel


-def parse_size(size_str: ImageSize) -> tuple[int, int]:
+def parse_size(size_str: str | None) -> tuple[int, int]:
    """Parse size parameter like '1024x1024' to (width, height) tuple."""
-    if size_str == "auto":
+    if not size_str:
        return (1024, 1024)

    try:
@@ -110,9 +109,6 @@ def generate_image(
            # Decode base64 image data and save to temp file
            image_path = Path(tmpdir) / "input.png"
            image_path.write_bytes(base64.b64decode(task.image_data))
-            if task.size == "auto":
-                with Image.open(image_path) as img:
-                    width, height = img.size

        for image_num in range(num_images):
            # Increment seed for each image to ensure unique results
--- a/src/exo/worker/engines/mlx/auto_parallel.py
+++ b/src/exo/worker/engines/mlx/auto_parallel.py
@@ -163,14 +163,11 @@ class PipelineLastLayer(CustomMlxLayer):
                output, (self.r + 1) % self.s, group=self.group
            )
            if cache is not None:
-                # CacheList (used by MLA models like DeepSeekV32, GLM MoE DSA)
-                # doesn't have .keys directly; access via first sub-cache.
-                _cache = cache[0] if hasattr(cache, "caches") else cache  # type: ignore
-                _cache.keys = mx.depends(_cache.keys, output)  # type: ignore
+                cache.keys = mx.depends(cache.keys, output)  # type: ignore[reportUnknownMemberType]
            if self.is_prefill:
                mx.eval(output)
                if cache is not None:
-                    mx.eval(_cache.keys)  # type: ignore
+                    mx.eval(cache.keys)  # type: ignore

        if not self.is_prefill:
            output = mx.distributed.all_gather(output, group=self.group)[
@@ -310,9 +307,7 @@ def patch_pipeline_model[T](model: T, group: mx.distributed.Group) -> T:

        # Add dependency to last cache entry to ensure distributed ops are evaluated
        if cache is not None:
-            last = cache[-1]  # type: ignore
-            dep_cache = last[0] if hasattr(last, "caches") else last  # type: ignore
-            dep_cache.keys = mx.depends(dep_cache.keys, logits)  # type: ignore
+            cache[-1].state = mx.depends(cache[-1].state, logits)  # type: ignore

        return logits

@@ -338,9 +333,7 @@ def patch_tensor_model[T](model: T) -> T:

        # Add dependency to last cache entry to ensure distributed ops are evaluated
        if cache is not None and len(cache) > 0:  # pyright: ignore[reportAny]
-            last = cache[-1]  # pyright: ignore[reportAny]
-            dep_cache = last[0] if hasattr(last, "caches") else last  # pyright: ignore[reportAny]
-            dep_cache.keys = mx.depends(dep_cache.keys, logits)  # pyright: ignore[reportAny,reportUnknownMemberType]
+            cache[-1].state = mx.depends(cache[-1].state, logits)  # pyright: ignore[reportAny,reportUnknownMemberType]

        return logits

@@ -554,12 +547,10 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
        on_timeout: TimeoutCallback | None,
    ) -> nn.Module:
        model = cast(DeepseekV3Model, model)
-
        for layer in model.layers:
            eval_with_timeout(
                layer.parameters(), timeout_seconds / len(model.layers), on_timeout
            )
-
            # Shard the self attention
            if layer.self_attn.q_lora_rank is None:
                layer.self_attn.q_proj = self.all_to_sharded_linear(
@@ -590,18 +581,12 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
                layer.mlp.down_proj = self.sharded_to_all_linear(layer.mlp.down_proj)
                layer.mlp.up_proj = self.all_to_sharded_linear(layer.mlp.up_proj)

-            # Shard the MoE.
+            # Shard the MoE. Shard in place since the MoE should be responsible
+            # for aggregating the results.
            else:
-                if getattr(layer.mlp, "shared_experts", None) is not None:
-                    self.all_to_sharded_linear_in_place(
-                        layer.mlp.shared_experts.gate_proj
-                    )
-                    self.sharded_to_all_linear_in_place(
-                        layer.mlp.shared_experts.down_proj
-                    )
-                    self.all_to_sharded_linear_in_place(
-                        layer.mlp.shared_experts.up_proj
-                    )
+                self.all_to_sharded_linear_in_place(layer.mlp.shared_experts.gate_proj)
+                self.sharded_to_all_linear_in_place(layer.mlp.shared_experts.down_proj)
+                self.all_to_sharded_linear_in_place(layer.mlp.shared_experts.up_proj)
                self.all_to_sharded_linear_in_place(layer.mlp.switch_mlp.gate_proj)
                self.sharded_to_all_linear_in_place(layer.mlp.switch_mlp.down_proj)
                self.all_to_sharded_linear_in_place(layer.mlp.switch_mlp.up_proj)
@@ -794,7 +779,8 @@ class MiniMaxShardingStrategy(TensorParallelShardingStrategy):

            layer.self_attn = WrappedMiniMaxAttention(layer.self_attn, self.group)  # pyright: ignore[reportAttributeAccessIssue,reportArgumentType]

-            # Shard the MoE.
+            # Shard the MoE. Shard in place since the MoE should be responsible
+            # for aggregating the results.
            self.all_to_sharded_linear_in_place(
                layer.block_sparse_moe.switch_mlp.gate_proj
            )
@@ -907,7 +893,8 @@ class QwenShardingStrategy(TensorParallelShardingStrategy):
                    layer.self_attn.num_attention_heads //= self.N
                    layer.self_attn.num_key_value_heads //= self.N

-            # Shard the MoE.
+            # Shard the MoE. Shard in place since the MoE should be responsible
+            # for aggregating the results.
            if isinstance(layer.mlp, (Qwen3MoeSparseMoeBlock, Qwen3NextSparseMoeBlock)):
                self.all_to_sharded_linear_in_place(layer.mlp.switch_mlp.gate_proj)
                self.sharded_to_all_linear_in_place(layer.mlp.switch_mlp.down_proj)
--- a/src/exo/worker/engines/mlx/generator/generate.py
+++ b/src/exo/worker/engines/mlx/generator/generate.py
@@ -57,7 +57,6 @@ def prefill(
    sampler: Callable[[mx.array], mx.array],
    prompt_tokens: mx.array,
    cache: KVCacheType,
-    group: mx.distributed.Group | None = None,
 ) -> tuple[float, int, list[CacheSnapshot]]:
    """Prefill the KV cache with prompt tokens.

@@ -87,9 +86,6 @@ def prefill(

    set_pipeline_prefill(model, is_prefill=True)

-    mx_barrier(group)
-    logger.info("Starting prefill")
-
    # Use max_tokens=1 because max_tokens=0 does not work.
    # We just throw away the generated token - we only care about filling the cache
    for _ in stream_generate(
@@ -309,9 +305,16 @@ def mlx_generate(
    )
    max_stop_len = max((len(s) for s in stop_sequences), default=0)

+    mx_barrier(group)
+    logger.info("Starting prefill")
+
    # Prefill cache with all tokens except the last one
    prefill_tps, prefill_tokens, ssm_snapshots_list = prefill(
-        model, tokenizer, sampler, prompt_tokens[:-1], caches, group
+        model,
+        tokenizer,
+        sampler,
+        prompt_tokens[:-1],
+        caches,
    )
    cache_snapshots: list[CacheSnapshot] | None = ssm_snapshots_list or None

@@ -328,7 +331,6 @@ def mlx_generate(
    think_start = tokenizer.think_start
    think_end = tokenizer.think_end

-    logger.info("Starting decode")
    mx_barrier(group)

    for completion_tokens, out in enumerate(
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -285,12 +285,10 @@ def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
    model_id_lower = model_id.lower()
    if "kimi-k2" in model_id_lower:
        return [163586]
-    elif "glm-5" in model_id_lower or "glm-4.7" in model_id_lower:
-        # For GLM-5 and GLM-4.7
+    elif "glm-4.7-flash" in model_id_lower:
        # 154820: <|endoftext|>, 154827: <|user|>, 154829: <|observation|>
        return [154820, 154827, 154829]
    elif "glm" in model_id_lower:
-        # For GLM-4.5 and older
        return [151336, 151329, 151338]
    return None

--- a/src/exo/worker/main.py
+++ b/src/exo/worker/main.py
@@ -348,7 +348,7 @@ class Worker:
                    session=self.session_id,
                    event=event,
                )
-                logger.debug(f"Worker published event {idx}: {str(event)[:100]}")
+                logger.trace(f"Worker published event {idx}: {str(event)[:100]}")
                await self.local_event_sender.send(fe)
                self.out_for_delivery[event.event_id] = fe

--- a/src/exo/worker/runner/runner_supervisor.py
+++ b/src/exo/worker/runner/runner_supervisor.py
@@ -191,7 +191,7 @@ class RunnerSupervisor:
        logger.info("Checking runner's status")
        if self.runner_process.is_alive():
            logger.info("Runner was found to be alive, attempting to join process")
-            await to_thread.run_sync(self.runner_process.join, 5)
+            await to_thread.run_sync(self.runner_process.join, 1)
        rc = self.runner_process.exitcode
        logger.info(f"RunnerSupervisor exited with exit code {rc}")
        if rc == 0:
--- a/tests/auto_bench.sh
+++ b/tests/auto_bench.sh
@@ -28,12 +28,12 @@ trap 'cleanup' EXIT INT TERM

 for host; do
  ssh -T -o BatchMode=yes -o ServerAliveInterval=30 "$host@$host" \
-    "EXO_LIBP2P_NAMESPACE=$commit /nix/var/nix/profiles/default/bin/nix build github:exo-explore/exo/$commit" &
+    "/nix/var/nix/profiles/default/bin/nix build github:exo-explore/exo/$commit" &
 done
 wait
 for host; do
  ssh -T -o BatchMode=yes -o ServerAliveInterval=30 "$host@$host" \
-    "EXO_LIBP2P_NAMESPACE=$commit /nix/var/nix/profiles/default/bin/nix run github:exo-explore/exo/$commit" &>/dev/null &
+    "/nix/var/nix/profiles/default/bin/nix run github:exo-explore/exo/$commit -- --namespace $commit" &>/dev/null &
 done

 for host; do
--- a/tests/run_exo_on.sh
+++ b/tests/run_exo_on.sh
@@ -35,7 +35,7 @@ i=0
 for host; do
  colour=${colours[i++ % 4]}
  ssh -T -o BatchMode=yes -o ServerAliveInterval=30 "$host@$host" \
-    "EXO_LIBP2P_NAMESPACE=$commit /nix/var/nix/profiles/default/bin/nix run github:exo-explore/exo/$commit" |&
+    "/nix/var/nix/profiles/default/bin/nix run github:exo-explore/exo/$commit -- --namespace $commit" |&
    awk -v p="${colour}[${host}]${reset}" '{ print p $0; fflush() }' &
 done

--- a/uv.lock
+++ b/uv.lock
@@ -193,20 +193,14 @@ sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8
 wheels = [
    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
    { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
-    { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
    { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
    { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
    { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
    { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
-    { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
    { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
    { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
    { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
@@ -312,10 +306,8 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" },
    { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" },
    { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" },
-    { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" },
    { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = "2025-10-15T23:17:04.078Z" },
    { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" },
-    { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" },
    { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" },
    { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" },
    { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" },
@@ -323,10 +315,8 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" },
    { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" },
    { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" },
-    { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" },
    { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" },
    { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" },
-    { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" },
    { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" },
    { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" },
    { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" },
@@ -334,10 +324,8 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" },
    { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" },
    { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" },
    { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" },
    { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" },
-    { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" },
    { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" },
    { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" },
    { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" },
@@ -377,8 +365,8 @@ dependencies = [
    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cpu"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260218+14841977", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#1484197707f35186ad3bd614357c7c47fdf86ebc" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -416,9 +404,9 @@ requires-dist = [
    { name = "hypercorn", specifier = ">=0.18.0" },
    { name = "loguru", specifier = ">=0.7.3" },
    { name = "mflux", specifier = "==0.15.5" },
-    { name = "mlx", marker = "sys_platform == 'darwin'", git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks" },
+    { name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.6" },
    { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.6" },
-    { name = "mlx-lm", specifier = "==0.30.7" },
+    { name = "mlx-lm", specifier = "==0.30.6" },
    { name = "msgspec", specifier = ">=0.19.0" },
    { name = "openai-harmony", specifier = ">=0.0.8" },
    { name = "pillow", specifier = ">=11.0,<12.0" },
@@ -1020,8 +1008,8 @@ dependencies = [
    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "matplotlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cuda13"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260218+14841977", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#1484197707f35186ad3bd614357c7c47fdf86ebc" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", extra = ["cuda13"], marker = "sys_platform == 'linux'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "piexif", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1048,12 +1036,18 @@ wheels = [
 name = "mlx"
 version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "sys_platform == 'linux'",
+dependencies = [
+    { name = "mlx-metal", marker = "sys_platform == 'darwin'" },
 ]
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/ae/5b/e460e144a34d5529e010056cccf50b538d56ed001473bc6b246018fd58cb/mlx-0.30.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ed86f8bffc174c2f259ca589ea25464c96cf69d1bb457074a2bf2ef53737e54f", size = 573515, upload-time = "2026-02-06T03:45:23.405Z" },
+    { url = "https://files.pythonhosted.org/packages/60/25/69833fefb9a3fef30b56792b1bcd022496c4fea83e45411d289b77ef7546/mlx-0.30.6-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:c52294958269e20f300639a17c1900ca8fc737d859ddda737f9811e94bd040e5", size = 573516, upload-time = "2026-02-06T03:45:24.618Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/6a/7e7fbeebc5cb51b6a5eba96b263a6298707bcbdc059f4b0b73e088bc3dea/mlx-0.30.6-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:b5b6636f7c49a4d86d8ec82643b972f45a144a7a9f3a967b27b2e6e22cf71e6a", size = 573592, upload-time = "2026-02-06T03:45:25.928Z" },
    { url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" },
    { url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" },
+    { url = "https://files.pythonhosted.org/packages/60/23/361dc7a5797634e4d7e9bdd6564c6b28f9b1246672632def2f91bf066b18/mlx-0.30.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:78804a89dcff4a838f7c2da72392fe87a523e95122a3c840e53df019122aad45", size = 575028, upload-time = "2026-02-06T03:45:31.549Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/69/1854484d414171586814dfbe8def95f75c4ea2c7341ba13ba8ee675f7c62/mlx-0.30.6-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ec13584ab069665cc7ad34a05494d9291cd623aef6ae96be48875fc87cfc25d6", size = 575026, upload-time = "2026-02-06T03:45:33.072Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/b8/3adbc441924209a7e4c568308b2a0b54bd09aee6a68db5bae85304791e54/mlx-0.30.6-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:b2c5e8a090a753ef99a1380a4d059c983083f36198864f6df9faaf1223d083df", size = 575041, upload-time = "2026-02-06T03:45:34.814Z" },
    { url = "https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" },
    { url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" },
 ]
@@ -1066,14 +1060,6 @@ cuda13 = [
    { name = "mlx-cuda-13", marker = "sys_platform == 'linux'" },
 ]

-[[package]]
-name = "mlx"
-version = "0.30.7.dev20260218+14841977"
-source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#1484197707f35186ad3bd614357c7c47fdf86ebc" }
-resolution-markers = [
-    "sys_platform == 'darwin'",
-]
-
 [[package]]
 name = "mlx-cpu"
 version = "0.30.6"
@@ -1100,20 +1086,30 @@ wheels = [

 [[package]]
 name = "mlx-lm"
-version = "0.30.7"
+version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260218+14841977", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#1484197707f35186ad3bd614357c7c47fdf86ebc" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/66/0d/56542e2ae13ec6f542d3977d7cff89a205d4f6c5122e0ce23f33265f61c9/mlx_lm-0.30.7.tar.gz", hash = "sha256:e5f31ac58d9f2381f28e1ba639ff903e64f7cff1bdc245c0bc97f72264be329c", size = 275764, upload-time = "2026-02-12T18:41:11.86Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/76/cb/815deddc8699b1f694d7e1f9cbed52934c03a8b49432c8add72932bb2f0b/mlx_lm-0.30.6.tar.gz", hash = "sha256:807e042d7040268f1b19190b7eaefd8b2efbff5590a65460974ad4225b91dda1", size = 271733, upload-time = "2026-02-04T21:27:45.741Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1e/17/a41c798a3d9cbdc47f39c6db5bba4c2cd199203ead26bf911cb03b644070/mlx_lm-0.30.7-py3-none-any.whl", hash = "sha256:17442a4bf01c4c2d3bca1e647712fe44f19890c3f1eadc8589d389e57b44b9bf", size = 386591, upload-time = "2026-02-12T18:41:10.236Z" },
+    { url = "https://files.pythonhosted.org/packages/20/5f/01d281f1fa8a1521d5936659beb4f5ab1f32b463d059263cf9d4cef969d9/mlx_lm-0.30.6-py3-none-any.whl", hash = "sha256:a7405bd581eacc4bf8209d7a6b7f23629585a0d7c6740c2a97e51fee35b3b0e1", size = 379451, upload-time = "2026-02-04T21:27:43.222Z" },
+]
+
+[[package]]
+name = "mlx-metal"
+version = "0.30.6"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f3/85/44406b521f920248fad621334d4dc15e77660a494edf890e7cbee33bf38d/mlx_metal-0.30.6-py3-none-macosx_14_0_arm64.whl", hash = "sha256:ea6d0c973def9a5b4f652cc77036237db3f88c9d0af63701d76b5fddde99b820", size = 38437818, upload-time = "2026-02-06T03:44:56.19Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/cb/10a516995f7d0c154b0d7e633c54b51e96977a86a355105b6474cfcbe0d0/mlx_metal-0.30.6-py3-none-macosx_15_0_arm64.whl", hash = "sha256:0f8cb94634d07e06a372d6ad9a090f38a18bab1ff19a140aede60eacf707bb94", size = 38433701, upload-time = "2026-02-06T03:44:59.678Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/7d/70cb272f7373c334709f210ed8420511fc9d64d05a7a646c0b3b94c29c04/mlx_metal-0.30.6-py3-none-macosx_26_0_arm64.whl", hash = "sha256:d761ae26304f2c4b454eeea7f612a56919d9e5e57dbb1dc0788f8e34aa6f41c2", size = 47718448, upload-time = "2026-02-06T03:45:03.133Z" },
 ]

 [[package]]