woahg

add resources dir to nix
feat: add model picker modal with grouped models and HF Hub search (#1369 )
2026-02-04 11:11:45 -05:00 · 2026-02-04 14:58:05 +00:00 · 2026-02-04 14:34:04 +00:00 · 2026-02-04 05:56:23 -08:00 · 2026-02-04 05:06:15 -08:00 · 2026-02-03 20:03:29 +00:00
178 changed files with 11604 additions and 6415 deletions
--- a/.github/actions/typecheck/action.yml
+++ b/.github/actions/typecheck/action.yml
@@ -1,12 +0,0 @@
-name: Type Check
-
-description: "Run type checker"
-
-runs:
-  using: "composite"
-  steps:
-    - name: Run type checker
-      run: |
-        nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just sync
-        nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just check
-      shell: bash
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -26,73 +26,14 @@ jobs:
          name: exo
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"

-      - name: Configure git user
-        run: |
-          git config --local user.email "github-actions@users.noreply.github.com"
-          git config --local user.name  "github-actions bot"
-        shell: bash
+      - name: Load nix develop environment
+        run: nix run github:nicknovitski/nix-develop/v1

-      - name: Pull LFS files
-        run: |
-          echo "Pulling Git LFS files..."
-          git lfs pull
-        shell: bash
+      - name: Sync dependencies
+        run: uv sync --all-packages

-      - name: Setup Nix Environment
-        run: |
-          echo "Checking for nix installation..."
-          
-          # Check if nix binary exists directly
-          if [ -f /nix/var/nix/profiles/default/bin/nix ]; then
-            echo "Found nix binary at /nix/var/nix/profiles/default/bin/nix"
-            export PATH="/nix/var/nix/profiles/default/bin:$PATH"
-            echo "PATH=$PATH" >> $GITHUB_ENV
-            nix --version
-          elif [ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]; then
-            echo "Found nix profile script, sourcing..."
-            source /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
-            nix --version
-          elif command -v nix >/dev/null 2>&1; then
-            echo "Nix already in PATH"
-            nix --version
-          else
-            echo "Nix not found. Debugging info:"
-            echo "Contents of /nix/var/nix/profiles/default/:"
-            ls -la /nix/var/nix/profiles/default/ 2>/dev/null || echo "Directory not found"
-            echo "Contents of /nix/var/nix/profiles/default/bin/:"
-            ls -la /nix/var/nix/profiles/default/bin/ 2>/dev/null || echo "Directory not found"
-            exit 1
-          fi
-        shell: bash
-
-      - name: Configure basedpyright include for local MLX
-        run: |
-          RUNNER_LABELS='${{ toJSON(runner.labels) }}'
-          if echo "$RUNNER_LABELS" | grep -q "local_mlx"; then
-            if [ -d "/Users/Shared/mlx" ]; then
-              echo "Updating [tool.basedpyright].include to use /Users/Shared/mlx"
-              awk '
-                BEGIN { in=0 }
-                /^\[tool\.basedpyright\]/ { in=1; print; next }
-                in && /^\[/ { in=0 }  # next section
-                in && /^[ \t]*include[ \t]*=/ {
-                  print "include = [\"/Users/Shared/mlx\"]"
-                  next
-                }
-                { print }
-              ' pyproject.toml > pyproject.toml.tmp && mv pyproject.toml.tmp pyproject.toml
-
-              echo "New [tool.basedpyright] section:"
-              sed -n '/^\[tool\.basedpyright\]/,/^\[/p' pyproject.toml | sed '$d' || true
-            else
-              echo "local_mlx tag present but /Users/Shared/mlx not found; leaving pyproject unchanged."
-            fi
-          else
-            echo "Runner does not have 'local_mlx' tag; leaving pyproject unchanged."
-          fi
-        shell: bash
-
-      - uses: ./.github/actions/typecheck
+      - name: Run type checker
+        run: uv run basedpyright --project pyproject.toml

  nix:
    name: Build and check (${{ matrix.system }})
@@ -123,6 +64,63 @@ jobs:
          name: exo
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"

+      - name: Build Metal packages (macOS only)
+        if: runner.os == 'macOS'
+        run: |
+          # Try to build metal-toolchain first (may succeed via cachix cache hit)
+          if nix build .#metal-toolchain 2>/dev/null; then
+            echo "metal-toolchain built successfully (likely cache hit)"
+          else
+            echo "metal-toolchain build failed, extracting from Xcode..."
+
+            NAR_HASH="sha256-ayR5mXN4sZAddwKEG2OszGRF93k9ZFc7H0yi2xbylQw="
+            NAR_NAME="metal-toolchain-17C48.nar"
+
+            # Use RUNNER_TEMP to avoid /tmp symlink issues on macOS
+            WORK_DIR="${RUNNER_TEMP}/metal-work"
+            mkdir -p "$WORK_DIR"
+
+            # Download the Metal toolchain component
+            xcodebuild -downloadComponent MetalToolchain
+
+            # Find and mount the DMG
+            DMG_PATH=$(find /System/Library/AssetsV2/com_apple_MobileAsset_MetalToolchain -name '*.dmg' 2>/dev/null | head -1)
+            if [ -z "$DMG_PATH" ]; then
+              echo "Error: Could not find Metal toolchain DMG"
+              exit 1
+            fi
+
+            echo "Found DMG at: $DMG_PATH"
+            hdiutil attach "$DMG_PATH" -mountpoint "${WORK_DIR}/metal-dmg"
+
+            # Copy the toolchain
+            cp -R "${WORK_DIR}/metal-dmg/Metal.xctoolchain" "${WORK_DIR}/metal-export"
+            hdiutil detach "${WORK_DIR}/metal-dmg"
+
+            # Create NAR and add to store
+            nix nar pack "${WORK_DIR}/metal-export" > "${WORK_DIR}/${NAR_NAME}"
+            STORE_PATH=$(nix store add --mode flat "${WORK_DIR}/${NAR_NAME}")
+            echo "Added NAR to store: $STORE_PATH"
+
+            # Verify the hash matches
+            ACTUAL_HASH=$(nix hash file "${WORK_DIR}/${NAR_NAME}")
+            if [ "$ACTUAL_HASH" != "$NAR_HASH" ]; then
+              echo "Warning: NAR hash mismatch!"
+              echo "Expected: $NAR_HASH"
+              echo "Actual:   $ACTUAL_HASH"
+              echo "The metal-toolchain.nix may need updating"
+            fi
+
+            # Clean up
+            rm -rf "$WORK_DIR"
+
+            # Retry the build now that NAR is in store
+            nix build .#metal-toolchain
+          fi
+
+          # Build mlx (depends on metal-toolchain)
+          nix build .#mlx
+
      - name: Build all Nix outputs
        run: |
          nix flake show --json | jq -r '
@@ -134,3 +132,16 @@ jobs:

      - name: Run nix flake check
        run: nix flake check
+
+      - name: Run pytest (macOS only)
+        if: runner.os == 'macOS'
+        run: |
+          # Build the test environment (requires relaxed sandbox for uv2nix on macOS)
+          TEST_ENV=$(nix build '.#exo-test-env' --option sandbox relaxed --print-out-paths)
+
+          # Run pytest outside the Nix sandbox (needs GPU access for MLX)
+          export HOME="$RUNNER_TEMP"
+          export EXO_TESTS=1
+          export EXO_DASHBOARD_DIR="$PWD/dashboard/"
+          export EXO_RESOURCES_DIR="$PWD/resources"
+          "$TEST_ENV/bin/python" -m pytest src -m "not slow" --import-mode=importlib
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,7 @@ target/
 dashboard/build/
 dashboard/node_modules/
 dashboard/.svelte-kit/
+
+# host config snapshots
+hosts_*.json
+*.swp
--- a/.mlx_typings/mlx_lm/tokenizer_utils.pyi
+++ b/.mlx_typings/mlx_lm/tokenizer_utils.pyi
@@ -108,6 +108,7 @@ class TokenizerWrapper:
    _tokenizer: PreTrainedTokenizerFast
    eos_token_id: int | None
    eos_token: str | None
+    eos_token_ids: list[int] | set[int] | None
    bos_token_id: int | None
    bos_token: str | None
    vocab_size: int
@@ -117,7 +118,7 @@ class TokenizerWrapper:
        self,
        tokenizer: Any,
        detokenizer_class: Any = ...,
-        eos_token_ids: list[int] | None = ...,
+        eos_token_ids: list[int] | set[int] | None = ...,
        chat_template: Any = ...,
        tool_parser: Any = ...,
        tool_call_start: str | None = ...,
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -141,12 +141,6 @@ version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"

-[[package]]
-name = "arrayvec"
-version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
-
 [[package]]
 name = "asn1-rs"
 version = "0.7.1"
@@ -304,19 +298,6 @@ version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"

-[[package]]
-name = "bigdecimal"
-version = "0.4.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934"
-dependencies = [
- "autocfg",
- "libm",
- "num-bigint",
- "num-integer",
- "num-traits",
-]
-
 [[package]]
 name = "bimap"
 version = "0.6.3"
@@ -353,31 +334,6 @@ dependencies = [
 "generic-array",
 ]

-[[package]]
-name = "bon"
-version = "3.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1"
-dependencies = [
- "bon-macros",
- "rustversion",
-]
-
-[[package]]
-name = "bon-macros"
-version = "3.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645"
-dependencies = [
- "darling",
- "ident_case",
- "prettyplease",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 2.0.111",
-]
-
 [[package]]
 name = "bs58"
 version = "0.5.1"
@@ -541,15 +497,6 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3"

-[[package]]
-name = "convert_case"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9"
-dependencies = [
- "unicode-segmentation",
-]
-
 [[package]]
 name = "core-foundation"
 version = "0.9.4"
@@ -700,41 +647,6 @@ dependencies = [
 "syn 2.0.111",
 ]

-[[package]]
-name = "darling"
-version = "0.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
-dependencies = [
- "darling_core",
- "darling_macro",
-]
-
-[[package]]
-name = "darling_core"
-version = "0.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
-dependencies = [
- "fnv",
- "ident_case",
- "proc-macro2",
- "quote",
- "strsim",
- "syn 2.0.111",
-]
-
-[[package]]
-name = "darling_macro"
-version = "0.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
-dependencies = [
- "darling_core",
- "quote",
- "syn 2.0.111",
-]
-
 [[package]]
 name = "data-encoding"
 version = "2.9.0"
@@ -761,17 +673,6 @@ dependencies = [
 "syn 2.0.111",
 ]

-[[package]]
-name = "delegate"
-version = "0.13.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "780eb241654bf097afb00fc5f054a09b687dad862e485fdcf8399bb056565370"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.111",
-]
-
 [[package]]
 name = "der"
 version = "0.7.10"
@@ -806,29 +707,6 @@ dependencies = [
 "powerfmt",
 ]

-[[package]]
-name = "derive_more"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618"
-dependencies = [
- "derive_more-impl",
-]
-
-[[package]]
-name = "derive_more-impl"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b"
-dependencies = [
- "convert_case",
- "proc-macro2",
- "quote",
- "rustc_version",
- "syn 2.0.111",
- "unicode-xid",
-]
-
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -998,37 +876,23 @@ dependencies = [
 name = "exo_pyo3_bindings"
 version = "0.0.1"
 dependencies = [
- "delegate",
- "derive_more",
 "env_logger",
- "extend",
- "futures",
- "impl-trait-for-tuples",
+ "futures-lite",
 "libp2p",
 "log",
 "networking",
- "once_cell",
- "pin-project",
 "pyo3",
 "pyo3-async-runtimes",
 "pyo3-log",
 "pyo3-stub-gen",
- "thiserror 2.0.17",
- "thread_local",
 "tokio",
- "util",
 ]

 [[package]]
-name = "extend"
-version = "1.2.0"
+name = "fastrand"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "311a6d2f1f9d60bff73d2c78a0af97ed27f79672f15c238192a5bbb64db56d00"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.111",
-]
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"

 [[package]]
 name = "ff"
@@ -1138,7 +1002,10 @@ version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad"
 dependencies = [
+ "fastrand",
 "futures-core",
+ "futures-io",
+ "parking",
 "pin-project-lite",
 ]

@@ -1625,12 +1492,6 @@ dependencies = [
 "zerovec",
 ]

-[[package]]
-name = "ident_case"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
-
 [[package]]
 name = "idna"
 version = "1.1.0"
@@ -1706,17 +1567,6 @@ dependencies = [
 "xmltree",
 ]

-[[package]]
-name = "impl-trait-for-tuples"
-version = "0.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.111",
-]
-
 [[package]]
 name = "indexmap"
 version = "2.12.1"
@@ -1745,15 +1595,6 @@ dependencies = [
 "generic-array",
 ]

-[[package]]
-name = "internment"
-version = "0.8.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "636d4b0f6a39fd684effe2a73f5310df16a3fa7954c26d36833e98f44d1977a2"
-dependencies = [
- "hashbrown 0.15.5",
-]
-
 [[package]]
 name = "inventory"
 version = "0.3.21"
@@ -1880,12 +1721,6 @@ dependencies = [
 "cpufeatures",
 ]

-[[package]]
-name = "keccak-const"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57d8d8ce877200136358e0bbff3a77965875db3af755a11e1fa6b1b3e2df13ea"
-
 [[package]]
 name = "lalrpop-util"
 version = "0.20.2"
@@ -1904,12 +1739,6 @@ version = "0.2.178"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"

-[[package]]
-name = "libm"
-version = "0.2.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
-
 [[package]]
 name = "libp2p"
 version = "0.56.0"
@@ -2898,20 +2727,10 @@ dependencies = [
 name = "networking"
 version = "0.0.1"
 dependencies = [
- "delegate",
- "derive_more",
- "either",
- "extend",
- "futures",
- "futures-timer",
- "impl-trait-for-tuples",
- "keccak-const",
 "libp2p",
 "log",
- "thiserror 2.0.17",
 "tokio",
 "tracing-subscriber",
- "util",
 ]

 [[package]]
@@ -2993,17 +2812,6 @@ dependencies = [
 "num-traits",
 ]

-[[package]]
-name = "num-rational"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
-dependencies = [
- "num-bigint",
- "num-integer",
- "num-traits",
-]
-
 [[package]]
 name = "num-traits"
 version = "0.2.19"
@@ -3307,16 +3115,6 @@ dependencies = [
 "zerocopy",
 ]

-[[package]]
-name = "prettyplease"
-version = "0.2.37"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
-dependencies = [
- "proc-macro2",
- "syn 2.0.111",
-]
-
 [[package]]
 name = "primeorder"
 version = "0.13.6"
@@ -3364,28 +3162,14 @@ version = "0.27.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d"
 dependencies = [
- "bigdecimal",
- "either",
- "hashbrown 0.16.1",
- "indexmap",
 "indoc",
- "inventory",
 "libc",
- "lock_api",
 "memoffset",
- "num-bigint",
- "num-complex",
- "num-rational",
- "num-traits",
 "once_cell",
- "ordered-float",
- "parking_lot",
 "portable-atomic",
 "pyo3-build-config",
 "pyo3-ffi",
 "pyo3-macros",
- "rust_decimal",
- "smallvec",
 "unindent",
 ]

@@ -3740,12 +3524,6 @@ dependencies = [
 "yasna",
 ]

-[[package]]
-name = "recursion"
-version = "0.5.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9dba2197bf7b1d87b4dd460c195f4edeb45a94e82e8054f8d5f317c1f0e93ca1"
-
 [[package]]
 name = "redox_syscall"
 version = "0.5.18"
@@ -3832,16 +3610,6 @@ dependencies = [
 "tokio",
 ]

-[[package]]
-name = "rust_decimal"
-version = "1.39.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282"
-dependencies = [
- "arrayvec",
- "num-traits",
-]
-
 [[package]]
 name = "rustc-hash"
 version = "1.1.0"
@@ -4706,24 +4474,12 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"

-[[package]]
-name = "unicode-segmentation"
-version = "1.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
-
 [[package]]
 name = "unicode-width"
 version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"

-[[package]]
-name = "unicode-xid"
-version = "0.2.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
-
 [[package]]
 name = "unicode_names2"
 version = "1.3.0"
@@ -4804,19 +4560,6 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

-[[package]]
-name = "util"
-version = "0.0.1"
-dependencies = [
- "bon",
- "derive_more",
- "extend",
- "internment",
- "once_cell",
- "recursion",
- "thiserror 2.0.17",
-]
-
 [[package]]
 name = "uuid"
 version = "1.19.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,6 @@ resolver = "3"
 members = [
    "rust/networking",
    "rust/exo_pyo3_bindings",
-    "rust/util",
 ]

 [workspace.package]
@@ -24,62 +23,18 @@ opt-level = 3
 [workspace.dependencies]
 ## Crate members as common dependencies
 networking = { path = "rust/networking" }
-util = { path = "rust/util" }
-
-# Proc-macro authoring tools
-syn = "2.0"
-quote = "1.0"
-proc-macro2 = "1.0"
-darling = "0.20"
-
-# Macro dependecies
-extend = "1.2"
-delegate = "0.13"
-impl-trait-for-tuples = "0.2"
-clap = "4.5"
-derive_more = { version = "2.0.1", features = ["display"] }
-pin-project = "1"
-
-# Utility dependencies
-itertools = "0.14"
-thiserror = "2"
-internment = "0.8"
-recursion = "0.5"
-regex = "1.11"
-once_cell = "1.21"
-thread_local = "1.1"
-bon = "3.4"
-generativity = "1.1"
-anyhow = "1.0"
-keccak-const = "0.2"
-
-# Functional generics/lenses frameworks
-frunk_core = "0.4"
-frunk = "0.4"
-frunk_utils = "0.2"
-frunk-enum-core = "0.3"

 # Async dependencies
 tokio = "1.46"
-futures = "0.3"
-futures-util = "0.3"
-futures-timer = "3.0"
-
-# Data structures
-either = "1.15"
-ordered-float = "5.0"
-ahash = "0.8"

 # Tracing/logging
 log = "0.4"

 # networking
 libp2p = "0.56"
-libp2p-tcp = "0.44"

 [workspace.lints.rust]
-static_mut_refs = "warn"      # Or use "warn" instead of deny
-incomplete_features = "allow"
+static_mut_refs = "warn"

 # Clippy's lint category level configurations;
 # every member crate needs to inherit these by adding
@@ -100,64 +55,3 @@ perf = { level = "warn", priority = -1 }
 pedantic = { level = "warn", priority = -1 }
 nursery = { level = "warn", priority = -1 }
 cargo = { level = "warn", priority = -1 }
-
-# Individual Clippy lints from the `restriction` category
-arithmetic_side_effects = "warn"
-as_conversions = "warn"
-assertions_on_result_states = "warn"
-clone_on_ref_ptr = "warn"
-decimal_literal_representation = "warn"
-default_union_representation = "warn"
-deref_by_slicing = "warn"
-disallowed_script_idents = "deny"
-else_if_without_else = "warn"
-empty_enum_variants_with_brackets = "warn"
-empty_structs_with_brackets = "warn"
-error_impl_error = "warn"
-exit = "deny"
-expect_used = "warn"
-float_cmp_const = "warn"
-get_unwrap = "warn"
-if_then_some_else_none = "warn"
-impl_trait_in_params = "warn"
-indexing_slicing = "warn"
-infinite_loop = "warn"
-let_underscore_must_use = "warn"
-let_underscore_untyped = "warn"
-lossy_float_literal = "warn"
-mem_forget = "warn"
-missing_inline_in_public_items = "warn"
-multiple_inherent_impl = "warn"
-multiple_unsafe_ops_per_block = "warn"
-mutex_atomic = "warn"
-non_zero_suggestions = "warn"
-panic = "warn"
-partial_pub_fields = "warn"
-pattern_type_mismatch = "warn"
-pub_without_shorthand = "warn"
-rc_buffer = "warn"
-rc_mutex = "warn"
-redundant_type_annotations = "warn"
-renamed_function_params = "warn"
-rest_pat_in_fully_bound_structs = "warn"
-same_name_method = "warn"
-self_named_module_files = "deny"
-semicolon_inside_block = "warn"
-shadow_same = "warn"
-shadow_unrelated = "warn"
-str_to_string = "warn"
-string_add = "warn"
-string_lit_chars_any = "warn"
-string_to_string = "warn"
-tests_outside_test_module = "warn"
-todo = "warn"
-try_err = "warn"
-undocumented_unsafe_blocks = "warn"
-unnecessary_safety_comment = "warn"
-unnecessary_safety_doc = "warn"
-unneeded_field_pattern = "warn"
-unseparated_literal_suffix = "warn"
-unused_result_ok = "warn"
-unused_trait_names = "warn"
-unwrap_used = "warn"
-verbose_file_reads = "warn"
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
  <img alt="exo logo" src="/docs/imgs/exo-logo-transparent.png" width="50%" height="50%">
 </picture>

-exo: Run your own AI cluster at home with everyday devices. Maintained by [exo labs](https://x.com/exolabs).
+exo: Run frontier AI locally. Maintained by [exo labs](https://x.com/exolabs).

 <p align="center">
  <a href="https://discord.gg/TJ4P57arEm" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/Discord-Join%20Server-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
@@ -107,6 +107,10 @@ uv run exo

 This starts the exo dashboard and API at http://localhost:52415/

+
+*Please see the section on RDMA to enable this feature on macOS 26.2 or later!*
+
+
 ### Run from Source (Linux)

 **Prerequisites:**
@@ -230,7 +234,7 @@ This removes:

 RDMA is a new capability added to macOS 26.2. It works on any Mac with Thunderbolt 5 (M4 Pro Mac Mini, M4 Max Mac Studio, M4 Max MacBook Pro, M3 Ultra Mac Studio).

-Note that on Mac Studio, you cannot use the Thunderbolt 5 port next to the Ethernet port.
+Please refer to the "Important Caveats" list below for immediate troubleshooting.

 To enable RDMA on macOS, follow these steps:

@@ -247,6 +251,14 @@ To enable RDMA on macOS, follow these steps:

 After that, RDMA will be enabled in macOS and exo will take care of the rest.

+**Important Caveats**
+
+1. Devices that wish to be part of an RDMA cluster must be connected to all other devices in the cluster.
+2. The cables must support TB5.
+3. On a Mac Studio, you cannot use the Thunderbolt 5 port next to the Ethernet port.
+4. If running from source, please use the script found at `tmp/set_rdma_network_config.sh`, which will disable Thunderbolt Bridge and enable DHCP on each RDMA port.
+5. RDMA ports may be unable to discover each other across different versions of macOS. Please ensure that OS versions match exactly (even beta version numbers) on all devices.
+
 ---

 ### Using the API
--- a/app/EXO/EXO.xcodeproj/project.pbxproj
+++ b/app/EXO/EXO.xcodeproj/project.pbxproj
@@ -342,6 +342,8 @@
 				SDKROOT = macosx;
 				SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+				SWIFT_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
 			};
 			name = Debug;
 		};
@@ -397,6 +399,8 @@
 				MTL_FAST_MATH = YES;
 				SDKROOT = macosx;
 				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
 			};
 			name = Release;
 		};
--- a/app/EXO/EXO/EXOApp.swift
+++ b/app/EXO/EXO/EXOApp.swift
@@ -225,7 +225,7 @@ private final class ExoUpdaterDelegate: NSObject, SPUUpdaterDelegate {
        }
    }

-    private func showNotification(title: String, body: String) {
+    nonisolated private func showNotification(title: String, body: String) {
        let center = UNUserNotificationCenter.current()
        let content = UNMutableNotificationContent()
        content.title = title
--- a/app/EXO/EXO/Models/ClusterState.swift
+++ b/app/EXO/EXO/Models/ClusterState.swift
@@ -293,7 +293,7 @@ struct ClusterTask {
    let modelName: String?
    let promptPreview: String?
    let errorMessage: String?
-    let parameters: ChatCompletionTaskParameters?
+    let parameters: TextGenerationTaskParameters?

    var sortPriority: Int {
        switch status {
@@ -330,12 +330,12 @@ struct ClusterTaskPayload: Decodable {
    let taskStatus: TaskStatus?
    let instanceId: String?
    let commandId: String?
-    let taskParams: ChatCompletionTaskParameters?
+    let taskParams: TextGenerationTaskParameters?
    let errorType: String?
    let errorMessage: String?
 }

-struct ChatCompletionTaskParameters: Decodable, Equatable {
+struct TextGenerationTaskParameters: Decodable, Equatable {
    let model: String?
    let messages: [ChatCompletionMessage]?
    let maxTokens: Int?
@@ -374,7 +374,7 @@ extension ClusterTask {
        guard let id = payload.taskId else { return nil }
        let status = payload.taskStatus ?? .unknown
        switch kindKey {
-        case "ChatCompletion":
+        case "TextGeneration":
            self.init(
                id: id,
                status: status,
--- a/app/EXO/EXO/Services/NetworkSetupHelper.swift
+++ b/app/EXO/EXO/Services/NetworkSetupHelper.swift
@@ -19,7 +19,7 @@ enum NetworkSetupHelper {
        set -euo pipefail

        # Wait for macOS to finish network setup after boot
-        sleep 30
+        sleep 20

        PREFS="/Library/Preferences/SystemConfiguration/preferences.plist"

@@ -244,11 +244,11 @@ enum NetworkSetupHelper {
        rm -f "$LOG_OUT" "$LOG_ERR"

        # Switch back to Automatic network location
-        networksetup -switchtolocation Automatic 2>/dev/null || true
+        networksetup -switchtolocation Automatic >/dev/null 2>&1 || true

        # Delete the exo network location if it exists
-        networksetup -listlocations | grep -q '^exo$' && {
-          networksetup -deletelocation exo 2>/dev/null || true
+        networksetup -listlocations 2>/dev/null | grep -q '^exo$' && {
+          networksetup -deletelocation exo >/dev/null 2>&1 || true
        } || true

        # Re-enable any Thunderbolt Bridge service if it exists
@@ -258,12 +258,12 @@ enum NetworkSetupHelper {
          tb_devices=$(networksetup -listallhardwareports 2>/dev/null | awk '
            /^Hardware Port:/ { port = tolower(substr($0, 16)) }
            /^Device:/ { if (port ~ /thunderbolt/) print substr($0, 9) }
-          ')
+          ') || true
          [ -z "$tb_devices" ] && return 0

          # For each bridge device, check if it contains Thunderbolt interfaces
          for bridge in bridge0 bridge1 bridge2; do
-            members=$(ifconfig "$bridge" 2>/dev/null | awk '/member:/ {print $2}')
+            members=$(ifconfig "$bridge" 2>/dev/null | awk '/member:/ {print $2}') || true
            [ -z "$members" ] && continue

            for tb_dev in $tb_devices; do
@@ -272,7 +272,7 @@ enum NetworkSetupHelper {
                service_name=$(networksetup -listnetworkserviceorder 2>/dev/null | awk -v dev="$bridge" '
                  /^\\([0-9*]/ { gsub(/^\\([0-9*]+\\) /, ""); svc = $0 }
                  /Device:/ && $0 ~ dev { print svc; exit }
-                ')
+                ') || true
                if [ -n "$service_name" ]; then
                  networksetup -setnetworkserviceenabled "$service_name" on 2>/dev/null || true
                  return 0
@@ -280,8 +280,9 @@ enum NetworkSetupHelper {
              fi
            done
          done
+          return 0
        }
-        find_and_enable_thunderbolt_bridge
+        find_and_enable_thunderbolt_bridge || true

        echo "EXO network components removed successfully"
        """
--- a/app/EXO/EXO/Services/ThunderboltBridgeService.swift
+++ b/app/EXO/EXO/Services/ThunderboltBridgeService.swift
@@ -127,21 +127,24 @@ final class ThunderboltBridgeService: ObservableObject {

        // 2. Request specific network configuration rights
        let rightName = "system.services.systemconfiguration.network"
-        var item = AuthorizationItem(
-            name: rightName,
-            valueLength: 0,
-            value: nil,
-            flags: 0
-        )
-        var rights = AuthorizationRights(count: 1, items: &item)
-
-        status = AuthorizationCopyRights(
-            authRef,
-            &rights,
-            nil,
-            [.extendRights, .interactionAllowed],
-            nil
-        )
+        status = rightName.withCString { nameCString in
+            var item = AuthorizationItem(
+                name: nameCString,
+                valueLength: 0,
+                value: nil,
+                flags: 0
+            )
+            return withUnsafeMutablePointer(to: &item) { itemPointer in
+                var rights = AuthorizationRights(count: 1, items: itemPointer)
+                return AuthorizationCopyRights(
+                    authRef,
+                    &rights,
+                    nil,
+                    [.extendRights, .interactionAllowed],
+                    nil
+                )
+            }
+        }
        guard status == errAuthorizationSuccess else {
            if status == errAuthorizationCanceled {
                throw ThunderboltBridgeError.authorizationCanceled
--- a/app/EXO/EXO/ViewModels/InstanceViewModel.swift
+++ b/app/EXO/EXO/ViewModels/InstanceViewModel.swift
@@ -216,7 +216,7 @@ struct InstanceTaskViewModel: Identifiable, Equatable {
    let promptPreview: String?
    let errorMessage: String?
    let subtitle: String?
-    let parameters: ChatCompletionTaskParameters?
+    let parameters: TextGenerationTaskParameters?

    var title: String {
        switch kind {
--- a/app/EXO/uninstall-exo.sh
+++ b/app/EXO/uninstall-exo.sh
@@ -29,21 +29,21 @@ YELLOW='\033[1;33m'
 NC='\033[0m' # No Color

 echo_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
+  echo -e "${GREEN}[INFO]${NC} $1"
 }

 echo_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
+  echo -e "${YELLOW}[WARN]${NC} $1"
 }

 echo_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
+  echo -e "${RED}[ERROR]${NC} $1"
 }

 # Check if running as root
 if [[ $EUID -ne 0 ]]; then
-    echo_error "This script must be run as root (use sudo)"
-    exit 1
+  echo_error "This script must be run as root (use sudo)"
+  exit 1
 fi

 echo ""
@@ -55,64 +55,64 @@ echo ""
 # Unload the LaunchDaemon if running
 echo_info "Stopping network setup daemon..."
 if launchctl list | grep -q "$LABEL"; then
-    launchctl bootout system/"$LABEL" 2>/dev/null || true
-    echo_info "Daemon stopped"
+  launchctl bootout system/"$LABEL" 2>/dev/null || true
+  echo_info "Daemon stopped"
 else
-    echo_warn "Daemon was not running"
+  echo_warn "Daemon was not running"
 fi

 # Remove LaunchDaemon plist
-if [[ -f "$PLIST_DEST" ]]; then
-    rm -f "$PLIST_DEST"
-    echo_info "Removed LaunchDaemon plist"
+if [[ -f $PLIST_DEST ]]; then
+  rm -f "$PLIST_DEST"
+  echo_info "Removed LaunchDaemon plist"
 else
-    echo_warn "LaunchDaemon plist not found (already removed?)"
+  echo_warn "LaunchDaemon plist not found (already removed?)"
 fi

 # Remove the script and parent directory
-if [[ -f "$SCRIPT_DEST" ]]; then
-    rm -f "$SCRIPT_DEST"
-    echo_info "Removed network setup script"
+if [[ -f $SCRIPT_DEST ]]; then
+  rm -f "$SCRIPT_DEST"
+  echo_info "Removed network setup script"
 else
-    echo_warn "Network setup script not found (already removed?)"
+  echo_warn "Network setup script not found (already removed?)"
 fi

 # Remove EXO directory if empty
 if [[ -d "/Library/Application Support/EXO" ]]; then
-    rmdir "/Library/Application Support/EXO" 2>/dev/null && \
-        echo_info "Removed EXO support directory" || \
-        echo_warn "EXO support directory not empty, leaving in place"
+  rmdir "/Library/Application Support/EXO" 2>/dev/null &&
+    echo_info "Removed EXO support directory" ||
+    echo_warn "EXO support directory not empty, leaving in place"
 fi

 # Remove log files
-if [[ -f "$LOG_OUT" ]] || [[ -f "$LOG_ERR" ]]; then
-    rm -f "$LOG_OUT" "$LOG_ERR"
-    echo_info "Removed log files"
+if [[ -f $LOG_OUT ]] || [[ -f $LOG_ERR ]]; then
+  rm -f "$LOG_OUT" "$LOG_ERR"
+  echo_info "Removed log files"
 else
-    echo_warn "Log files not found (already removed?)"
+  echo_warn "Log files not found (already removed?)"
 fi

 # Switch back to Automatic network location
 echo_info "Restoring network configuration..."
 if networksetup -listlocations | grep -q "^Automatic$"; then
-    networksetup -switchtolocation Automatic 2>/dev/null || true
-    echo_info "Switched to Automatic network location"
+  networksetup -switchtolocation Automatic 2>/dev/null || true
+  echo_info "Switched to Automatic network location"
 else
-    echo_warn "Automatic network location not found"
+  echo_warn "Automatic network location not found"
 fi

 # Delete the exo network location if it exists
 if networksetup -listlocations | grep -q "^exo$"; then
-    networksetup -deletelocation exo 2>/dev/null || true
-    echo_info "Deleted 'exo' network location"
+  networksetup -deletelocation exo 2>/dev/null || true
+  echo_info "Deleted 'exo' network location"
 else
-    echo_warn "'exo' network location not found (already removed?)"
+  echo_warn "'exo' network location not found (already removed?)"
 fi

 # Re-enable Thunderbolt Bridge if it exists
 if networksetup -listnetworkservices 2>/dev/null | grep -q "Thunderbolt Bridge"; then
-    networksetup -setnetworkserviceenabled "Thunderbolt Bridge" on 2>/dev/null || true
-    echo_info "Re-enabled Thunderbolt Bridge"
+  networksetup -setnetworkserviceenabled "Thunderbolt Bridge" on 2>/dev/null || true
+  echo_info "Re-enabled Thunderbolt Bridge"
 fi

 # Note about launch at login registration
@@ -124,14 +124,14 @@ echo_warn "  System Settings → General → Login Items → Remove EXO"
 # Check if EXO.app exists in common locations
 APP_FOUND=false
 for app_path in "/Applications/EXO.app" "$HOME/Applications/EXO.app"; do
-    if [[ -d "$app_path" ]]; then
-        if [[ "$APP_FOUND" == false ]]; then
-            echo ""
-            APP_FOUND=true
-        fi
-        echo_warn "EXO.app found at: $app_path"
-        echo_warn "You may want to move it to Trash manually."
+  if [[ -d $app_path ]]; then
+    if [[ $APP_FOUND == false ]]; then
+      echo ""
+      APP_FOUND=true
    fi
+    echo_warn "EXO.app found at: $app_path"
+    echo_warn "You may want to move it to Trash manually."
+  fi
 done

 echo ""
@@ -151,4 +151,3 @@ echo ""
 echo "Manual step required:"
 echo "  Remove EXO from Login Items in System Settings → General → Login Items"
 echo ""
-
--- a/bench/exo_bench.py
+++ b/bench/exo_bench.py
@@ -5,10 +5,13 @@ from __future__ import annotations
 import argparse
 import contextlib
 import http.client
+import itertools
 import json
 import os
+import sys
 import time
 from collections.abc import Callable
+from pathlib import Path
 from statistics import mean
 from typing import Any
 from urllib.parse import urlencode
@@ -16,6 +19,84 @@ from urllib.parse import urlencode
 from loguru import logger
 from transformers import AutoTokenizer

+# Monkey-patch for transformers 5.x compatibility
+# Kimi's tokenization_kimi.py imports bytes_to_unicode from the old location
+# which was moved in transformers 5.0.0rc2
+try:
+    import transformers.models.gpt2.tokenization_gpt2 as gpt2_tokenization
+    from transformers.convert_slow_tokenizer import bytes_to_unicode
+
+    if not hasattr(gpt2_tokenization, "bytes_to_unicode"):
+        gpt2_tokenization.bytes_to_unicode = bytes_to_unicode  # type: ignore[attr-defined]
+except ImportError:
+    pass  # transformers < 5.0 or bytes_to_unicode not available
+
+
+def load_tokenizer_for_bench(model_id: str) -> Any:
+    """
+    Load tokenizer for benchmarking, with special handling for Kimi models.
+
+    Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer.
+    This function replicates the logic from utils_mlx.py for bench compatibility.
+    """
+    model_id_lower = model_id.lower()
+
+    if "kimi-k2" in model_id_lower:
+        import importlib.util
+        import types
+
+        from huggingface_hub import snapshot_download
+
+        # Download/get the model path
+        model_path = Path(
+            snapshot_download(
+                model_id,
+                allow_patterns=["*.json", "*.py", "*.tiktoken"],
+            )
+        )
+
+        sys.path.insert(0, str(model_path))
+
+        # Load tool_declaration_ts first (tokenization_kimi imports it with relative import)
+        tool_decl_path = model_path / "tool_declaration_ts.py"
+        if tool_decl_path.exists():
+            spec = importlib.util.spec_from_file_location(
+                "tool_declaration_ts", tool_decl_path
+            )
+            if spec and spec.loader:
+                tool_decl_module = importlib.util.module_from_spec(spec)
+                sys.modules["tool_declaration_ts"] = tool_decl_module
+                spec.loader.exec_module(tool_decl_module)
+
+        # Load tokenization_kimi with patched source (convert relative to absolute import)
+        tok_path = model_path / "tokenization_kimi.py"
+        source = tok_path.read_text()
+        source = source.replace("from .tool_declaration_ts", "from tool_declaration_ts")
+        spec = importlib.util.spec_from_file_location("tokenization_kimi", tok_path)
+        if spec:
+            tok_module = types.ModuleType("tokenization_kimi")
+            tok_module.__file__ = str(tok_path)
+            sys.modules["tokenization_kimi"] = tok_module
+            exec(compile(source, tok_path, "exec"), tok_module.__dict__)  # noqa: S102
+            TikTokenTokenizer = tok_module.TikTokenTokenizer  # noqa: N806
+        else:
+            from tokenization_kimi import TikTokenTokenizer  # type: ignore[import-not-found]  # noqa: I001
+
+        hf_tokenizer: Any = TikTokenTokenizer.from_pretrained(model_path)
+
+        # Patch encode to use internal tiktoken model directly
+        # transformers 5.x has a bug in the encode->pad path for slow tokenizers
+        def _patched_encode(text: str, **kwargs: object) -> list[int]:
+            # Pass allowed_special="all" to handle special tokens like <|im_user|>
+            return list(hf_tokenizer.model.encode(text, allowed_special="all"))
+
+        hf_tokenizer.encode = _patched_encode
+
+        return hf_tokenizer
+
+    # Default: use AutoTokenizer
+    return AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+

 class ExoHttpError(RuntimeError):
    def __init__(self, status: int, reason: str, body_preview: str):
@@ -24,7 +105,7 @@ class ExoHttpError(RuntimeError):


 class ExoClient:
-    def __init__(self, host: str, port: int, timeout_s: float = 600.0):
+    def __init__(self, host: str, port: int, timeout_s: float = 7200.0):
        self.host = host
        self.port = port
        self.timeout_s = timeout_s
@@ -180,14 +261,7 @@ def parse_int_list(values: list[str]) -> list[int]:
            part = part.strip()
            if part:
                items.append(int(part))
-
-    seen: set[int] = set()
-    out: list[int] = []
-    for x in items:
-        if x not in seen:
-            out.append(x)
-            seen.add(x)
-    return out
+    return items


 def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]:
@@ -240,7 +314,11 @@ def run_one_completion(

    stats = out.get("generation_stats")

-    preview = (out.get("choices") or [{}])[0]["message"]["content"][:200]
+    # Extract preview, handling None content (common for thinking models)
+    choices = out.get("choices") or [{}]
+    message = choices[0].get("message", {}) if choices else {}
+    content = message.get("content") or ""
+    preview = content[:200] if content else ""

    return {
        "elapsed_s": elapsed,
@@ -277,12 +355,29 @@ class PromptSizer:
                f"Target ({target}) is smaller than template overhead ({self.base_tokens})."
            )

-        content = ""
-        tok = self.count_fn(content)
+        # Estimate tokens per atom using a sample
+        sample_count = 100
+        sample_content = self.atom * sample_count
+        sample_tokens = self.count_fn(sample_content) - self.base_tokens
+        tokens_per_atom = sample_tokens / sample_count

-        while tok < target:
-            content += self.atom
-            tok = self.count_fn(content)
+        # Estimate starting point
+        needed_tokens = target - self.base_tokens
+        estimated_atoms = int(needed_tokens / tokens_per_atom)
+
+        # Binary search to find exact atom count
+        low, high = 0, estimated_atoms * 2 + 100
+        while low < high:
+            mid = (low + high) // 2
+            tok = self.count_fn(self.atom * mid)
+            if tok < target:
+                low = mid + 1
+            else:
+                high = mid
+
+        content = self.atom * low
+        tok = self.count_fn(content)
+        logger.info(f"{tok=}")

        if tok != target:
            raise RuntimeError(
@@ -348,7 +443,7 @@ def main() -> int:
        help="Warmup runs per placement (uses first pp/tg).",
    )
    ap.add_argument(
-        "--timeout", type=float, default=600.0, help="HTTP timeout (seconds)."
+        "--timeout", type=float, default=7200.0, help="HTTP timeout (seconds)."
    )
    ap.add_argument(
        "--json-out",
@@ -358,6 +453,11 @@ def main() -> int:
    ap.add_argument(
        "--dry-run", action="store_true", help="List selected placements and exit."
    )
+    ap.add_argument(
+        "--all-combinations",
+        action="store_true",
+        help="Force all pp×tg combinations (cartesian product) even when lists have equal length.",
+    )
    args = ap.parse_args()

    pp_list = parse_int_list(args.pp)
@@ -369,6 +469,15 @@ def main() -> int:
        logger.error("--repeat must be >= 1")
        return 2

+    # Log pairing mode
+    use_combinations = args.all_combinations or len(pp_list) != len(tg_list)
+    if use_combinations:
+        logger.info(
+            f"pp/tg mode: combinations (product) - {len(pp_list) * len(tg_list)} pairs"
+        )
+    else:
+        logger.info(f"pp/tg mode: tandem (zip) - {len(pp_list)} pairs")
+
    client = ExoClient(args.host, args.port, timeout_s=args.timeout)
    short_id, full_model_id = resolve_model_short_id(client, args.model)

@@ -377,10 +486,7 @@ def main() -> int:
    )
    previews = previews_resp.get("previews") or []

-    tokenizer = AutoTokenizer.from_pretrained(
-        full_model_id,
-        trust_remote_code=True,
-    )
+    tokenizer = load_tokenizer_for_bench(full_model_id)
    if tokenizer is None:
        raise RuntimeError("[exo-bench] tokenizer load failed")

@@ -486,60 +592,55 @@ def main() -> int:
                )
                logger.debug(f"  warmup {i + 1}/{args.warmup} done")

-            for pp in pp_list:
-                # if (
-                #     pp * n_nodes > 2048
-                #     and "ring" in instance_meta.lower()
-                #     and "tensor" in sharding.lower()
-                # ):
-                #     model_card = MODEL_CARDS[short_id]
-                #     if model_card.metadata.storage_size > Memory.from_gb(10):
-                #         logger.info(
-                #             f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
-                #         )
-                #         continue
-                for tg in tg_list:
-                    runs: list[dict[str, Any]] = []
-                    for r in range(args.repeat):
-                        time.sleep(3)
-                        try:
-                            row, actual_pp_tokens = run_one_completion(
-                                client, full_model_id, pp, tg, prompt_sizer
-                            )
-                        except Exception as e:
-                            logger.error(e)
-                            continue
-                        row.update(
-                            {
-                                "model_short_id": short_id,
-                                "model_id": full_model_id,
-                                "placement_sharding": sharding,
-                                "placement_instance_meta": instance_meta,
-                                "placement_nodes": n_nodes,
-                                "instance_id": instance_id,
-                                "pp_tokens": actual_pp_tokens,
-                                "tg": tg,
-                                "repeat_index": r,
-                            }
-                        )
-                        runs.append(row)
-                        all_rows.append(row)
+            # If pp and tg lists have same length, run in tandem (zip)
+            # Otherwise (or if --all-combinations), run all combinations (cartesian product)
+            if use_combinations:
+                pp_tg_pairs = list(itertools.product(pp_list, tg_list))
+            else:
+                pp_tg_pairs = list(zip(pp_list, tg_list, strict=True))

-                    if runs:
-                        prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
-                        gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
-                        ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
-                        gtok = mean(x["stats"]["generation_tokens"] for x in runs)
-                        peak = mean(
-                            x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
+            for pp, tg in pp_tg_pairs:
+                runs: list[dict[str, Any]] = []
+                for r in range(args.repeat):
+                    time.sleep(3)
+                    try:
+                        row, actual_pp_tokens = run_one_completion(
+                            client, full_model_id, pp, tg, prompt_sizer
                        )
+                    except Exception as e:
+                        logger.error(e)
+                        continue
+                    row.update(
+                        {
+                            "model_short_id": short_id,
+                            "model_id": full_model_id,
+                            "placement_sharding": sharding,
+                            "placement_instance_meta": instance_meta,
+                            "placement_nodes": n_nodes,
+                            "instance_id": instance_id,
+                            "pp_tokens": actual_pp_tokens,
+                            "tg": tg,
+                            "repeat_index": r,
+                        }
+                    )
+                    runs.append(row)
+                    all_rows.append(row)

-                        logger.info(
-                            f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
-                            f"prompt_tokens={ptok} gen_tokens={gtok}    "
-                            f"peak_memory={format_peak_memory(peak)}\n"
-                        )
-                    time.sleep(2)
+                if runs:
+                    prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
+                    gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
+                    ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
+                    gtok = mean(x["stats"]["generation_tokens"] for x in runs)
+                    peak = mean(
+                        x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
+                    )
+
+                    logger.info(
+                        f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
+                        f"prompt_tokens={ptok} gen_tokens={gtok}    "
+                        f"peak_memory={format_peak_memory(peak)}\n"
+                    )
+                time.sleep(2)
        finally:
            try:
                client.request_json("DELETE", f"/instance/{instance_id}")
--- a/dashboard/package-lock.json
+++ b/dashboard/package-lock.json
@@ -865,7 +865,6 @@
 			"integrity": "sha512-oH8tXw7EZnie8FdOWYrF7Yn4IKrqTFHhXvl8YxXxbKwTMcD/5NNCryUSEXRk2ZR4ojnub0P8rNrsVGHXWqIDtA==",
 			"dev": true,
 			"license": "MIT",
-			"peer": true,
 			"dependencies": {
 				"@standard-schema/spec": "^1.0.0",
 				"@sveltejs/acorn-typescript": "^1.0.5",
@@ -905,7 +904,6 @@
 			"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
 			"dev": true,
 			"license": "MIT",
-			"peer": true,
 			"dependencies": {
 				"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
 				"debug": "^4.4.1",
@@ -1522,7 +1520,6 @@
 			"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
 			"dev": true,
 			"license": "MIT",
-			"peer": true,
 			"dependencies": {
 				"undici-types": "~6.21.0"
 			}
@@ -1532,7 +1529,6 @@
 			"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
 			"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
 			"license": "MIT",
-			"peer": true,
 			"bin": {
 				"acorn": "bin/acorn"
 			},
@@ -1945,7 +1941,6 @@
 			"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
 			"dev": true,
 			"license": "ISC",
-			"peer": true,
 			"engines": {
 				"node": ">=12"
 			}
@@ -2653,7 +2648,6 @@
 			"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
 			"dev": true,
 			"license": "MIT",
-			"peer": true,
 			"engines": {
 				"node": ">=12"
 			},
@@ -2696,7 +2690,6 @@
 			"integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==",
 			"dev": true,
 			"license": "MIT",
-			"peer": true,
 			"bin": {
 				"prettier": "bin/prettier.cjs"
 			},
@@ -2869,7 +2862,6 @@
 			"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.45.3.tgz",
 			"integrity": "sha512-ngKXNhNvwPzF43QqEhDOue7TQTrG09em1sd4HBxVF0Wr2gopAmdEWan+rgbdgK4fhBtSOTJO8bYU4chUG7VXZQ==",
 			"license": "MIT",
-			"peer": true,
 			"dependencies": {
 				"@jridgewell/remapping": "^2.3.4",
 				"@jridgewell/sourcemap-codec": "^1.5.0",
@@ -3014,7 +3006,6 @@
 			"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
 			"dev": true,
 			"license": "Apache-2.0",
-			"peer": true,
 			"bin": {
 				"tsc": "bin/tsc",
 				"tsserver": "bin/tsserver"
@@ -3036,7 +3027,6 @@
 			"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
 			"dev": true,
 			"license": "MIT",
-			"peer": true,
 			"dependencies": {
 				"esbuild": "^0.25.0",
 				"fdir": "^6.4.4",
--- a/dashboard/src/lib/components/FamilyLogos.svelte
+++ b/dashboard/src/lib/components/FamilyLogos.svelte
@@ -0,0 +1,73 @@
+<script lang="ts">
+  type FamilyLogoProps = {
+    family: string;
+    class?: string;
+  };
+
+  let { family, class: className = "" }: FamilyLogoProps = $props();
+</script>
+
+{#if family === "favorites"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
+    />
+  </svg>
+{:else if family === "llama" || family === "meta"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M6.915 4.03c-1.968 0-3.683 1.28-4.871 3.113C.704 9.208 0 11.883 0 14.449c0 .706.07 1.369.21 1.973a6.624 6.624 0 0 0 .265.86 5.297 5.297 0 0 0 .371.761c.696 1.159 1.818 1.927 3.593 1.927 1.497 0 2.633-.671 3.965-2.444.76-1.012 1.144-1.626 2.663-4.32l.756-1.339.186-.325c.061.1.121.196.183.3l2.152 3.595c.724 1.21 1.665 2.556 2.47 3.314 1.046.987 1.992 1.22 3.06 1.22 1.075 0 1.876-.355 2.455-.843a3.743 3.743 0 0 0 .81-.973c.542-.939.861-2.127.861-3.745 0-2.72-.681-5.357-2.084-7.45-1.282-1.912-2.957-2.93-4.716-2.93-1.047 0-2.088.467-3.053 1.308-.652.57-1.257 1.29-1.82 2.05-.69-.875-1.335-1.547-1.958-2.056-1.182-.966-2.315-1.303-3.454-1.303zm10.16 2.053c1.147 0 2.188.758 2.992 1.999 1.132 1.748 1.647 4.195 1.647 6.4 0 1.548-.368 2.9-1.839 2.9-.58 0-1.027-.23-1.664-1.004-.496-.601-1.343-1.878-2.832-4.358l-.617-1.028a44.908 44.908 0 0 0-1.255-1.98c.07-.109.141-.224.211-.327 1.12-1.667 2.118-2.602 3.358-2.602zm-10.201.553c1.265 0 2.058.791 2.675 1.446.307.327.737.871 1.234 1.579l-1.02 1.566c-.757 1.163-1.882 3.017-2.837 4.338-1.191 1.649-1.81 1.817-2.486 1.817-.524 0-1.038-.237-1.383-.794-.263-.426-.464-1.13-.464-2.046 0-2.221.63-4.535 1.66-6.088.454-.687.964-1.226 1.533-1.533a2.264 2.264 0 0 1 1.088-.285z"
+    />
+  </svg>
+{:else if family === "qwen"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M12.604 1.34c.393.69.784 1.382 1.174 2.075a.18.18 0 00.157.091h5.552c.174 0 .322.11.446.327l1.454 2.57c.19.337.24.478.024.837-.26.43-.513.864-.76 1.3l-.367.658c-.106.196-.223.28-.04.512l2.652 4.637c.172.301.111.494-.043.77-.437.785-.882 1.564-1.335 2.34-.159.272-.352.375-.68.37-.777-.016-1.552-.01-2.327.016a.099.099 0 00-.081.05 575.097 575.097 0 01-2.705 4.74c-.169.293-.38.363-.725.364-.997.003-2.002.004-3.017.002a.537.537 0 01-.465-.271l-1.335-2.323a.09.09 0 00-.083-.049H4.982c-.285.03-.553-.001-.805-.092l-1.603-2.77a.543.543 0 01-.002-.54l1.207-2.12a.198.198 0 000-.197 550.951 550.951 0 01-1.875-3.272l-.79-1.395c-.16-.31-.173-.496.095-.965.465-.813.927-1.625 1.387-2.436.132-.234.304-.334.584-.335a338.3 338.3 0 012.589-.001.124.124 0 00.107-.063l2.806-4.895a.488.488 0 01.422-.246c.524-.001 1.053 0 1.583-.006L11.704 1c.341-.003.724.032.9.34zm-3.432.403a.06.06 0 00-.052.03L6.254 6.788a.157.157 0 01-.135.078H3.253c-.056 0-.07.025-.041.074l5.81 10.156c.025.042.013.062-.034.063l-2.795.015a.218.218 0 00-.2.116l-1.32 2.31c-.044.078-.021.118.068.118l5.716.008c.046 0 .08.02.104.061l1.403 2.454c.046.081.092.082.139 0l5.006-8.76.783-1.382a.055.055 0 01.096 0l1.424 2.53a.122.122 0 00.107.062l2.763-.02a.04.04 0 00.035-.02.041.041 0 000-.04l-2.9-5.086a.108.108 0 010-.113l.293-.507 1.12-1.977c.024-.041.012-.062-.035-.062H9.2c-.059 0-.073-.026-.043-.077l1.434-2.505a.107.107 0 000-.114L9.225 1.774a.06.06 0 00-.053-.031zm6.29 8.02c.046 0 .058.02.034.06l-.832 1.465-2.613 4.585a.056.056 0 01-.05.029.058.058 0 01-.05-.029L8.498 9.841c-.02-.034-.01-.052.028-.054l.216-.012 6.722-.012z"
+    />
+  </svg>
+{:else if family === "deepseek"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M23.748 4.482c-.254-.124-.364.113-.512.234-.051.039-.094.09-.137.136-.372.397-.806.657-1.373.626-.829-.046-1.537.214-2.163.848-.133-.782-.575-1.248-1.247-1.548-.352-.156-.708-.311-.955-.65-.172-.241-.219-.51-.305-.774-.055-.16-.11-.323-.293-.35-.2-.031-.278.136-.356.276-.313.572-.434 1.202-.422 1.84.027 1.436.633 2.58 1.838 3.393.137.093.172.187.129.323-.082.28-.18.552-.266.833-.055.179-.137.217-.329.14a5.526 5.526 0 01-1.736-1.18c-.857-.828-1.631-1.742-2.597-2.458a11.365 11.365 0 00-.689-.471c-.985-.957.13-1.743.388-1.836.27-.098.093-.432-.779-.428-.872.004-1.67.295-2.687.684a3.055 3.055 0 01-.465.137 9.597 9.597 0 00-2.883-.102c-1.885.21-3.39 1.102-4.497 2.623C.082 8.606-.231 10.684.152 12.85c.403 2.284 1.569 4.175 3.36 5.653 1.858 1.533 3.997 2.284 6.438 2.14 1.482-.085 3.133-.284 4.994-1.86.47.234.962.327 1.78.397.63.059 1.236-.03 1.705-.128.735-.156.684-.837.419-.961-2.155-1.004-1.682-.595-2.113-.926 1.096-1.296 2.746-2.642 3.392-7.003.05-.347.007-.565 0-.845-.004-.17.035-.237.23-.256a4.173 4.173 0 001.545-.475c1.396-.763 1.96-2.015 2.093-3.517.02-.23-.004-.467-.247-.588zM11.581 18c-2.089-1.642-3.102-2.183-3.52-2.16-.392.024-.321.471-.235.763.09.288.207.486.371.739.114.167.192.416-.113.603-.673.416-1.842-.14-1.897-.167-1.361-.802-2.5-1.86-3.301-3.307-.774-1.393-1.224-2.887-1.298-4.482-.02-.386.093-.522.477-.592a4.696 4.696 0 011.529-.039c2.132.312 3.946 1.265 5.468 2.774.868.86 1.525 1.887 2.202 2.891.72 1.066 1.494 2.082 2.48 2.914.348.292.625.514.891.677-.802.09-2.14.11-3.054-.614zm1-6.44a.306.306 0 01.415-.287.302.302 0 01.2.288.306.306 0 01-.31.307.303.303 0 01-.304-.308zm3.11 1.596c-.2.081-.399.151-.59.16a1.245 1.245 0 01-.798-.254c-.274-.23-.47-.358-.552-.758a1.73 1.73 0 01.016-.588c.07-.327-.008-.537-.239-.727-.187-.156-.426-.199-.688-.199a.559.559 0 01-.254-.078c-.11-.054-.2-.19-.114-.358.028-.054.16-.186.192-.21.356-.202.767-.136 1.146.016.352.144.618.408 1.001.782.391.451.462.576.685.914.176.265.336.537.445.848.067.195-.019.354-.25.452z"
+    />
+  </svg>
+{:else if family === "openai" || family === "gpt-oss"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z"
+    />
+  </svg>
+{:else if family === "glm"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M11.991 23.503a.24.24 0 00-.244.248.24.24 0 00.244.249.24.24 0 00.245-.249.24.24 0 00-.22-.247l-.025-.001zM9.671 5.365a1.697 1.697 0 011.099 2.132l-.071.172-.016.04-.018.054c-.07.16-.104.32-.104.498-.035.71.47 1.279 1.186 1.314h.366c1.309.053 2.338 1.173 2.286 2.523-.052 1.332-1.152 2.38-2.478 2.327h-.174c-.715.018-1.274.64-1.239 1.368 0 .124.018.23.053.337.209.373.54.658.96.8.75.23 1.517-.125 1.9-.782l.018-.035c.402-.64 1.17-.96 1.92-.711.854.284 1.378 1.226 1.099 2.167a1.661 1.661 0 01-2.077 1.102 1.711 1.711 0 01-.907-.711l-.017-.035c-.2-.323-.463-.58-.851-.711l-.056-.018a1.646 1.646 0 00-1.954.746 1.66 1.66 0 01-1.065.764 1.677 1.677 0 01-1.989-1.279c-.209-.906.332-1.83 1.257-2.043a1.51 1.51 0 01.296-.035h.018c.68-.071 1.151-.622 1.116-1.333a1.307 1.307 0 00-.227-.693 2.515 2.515 0 01-.366-1.403 2.39 2.39 0 01.366-1.208c.14-.195.21-.444.227-.693.018-.71-.506-1.261-1.186-1.332l-.07-.018a1.43 1.43 0 01-.299-.07l-.05-.019a1.7 1.7 0 01-1.047-2.114 1.68 1.68 0 012.094-1.101zm-5.575 10.11c.26-.264.639-.367.994-.27.355.096.633.379.728.74.095.362-.007.748-.267 1.013-.402.41-1.053.41-1.455 0a1.062 1.062 0 010-1.482zm14.845-.294c.359-.09.738.024.992.297.254.274.344.665.237 1.025-.107.36-.396.634-.756.718-.551.128-1.1-.22-1.23-.781a1.05 1.05 0 01.757-1.26zm-.064-4.39c.314.32.49.753.49 1.206 0 .452-.176.886-.49 1.206-.315.32-.74.5-1.185.5-.444 0-.87-.18-1.184-.5a1.727 1.727 0 010-2.412 1.654 1.654 0 012.369 0zm-11.243.163c.364.484.447 1.128.218 1.691a1.665 1.665 0 01-2.188.923c-.855-.36-1.26-1.358-.907-2.228a1.68 1.68 0 011.33-1.038c.593-.08 1.183.169 1.547.652zm11.545-4.221c.368 0 .708.2.892.524.184.324.184.724 0 1.048a1.026 1.026 0 01-.892.524c-.568 0-1.03-.47-1.03-1.048 0-.579.462-1.048 1.03-1.048zm-14.358 0c.368 0 .707.2.891.524.184.324.184.724 0 1.048a1.026 1.026 0 01-.891.524c-.569 0-1.03-.47-1.03-1.048 0-.579.461-1.048 1.03-1.048zm10.031-1.475c.925 0 1.675.764 1.675 1.706s-.75 1.705-1.675 1.705-1.674-.763-1.674-1.705c0-.942.75-1.706 
1.674-1.706zm-2.626-.684c.362-.082.653-.356.761-.718a1.062 1.062 0 00-.238-1.028 1.017 1.017 0 00-.996-.294c-.547.14-.881.7-.752 1.257.13.558.675.907 1.225.783zm0 16.876c.359-.087.644-.36.75-.72a1.062 1.062 0 00-.237-1.019 1.018 1.018 0 00-.985-.301 1.037 1.037 0 00-.762.717c-.108.361-.017.754.239 1.028.245.263.606.377.953.305l.043-.01zM17.19 3.5a.631.631 0 00.628-.64c0-.355-.279-.64-.628-.64a.631.631 0 00-.628.64c0 .355.28.64.628.64zm-10.38 0a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64a.631.631 0 00-.628.64c0 .355.279.64.628.64zm-5.182 7.852a.631.631 0 00-.628.64c0 .354.28.639.628.639a.63.63 0 00.627-.606l.001-.034a.62.62 0 00-.628-.64zm5.182 9.13a.631.631 0 00-.628.64c0 .355.279.64.628.64a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64zm10.38.018a.631.631 0 00-.628.64c0 .355.28.64.628.64a.631.631 0 00.628-.64c0-.355-.279-.64-.628-.64zm5.182-9.148a.631.631 0 00-.628.64c0 .354.279.639.628.639a.631.631 0 00.628-.64c0-.355-.28-.64-.628-.64zm-.384-4.992a.24.24 0 00.244-.249.24.24 0 00-.244-.249.24.24 0 00-.244.249c0 .142.122.249.244.249zM11.991.497a.24.24 0 00.245-.248A.24.24 0 0011.99 0a.24.24 0 00-.244.249c0 .133.108.236.223.247l.021.001zM2.011 6.36a.24.24 0 00.245-.249.24.24 0 00-.244-.249.24.24 0 00-.244.249.24.24 0 00.244.249zm0 11.263a.24.24 0 00-.243.248.24.24 0 00.244.249.24.24 0 00.244-.249.252.252 0 00-.244-.248zm19.995-.018a.24.24 0 00-.245.248.24.24 0 00.245.25.24.24 0 00.244-.25.252.252 0 00-.244-.248z"
+    />
+  </svg>
+{:else if family === "minimax"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M16.278 2c1.156 0 2.093.927 2.093 2.07v12.501a.74.74 0 00.744.709.74.74 0 00.743-.709V9.099a2.06 2.06 0 012.071-2.049A2.06 2.06 0 0124 9.1v6.561a.649.649 0 01-.652.645.649.649 0 01-.653-.645V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v7.472a2.037 2.037 0 01-2.048 2.026 2.037 2.037 0 01-2.048-2.026v-12.5a.785.785 0 00-.788-.753.785.785 0 00-.789.752l-.001 15.904A2.037 2.037 0 0113.441 22a2.037 2.037 0 01-2.048-2.026V18.04c0-.356.292-.645.652-.645.36 0 .652.289.652.645v1.934c0 .263.142.506.372.638.23.131.514.131.744 0a.734.734 0 00.372-.638V4.07c0-1.143.937-2.07 2.093-2.07zm-5.674 0c1.156 0 2.093.927 2.093 2.07v11.523a.648.648 0 01-.652.645.648.648 0 01-.652-.645V4.07a.785.785 0 00-.789-.78.785.785 0 00-.789.78v14.013a2.06 2.06 0 01-2.07 2.048 2.06 2.06 0 01-2.071-2.048V9.1a.762.762 0 00-.766-.758.762.762 0 00-.766.758v3.8a2.06 2.06 0 01-2.071 2.049A2.06 2.06 0 010 12.9v-1.378c0-.357.292-.646.652-.646.36 0 .653.29.653.646V12.9c0 .418.343.757.766.757s.766-.339.766-.757V9.099a2.06 2.06 0 012.07-2.048 2.06 2.06 0 012.071 2.048v8.984c0 .419.343.758.767.758.423 0 .766-.339.766-.758V4.07c0-1.143.937-2.07 2.093-2.07z"
+    />
+  </svg>
+{:else if family === "kimi"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M19.738 5.776c.163-.209.306-.4.457-.585.07-.087.064-.153-.004-.244-.655-.861-.717-1.817-.34-2.787.283-.73.909-1.072 1.674-1.145.477-.045.945.004 1.379.236.57.305.902.77 1.01 1.412.086.512.07 1.012-.075 1.508-.257.878-.888 1.333-1.753 1.448-.718.096-1.446.108-2.17.157-.056.004-.113 0-.178 0z"
+    />
+    <path
+      d="M17.962 1.844h-4.326l-3.425 7.81H5.369V1.878H1.5V22h3.87v-8.477h6.824a3.025 3.025 0 002.743-1.75V22h3.87v-8.477a3.87 3.87 0 00-3.588-3.86v-.01h-2.125a3.94 3.94 0 002.323-2.12l2.545-5.689z"
+    />
+  </svg>
+{:else if family === "huggingface"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M12.025 1.13c-5.77 0-10.449 4.647-10.449 10.378 0 1.112.178 2.181.503 3.185.064-.222.203-.444.416-.577a.96.96 0 0 1 .524-.15c.293 0 .584.124.84.284.278.173.48.408.71.694.226.282.458.611.684.951v-.014c.017-.324.106-.622.264-.874s.403-.487.762-.543c.3-.047.596.06.787.203s.31.313.4.467c.15.257.212.468.233.542.01.026.653 1.552 1.657 2.54.616.605 1.01 1.223 1.082 1.912.055.537-.096 1.059-.38 1.572.637.121 1.294.187 1.967.187.657 0 1.298-.063 1.921-.178-.287-.517-.44-1.041-.384-1.581.07-.69.465-1.307 1.081-1.913 1.004-.987 1.647-2.513 1.657-2.539.021-.074.083-.285.233-.542.09-.154.208-.323.4-.467a1.08 1.08 0 0 1 .787-.203c.359.056.604.29.762.543s.247.55.265.874v.015c.225-.34.457-.67.683-.952.23-.286.432-.52.71-.694.257-.16.547-.284.84-.285a.97.97 0 0 1 .524.151c.228.143.373.388.43.625l.006.04a10.3 10.3 0 0 0 .534-3.273c0-5.731-4.678-10.378-10.449-10.378M8.327 6.583a1.5 1.5 0 0 1 .713.174 1.487 1.487 0 0 1 .617 2.013c-.183.343-.762-.214-1.102-.094-.38.134-.532.914-.917.71a1.487 1.487 0 0 1 .69-2.803m7.486 0a1.487 1.487 0 0 1 .689 2.803c-.385.204-.536-.576-.916-.71-.34-.12-.92.437-1.103.094a1.487 1.487 0 0 1 .617-2.013 1.5 1.5 0 0 1 .713-.174m-10.68 1.55a.96.96 0 1 1 0 1.921.96.96 0 0 1 0-1.92m13.838 0a.96.96 0 1 1 0 1.92.96.96 0 0 1 0-1.92M8.489 11.458c.588.01 1.965 1.157 3.572 1.164 1.607-.007 2.984-1.155 3.572-1.164.196-.003.305.12.305.454 0 .886-.424 2.328-1.563 3.202-.22-.756-1.396-1.366-1.63-1.32q-.011.001-.02.006l-.044.026-.01.008-.03.024q-.018.017-.035.036l-.032.04a1 1 0 0 0-.058.09l-.014.025q-.049.088-.11.19a1 1 0 0 1-.083.116 1.2 1.2 0 0 1-.173.18q-.035.029-.075.058a1.3 1.3 0 0 1-.251-.243 1 1 0 0 1-.076-.107c-.124-.193-.177-.363-.337-.444-.034-.016-.104-.008-.2.022q-.094.03-.216.087-.06.028-.125.063l-.13.074q-.067.04-.136.086a3 3 0 0 0-.135.096 3 3 0 0 0-.26.219 2 2 0 0 0-.12.121 2 2 0 0 0-.106.128l-.002.002a2 2 0 0 0-.09.132l-.001.001a1.2 1.2 0 0 0-.105.212q-.013.036-.024.073c-1.139-.875-1.563-2.317-1.563-3.203 0-.334.109-.457.305-.454m.836 
10.354c.824-1.19.766-2.082-.365-3.194-1.13-1.112-1.789-2.738-1.789-2.738s-.246-.945-.806-.858-.97 1.499.202 2.362c1.173.864-.233 1.45-.685.64-.45-.812-1.683-2.896-2.322-3.295s-1.089-.175-.938.647 2.822 2.813 2.562 3.244-1.176-.506-1.176-.506-2.866-2.567-3.49-1.898.473 1.23 2.037 2.16c1.564.932 1.686 1.178 1.464 1.53s-3.675-2.511-4-1.297c-.323 1.214 3.524 1.567 3.287 2.405-.238.839-2.71-1.587-3.216-.642-.506.946 3.49 2.056 3.522 2.064 1.29.33 4.568 1.028 5.713-.624m5.349 0c-.824-1.19-.766-2.082.365-3.194 1.13-1.112 1.789-2.738 1.789-2.738s.246-.945.806-.858.97 1.499-.202 2.362c-1.173.864.233 1.45.685.64.451-.812 1.683-2.896 2.322-3.295s1.089-.175.938.647-2.822 2.813-2.562 3.244 1.176-.506 1.176-.506 2.866-2.567 3.49-1.898-.473 1.23-2.037 2.16c-1.564.932-1.686 1.178-1.464 1.53s3.675-2.511 4-1.297c.323 1.214-3.524 1.567-3.287 2.405.238.839 2.71-1.587 3.216-.642.506.946-3.49 2.056-3.522 2.064-1.29.33-4.568 1.028-5.713-.624"
+    />
+  </svg>
+{:else}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"
+    />
+  </svg>
+{/if}
--- a/dashboard/src/lib/components/FamilySidebar.svelte
+++ b/dashboard/src/lib/components/FamilySidebar.svelte
@@ -0,0 +1,142 @@
+<script lang="ts">
+  import FamilyLogos from "./FamilyLogos.svelte";
+
+  type FamilySidebarProps = {
+    families: string[];
+    selectedFamily: string | null;
+    hasFavorites: boolean;
+    onSelect: (family: string | null) => void;
+  };
+
+  let { families, selectedFamily, hasFavorites, onSelect }: FamilySidebarProps =
+    $props();
+
+  // Curated sidebar labels keyed by family id. Ids that are missing from this
+  // table fall back to a capitalised form of the id (see getFamilyName).
+  const familyNames: Record<string, string> = {
+    favorites: "Favorites",
+    huggingface: "Hub",
+    llama: "Meta",
+    qwen: "Qwen",
+    deepseek: "DeepSeek",
+    "gpt-oss": "OpenAI",
+    glm: "GLM",
+    minimax: "MiniMax",
+    kimi: "Kimi",
+  };
+
+  /**
+   * Returns the label shown in the sidebar for a family id.
+   *
+   * @param family - Family id as passed in through the `families` prop.
+   * @returns The curated label from `familyNames` when one exists; otherwise
+   *   the id itself with its first character upper-cased.
+   */
+  function getFamilyName(family: string): string {
+    const curated = familyNames[family];
+    if (curated) {
+      return curated;
+    }
+    // No curated label: upper-case the first character, keep the rest as-is.
+    return `${family.charAt(0).toUpperCase()}${family.slice(1)}`;
+  }
+</script>
+
+<div
+  class="flex flex-col gap-1 py-2 px-1 border-r border-exo-yellow/10 bg-exo-medium-gray/30 min-w-[64px]"
+>
+  <!-- All models (no filter) -->
+  <button
+    type="button"
+    onclick={() => onSelect(null)}
+    class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
+    null
+      ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
+      : 'hover:bg-white/5 border-l-2 border-transparent'}"
+    title="All models"
+  >
+    <svg
+      class="w-5 h-5 {selectedFamily === null
+        ? 'text-exo-yellow'
+        : 'text-white/50 group-hover:text-white/70'}"
+      viewBox="0 0 24 24"
+      fill="currentColor"
+    >
+      <path
+        d="M4 8h4V4H4v4zm6 12h4v-4h-4v4zm-6 0h4v-4H4v4zm0-6h4v-4H4v4zm6 0h4v-4h-4v4zm6-10v4h4V4h-4zm-6 4h4V4h-4v4zm6 6h4v-4h-4v4zm0 6h4v-4h-4v4z"
+      />
+    </svg>
+    <span
+      class="text-[9px] font-mono mt-0.5 {selectedFamily === null
+        ? 'text-exo-yellow'
+        : 'text-white/40 group-hover:text-white/60'}">All</span
+    >
+  </button>
+
+  <!-- Favorites (only show if has favorites) -->
+  {#if hasFavorites}
+    <button
+      type="button"
+      onclick={() => onSelect("favorites")}
+      class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
+      'favorites'
+        ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
+        : 'hover:bg-white/5 border-l-2 border-transparent'}"
+      title="Show favorited models"
+    >
+      <FamilyLogos
+        family="favorites"
+        class={selectedFamily === "favorites"
+          ? "text-amber-400"
+          : "text-white/50 group-hover:text-amber-400/70"}
+      />
+      <span
+        class="text-[9px] font-mono mt-0.5 {selectedFamily === 'favorites'
+          ? 'text-amber-400'
+          : 'text-white/40 group-hover:text-white/60'}">Faves</span
+      >
+    </button>
+  {/if}
+
+  <!-- HuggingFace Hub -->
+  <button
+    type="button"
+    onclick={() => onSelect("huggingface")}
+    class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
+    'huggingface'
+      ? 'bg-orange-500/20 border-l-2 border-orange-400'
+      : 'hover:bg-white/5 border-l-2 border-transparent'}"
+    title="Browse and add models from Hugging Face"
+  >
+    <FamilyLogos
+      family="huggingface"
+      class={selectedFamily === "huggingface"
+        ? "text-orange-400"
+        : "text-white/50 group-hover:text-orange-400/70"}
+    />
+    <span
+      class="text-[9px] font-mono mt-0.5 {selectedFamily === 'huggingface'
+        ? 'text-orange-400'
+        : 'text-white/40 group-hover:text-white/60'}">Hub</span
+    >
+  </button>
+
+  <div class="h-px bg-exo-yellow/10 my-1"></div>
+
+  <!-- Model families -->
+  {#each families as family}
+    <button
+      type="button"
+      onclick={() => onSelect(family)}
+      class="group flex flex-col items-center justify-center p-2 rounded transition-all duration-200 cursor-pointer {selectedFamily ===
+      family
+        ? 'bg-exo-yellow/20 border-l-2 border-exo-yellow'
+        : 'hover:bg-white/5 border-l-2 border-transparent'}"
+      title={getFamilyName(family)}
+    >
+      <FamilyLogos
+        {family}
+        class={selectedFamily === family
+          ? "text-exo-yellow"
+          : "text-white/50 group-hover:text-white/70"}
+      />
+      <span
+        class="text-[9px] font-mono mt-0.5 truncate max-w-full {selectedFamily ===
+        family
+          ? 'text-exo-yellow'
+          : 'text-white/40 group-hover:text-white/60'}"
+      >
+        {getFamilyName(family)}
+      </span>
+    </button>
+  {/each}
+</div>
--- a/dashboard/src/lib/components/HuggingFaceResultItem.svelte
+++ b/dashboard/src/lib/components/HuggingFaceResultItem.svelte
@@ -0,0 +1,127 @@
+<script lang="ts">
+  interface HuggingFaceModel {
+    id: string;
+    author: string;
+    downloads: number;
+    likes: number;
+    last_modified: string;
+    tags: string[];
+  }
+
+  type HuggingFaceResultItemProps = {
+    model: HuggingFaceModel;
+    isAdded: boolean;
+    isAdding: boolean;
+    onAdd: () => void;
+    onSelect: () => void;
+  };
+
+  let {
+    model,
+    isAdded,
+    isAdding,
+    onAdd,
+    onSelect,
+  }: HuggingFaceResultItemProps = $props();
+
+  /**
+   * Formats a count compactly for display: plain digits below 1,000, one
+   * decimal with a "k" suffix below 1,000,000, and one decimal with an "M"
+   * suffix from there on.
+   *
+   * @param num - The count to format (used here for downloads and likes).
+   * @returns The compact string, e.g. "999", "1.5k" or "2.5M".
+   */
+  function formatNumber(num: number): string {
+    if (num >= 1000000) {
+      return `${(num / 1000000).toFixed(1)}M`;
+    }
+    if (num >= 1000) {
+      const thousands = (num / 1000).toFixed(1);
+      // toFixed rounds, so values just under one million (e.g. 999,999) would
+      // otherwise render as "1000.0k"; promote them to the next unit instead.
+      return thousands === "1000.0" ? "1.0M" : `${thousands}k`;
+    }
+    return num.toString();
+  }
+
+  // Extract model name from full ID (e.g., "mlx-community/Llama-3.2-1B" -> "Llama-3.2-1B")
+  const modelName = $derived(model.id.split("/").pop() || model.id);
+</script>
+
+<div
+  class="flex items-center justify-between gap-3 px-3 py-2.5 hover:bg-white/5 transition-colors border-b border-white/5 last:border-b-0"
+>
+  <div class="flex-1 min-w-0">
+    <div class="flex items-center gap-2">
+      <span class="text-sm font-mono text-white truncate" title={model.id}
+        >{modelName}</span
+      >
+      {#if isAdded}
+        <span
+          class="px-1.5 py-0.5 text-[10px] font-mono bg-green-500/20 text-green-400 rounded"
+          >Added</span
+        >
+      {/if}
+    </div>
+    <div class="flex items-center gap-3 mt-0.5 text-xs text-white/40">
+      <span class="truncate">{model.author}</span>
+      <span
+        class="flex items-center gap-1 shrink-0"
+        title="Downloads in the last 30 days"
+      >
+        <svg
+          class="w-3 h-3"
+          fill="none"
+          stroke="currentColor"
+          viewBox="0 0 24 24"
+        >
+          <path
+            stroke-linecap="round"
+            stroke-linejoin="round"
+            stroke-width="2"
+            d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"
+          />
+        </svg>
+        {formatNumber(model.downloads)}
+      </span>
+      <span
+        class="flex items-center gap-1 shrink-0"
+        title="Community likes on Hugging Face"
+      >
+        <svg
+          class="w-3 h-3"
+          fill="none"
+          stroke="currentColor"
+          viewBox="0 0 24 24"
+        >
+          <path
+            stroke-linecap="round"
+            stroke-linejoin="round"
+            stroke-width="2"
+            d="M4.318 6.318a4.5 4.5 0 000 6.364L12 20.364l7.682-7.682a4.5 4.5 0 00-6.364-6.364L12 7.636l-1.318-1.318a4.5 4.5 0 00-6.364 0z"
+          />
+        </svg>
+        {formatNumber(model.likes)}
+      </span>
+    </div>
+  </div>
+
+  <div class="flex items-center gap-2 shrink-0">
+    {#if isAdded}
+      <button
+        type="button"
+        onclick={onSelect}
+        class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-yellow/10 text-exo-yellow border border-exo-yellow/30 hover:bg-exo-yellow/20 transition-colors rounded cursor-pointer"
+      >
+        Select
+      </button>
+    {:else}
+      <button
+        type="button"
+        onclick={onAdd}
+        disabled={isAdding}
+        class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-orange-500/10 text-orange-400 border border-orange-400/30 hover:bg-orange-500/20 transition-colors rounded cursor-pointer disabled:opacity-50 disabled:cursor-not-allowed"
+      >
+        {#if isAdding}
+          <span class="flex items-center gap-1.5">
+            <span
+              class="w-3 h-3 border-2 border-orange-400 border-t-transparent rounded-full animate-spin"
+            ></span>
+            Adding...
+          </span>
+        {:else}
+          + Add
+        {/if}
+      </button>
+    {/if}
+  </div>
+</div>
--- a/dashboard/src/lib/components/ModelFilterPopover.svelte
+++ b/dashboard/src/lib/components/ModelFilterPopover.svelte
@@ -0,0 +1,182 @@
+<script lang="ts">
+  import { fly } from "svelte/transition";
+  import { cubicOut } from "svelte/easing";
+
+  interface FilterState {
+    capabilities: string[];
+    sizeRange: { min: number; max: number } | null;
+  }
+
+  type ModelFilterPopoverProps = {
+    filters: FilterState;
+    onChange: (filters: FilterState) => void;
+    onClear: () => void;
+    onClose: () => void;
+  };
+
+  let { filters, onChange, onClear, onClose }: ModelFilterPopoverProps =
+    $props();
+
+  // Available capabilities
+  const availableCapabilities = [
+    { id: "text", label: "Text" },
+    { id: "thinking", label: "Thinking" },
+    { id: "code", label: "Code" },
+    { id: "vision", label: "Vision" },
+  ];
+
+  // Size ranges
+  const sizeRanges = [
+    { label: "< 10GB", min: 0, max: 10 },
+    { label: "10-50GB", min: 10, max: 50 },
+    { label: "50-200GB", min: 50, max: 200 },
+    { label: "> 200GB", min: 200, max: 10000 },
+  ];
+
+  /**
+   * Adds `cap` to the selected capabilities, or removes it when it is already
+   * selected, and reports the resulting filter state through `onChange`.
+   * A new array is built each time; `filters.capabilities` is not mutated.
+   *
+   * @param cap - Capability id to toggle (e.g. "text", "vision").
+   */
+  function toggleCapability(cap: string) {
+    const selected = filters.capabilities;
+    let next: string[];
+    if (selected.includes(cap)) {
+      next = selected.filter((existing) => existing !== cap);
+    } else {
+      next = [...selected, cap];
+    }
+    onChange({ ...filters, capabilities: next });
+  }
+
+  /**
+   * Applies a size-range filter and reports the new state through `onChange`.
+   * Choosing the range that is already active clears it (toggle behaviour);
+   * any other value, including `null`, becomes the new `sizeRange`.
+   *
+   * @param range - The range to apply, or `null` for no size filter.
+   */
+  function selectSizeRange(range: { min: number; max: number } | null) {
+    const active = filters.sizeRange;
+    // Ranges are compared by bounds because callers pass fresh objects.
+    const clickedActiveRange =
+      !!active &&
+      !!range &&
+      active.min === range.min &&
+      active.max === range.max;
+    onChange({ ...filters, sizeRange: clickedActiveRange ? null : range });
+  }
+
+  /**
+   * Window-level click handler: calls `onClose` unless the click landed
+   * inside an element matching `.filter-popover` or `.filter-toggle`.
+   *
+   * @param e - The click event received on the window.
+   */
+  function handleClickOutside(e: MouseEvent) {
+    const clicked = e.target as HTMLElement;
+    if (clicked.closest(".filter-popover")) {
+      return;
+    }
+    // Clicks on the toggle are left to the toggle's own handler.
+    if (clicked.closest(".filter-toggle")) {
+      return;
+    }
+    onClose();
+  }
+</script>
+
+<svelte:window onclick={handleClickOutside} />
+
+<!-- svelte-ignore a11y_no_static_element_interactions -->
+<div
+  class="filter-popover absolute right-0 top-full mt-2 w-64 bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-xl z-10"
+  transition:fly={{ y: -10, duration: 200, easing: cubicOut }}
+  onclick={(e) => e.stopPropagation()}
+  role="dialog"
+  aria-label="Filter options"
+>
+  <div class="p-3 space-y-4">
+    <!-- Capabilities -->
+    <div>
+      <h4 class="text-xs font-mono text-white/50 mb-2">Capabilities</h4>
+      <div class="flex flex-wrap gap-1.5">
+        {#each availableCapabilities as cap}
+          {@const isSelected = filters.capabilities.includes(cap.id)}
+          <button
+            type="button"
+            class="px-2 py-1 text-xs font-mono rounded transition-colors {isSelected
+              ? 'bg-exo-yellow/20 text-exo-yellow border border-exo-yellow/30'
+              : 'bg-white/5 text-white/60 hover:bg-white/10 border border-transparent'}"
+            onclick={() => toggleCapability(cap.id)}
+          >
+            {#if cap.id === "text"}
+              <svg
+                class="w-3.5 h-3.5 inline-block"
+                viewBox="0 0 24 24"
+                fill="none"
+                stroke="currentColor"
+                stroke-width="1.5"
+                ><path
+                  d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                /></svg
+              >
+            {:else if cap.id === "thinking"}
+              <svg
+                class="w-3.5 h-3.5 inline-block"
+                viewBox="0 0 24 24"
+                fill="none"
+                stroke="currentColor"
+                stroke-width="1.5"
+                ><path
+                  d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                /></svg
+              >
+            {:else if cap.id === "code"}
+              <svg
+                class="w-3.5 h-3.5 inline-block"
+                viewBox="0 0 24 24"
+                fill="none"
+                stroke="currentColor"
+                stroke-width="1.5"
+                ><path
+                  d="M16 18l6-6-6-6M8 6l-6 6 6 6"
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                /></svg
+              >
+            {:else if cap.id === "vision"}
+              <svg
+                class="w-3.5 h-3.5 inline-block"
+                viewBox="0 0 24 24"
+                fill="none"
+                stroke="currentColor"
+                stroke-width="1.5"
+                ><path
+                  d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"
+                  stroke-linecap="round"
+                  stroke-linejoin="round"
+                /><circle cx="12" cy="12" r="3" /></svg
+              >
+            {/if}
+            <span class="ml-1">{cap.label}</span>
+          </button>
+        {/each}
+      </div>
+    </div>
+
+    <!-- Size range -->
+    <div>
+      <h4 class="text-xs font-mono text-white/50 mb-2">Model Size</h4>
+      <div class="flex flex-wrap gap-1.5">
+        {#each sizeRanges as range}
+          {@const isSelected =
+            filters.sizeRange &&
+            filters.sizeRange.min === range.min &&
+            filters.sizeRange.max === range.max}
+          <button
+            type="button"
+            class="px-2 py-1 text-xs font-mono rounded transition-colors {isSelected
+              ? 'bg-exo-yellow/20 text-exo-yellow border border-exo-yellow/30'
+              : 'bg-white/5 text-white/60 hover:bg-white/10 border border-transparent'}"
+            onclick={() => selectSizeRange(range)}
+          >
+            {range.label}
+          </button>
+        {/each}
+      </div>
+    </div>
+
+    <!-- Clear button -->
+    <button
+      type="button"
+      class="w-full py-1.5 text-xs font-mono text-white/50 hover:text-white/70 hover:bg-white/5 rounded transition-colors"
+      onclick={onClear}
+    >
+      Clear all filters
+    </button>
+  </div>
+</div>
--- a/dashboard/src/lib/components/ModelPickerGroup.svelte
+++ b/dashboard/src/lib/components/ModelPickerGroup.svelte
@@ -0,0 +1,324 @@
+<script lang="ts">
+  interface ModelInfo {
+    id: string;
+    name?: string;
+    storage_size_megabytes?: number;
+    base_model?: string;
+    quantization?: string;
+    supports_tensor?: boolean;
+    capabilities?: string[];
+    family?: string;
+    is_custom?: boolean;
+  }
+
+  interface ModelGroup {
+    id: string;
+    name: string;
+    capabilities: string[];
+    family: string;
+    variants: ModelInfo[];
+    smallestVariant: ModelInfo;
+    hasMultipleVariants: boolean;
+  }
+
+  type ModelPickerGroupProps = {
+    group: ModelGroup;
+    isExpanded: boolean;
+    isFavorite: boolean;
+    selectedModelId: string | null;
+    canModelFit: (id: string) => boolean;
+    onToggleExpand: () => void;
+    onSelectModel: (modelId: string) => void;
+    onToggleFavorite: (baseModelId: string) => void;
+    onShowInfo: (group: ModelGroup) => void;
+  };
+
+  let {
+    group,
+    isExpanded,
+    isFavorite,
+    selectedModelId,
+    canModelFit,
+    onToggleExpand,
+    onSelectModel,
+    onToggleFavorite,
+    onShowInfo,
+  }: ModelPickerGroupProps = $props();
+
+  // Format storage size
+  /**
+   * Formats a storage size given in megabytes for display.
+   *
+   * @param mb - Size in megabytes; may be undefined.
+   * @returns "" when `mb` is falsy (undefined or 0), whole gigabytes with a
+   *   "GB" suffix from 1024 MB upward, otherwise the value with an "MB" suffix.
+   */
+  function formatSize(mb: number | undefined): string {
+    if (!mb) {
+      return "";
+    }
+    const mbPerGb = 1024;
+    return mb < mbPerGb ? `${mb}MB` : `${(mb / mbPerGb).toFixed(0)}GB`;
+  }
+
+  // Check if any variant can fit
+  const anyVariantFits = $derived(
+    group.variants.some((v) => canModelFit(v.id)),
+  );
+
+  // Check if this group's model is currently selected (for single-variant groups)
+  const isMainSelected = $derived(
+    !group.hasMultipleVariants &&
+      group.variants.some((v) => v.id === selectedModelId),
+  );
+</script>
+
+<div
+  class="border-b border-white/5 last:border-b-0 {!anyVariantFits
+    ? 'opacity-50'
+    : ''}"
+>
+  <!-- Main row -->
+  <div
+    class="flex items-center gap-2 px-3 py-2.5 transition-colors {anyVariantFits
+      ? 'hover:bg-white/5 cursor-pointer'
+      : 'cursor-not-allowed'} {isMainSelected
+      ? 'bg-exo-yellow/10 border-l-2 border-exo-yellow'
+      : 'border-l-2 border-transparent'}"
+    onclick={() => {
+      if (group.hasMultipleVariants) {
+        onToggleExpand();
+      } else {
+        const modelId = group.variants[0]?.id;
+        if (modelId && canModelFit(modelId)) {
+          onSelectModel(modelId);
+        }
+      }
+    }}
+    role="button"
+    tabindex="0"
+    onkeydown={(e) => {
+      // Keyboard activation for the row (it has role="button"): Enter or Space
+      // expands a multi-variant group, or selects the only variant when it fits.
+      // Key events from the nested favorite/info buttons bubble up to this row;
+      // ignore them so those buttons keep their own Enter/Space behaviour and
+      // do not trigger the row action (or get cancelled by preventDefault).
+      if (e.target !== e.currentTarget) {
+        return;
+      }
+      if (e.key === "Enter" || e.key === " ") {
+        e.preventDefault();
+        if (group.hasMultipleVariants) {
+          onToggleExpand();
+        } else {
+          const modelId = group.variants[0]?.id;
+          if (modelId && canModelFit(modelId)) {
+            onSelectModel(modelId);
+          }
+        }
+      }
+    }}
+  >
+    <!-- Expand/collapse chevron (for groups with variants) -->
+    {#if group.hasMultipleVariants}
+      <svg
+        class="w-4 h-4 text-white/40 transition-transform duration-200 flex-shrink-0 {isExpanded
+          ? 'rotate-90'
+          : ''}"
+        viewBox="0 0 24 24"
+        fill="currentColor"
+      >
+        <path d="M8.59 16.59L13.17 12 8.59 7.41 10 6l6 6-6 6-1.41-1.41z" />
+      </svg>
+    {:else}
+      <div class="w-4 flex-shrink-0"></div>
+    {/if}
+
+    <!-- Model name -->
+    <div class="flex-1 min-w-0">
+      <div class="flex items-center gap-2">
+        <span class="font-mono text-sm text-white truncate">
+          {group.name}
+        </span>
+        <!-- Capability icons -->
+        {#each group.capabilities.filter((c) => c !== "text") as cap}
+          <!-- One icon per non-text capability. Tooltips use a <title> child:
+               the title attribute is not the tooltip mechanism for SVG. -->
+          {#if cap === "thinking"}
+            <svg
+              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="1.5"
+            >
+              <title>Supports Thinking</title>
+              <path
+                d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
+                stroke-linecap="round"
+                stroke-linejoin="round"
+              />
+            </svg>
+          {:else if cap === "code"}
+            <svg
+              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="1.5"
+            >
+              <title>Supports code generation</title>
+              <path
+                d="M16 18l6-6-6-6M8 6l-6 6 6 6"
+                stroke-linecap="round"
+                stroke-linejoin="round"
+              />
+            </svg>
+          {:else if cap === "vision"}
+            <svg
+              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="1.5"
+            >
+              <title>Supports image input</title>
+              <path
+                d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"
+                stroke-linecap="round"
+                stroke-linejoin="round"
+              />
+              <circle cx="12" cy="12" r="3" />
+            </svg>
+          {:else if cap === "image_gen"}
+            <svg
+              class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="1.5"
+            >
+              <title>Supports image generation</title>
+              <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
+              <circle cx="8.5" cy="8.5" r="1.5" />
+              <path d="M21 15l-5-5L5 21" />
+            </svg>
+          {/if}
+        {/each}
+      </div>
+    </div>
+
+    <!-- Size indicator (smallest variant) -->
+    {#if !group.hasMultipleVariants && group.smallestVariant?.storage_size_megabytes}
+      <span class="text-xs font-mono text-white/30 flex-shrink-0">
+        {formatSize(group.smallestVariant.storage_size_megabytes)}
+      </span>
+    {/if}
+
+    <!-- Variant count -->
+    {#if group.hasMultipleVariants}
+      <span class="text-xs font-mono text-white/30 flex-shrink-0">
+        {group.variants.length} variants
+      </span>
+    {/if}
+
+    <!-- Check mark if selected (single-variant) -->
+    {#if isMainSelected}
+      <svg
+        class="w-4 h-4 text-exo-yellow flex-shrink-0"
+        viewBox="0 0 24 24"
+        fill="currentColor"
+      >
+        <path d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41L9 16.17z" />
+      </svg>
+    {/if}
+
+    <!-- Favorite star -->
+    <button
+      type="button"
+      class="p-1 rounded hover:bg-white/10 transition-colors flex-shrink-0"
+      onclick={(e) => {
+        e.stopPropagation();
+        onToggleFavorite(group.id);
+      }}
+      title={isFavorite ? "Remove from favorites" : "Add to favorites"}
+    >
+      {#if isFavorite}
+        <svg
+          class="w-4 h-4 text-amber-400"
+          viewBox="0 0 24 24"
+          fill="currentColor"
+        >
+          <path
+            d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
+          />
+        </svg>
+      {:else}
+        <svg
+          class="w-4 h-4 text-white/30 hover:text-white/50"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+        >
+          <path
+            d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"
+          />
+        </svg>
+      {/if}
+    </button>
+
+    <!-- Info button -->
+    <button
+      type="button"
+      class="p-1 rounded hover:bg-white/10 transition-colors flex-shrink-0"
+      onclick={(e) => {
+        e.stopPropagation();
+        onShowInfo(group);
+      }}
+      title="View model details"
+    >
+      <svg
+        class="w-4 h-4 text-white/30 hover:text-white/50"
+        viewBox="0 0 24 24"
+        fill="currentColor"
+      >
+        <path
+          d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 15h-2v-6h2v6zm0-8h-2V7h2v2z"
+        />
+      </svg>
+    </button>
+  </div>
+
+  <!-- Expanded variants -->
+  {#if isExpanded && group.hasMultipleVariants}
+    <div class="bg-black/20 border-t border-white/5">
+      {#each group.variants as variant}
+        {@const modelCanFit = canModelFit(variant.id)}
+        {@const isSelected = selectedModelId === variant.id}
+        <button
+          type="button"
+          class="w-full flex items-center gap-3 px-3 py-2 pl-10 hover:bg-white/5 transition-colors text-left {!modelCanFit
+            ? 'opacity-50 cursor-not-allowed'
+            : 'cursor-pointer'} {isSelected
+            ? 'bg-exo-yellow/10 border-l-2 border-exo-yellow'
+            : 'border-l-2 border-transparent'}"
+          disabled={!modelCanFit}
+          onclick={() => {
+            if (modelCanFit) {
+              onSelectModel(variant.id);
+            }
+          }}
+        >
+          <!-- Quantization badge -->
+          <span
+            class="text-xs font-mono px-1.5 py-0.5 rounded bg-white/10 text-white/70 flex-shrink-0"
+          >
+            {variant.quantization || "default"}
+          </span>
+
+          <!-- Size -->
+          <span class="text-xs font-mono text-white/40 flex-1">
+            {formatSize(variant.storage_size_megabytes)}
+          </span>
+
+          <!-- Check mark if selected -->
+          {#if isSelected}
+            <svg
+              class="w-4 h-4 text-exo-yellow"
+              viewBox="0 0 24 24"
+              fill="currentColor"
+            >
+              <path
+                d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41L9 16.17z"
+              />
+            </svg>
+          {/if}
+        </button>
+      {/each}
+    </div>
+  {/if}
+</div>
--- a/dashboard/src/lib/components/ModelPickerModal.svelte
+++ b/dashboard/src/lib/components/ModelPickerModal.svelte
@@ -0,0 +1,748 @@
+<script lang="ts">
+  import { fade, fly } from "svelte/transition";
+  import { cubicOut } from "svelte/easing";
+  import FamilySidebar from "./FamilySidebar.svelte";
+  import ModelPickerGroup from "./ModelPickerGroup.svelte";
+  import ModelFilterPopover from "./ModelFilterPopover.svelte";
+  import HuggingFaceResultItem from "./HuggingFaceResultItem.svelte";
+
+  interface ModelInfo {
+    id: string; // passed to canModelFit and onSelect to identify this variant
+    name?: string;
+    storage_size_megabytes?: number; // divided by 1024 where GB values are shown
+    base_model?: string; // models sharing this value land in the same group
+    quantization?: string;
+    supports_tensor?: boolean;
+    capabilities?: string[]; // a group seeded without these defaults to ["text"]
+    family?: string; // compared against the selected sidebar family
+    is_custom?: boolean;
+    tasks?: string[];
+    hugging_face_id?: string;
+  }
+
+  interface ModelGroup {
+    id: string; // base_model of the variants, or the model id when it has none
+    name: string;
+    capabilities: string[];
+    family: string; // "" when no variant declares a family
+    variants: ModelInfo[]; // sorted ascending by storage size
+    smallestVariant: ModelInfo; // drives group ordering and the size filter
+    hasMultipleVariants: boolean; // variants.length > 1
+  }
+
+  interface FilterState {
+    capabilities: string[]; // a group must include every listed capability
+    sizeRange: { min: number; max: number } | null; // inclusive bounds in GB
+  }
+
+  interface HuggingFaceModel {
+    id: string;
+    author: string;
+    downloads: number;
+    likes: number;
+    last_modified: string;
+    tags: string[];
+  }
+
+  type ModelPickerModalProps = {
+    isOpen: boolean;
+    models: ModelInfo[];
+    selectedModelId: string | null;
+    favorites: Set<string>; // looked up by group id, not by variant id
+    existingModelIds: Set<string>; // HF results found here render as added
+    canModelFit: (modelId: string) => boolean;
+    onSelect: (modelId: string) => void; // the modal calls onClose right after
+    onClose: () => void;
+    onToggleFavorite: (baseModelId: string) => void;
+    onAddModel: (modelId: string) => Promise<void>; // a rejection is shown as an error
+    onDeleteModel: (modelId: string) => Promise<void>;
+    totalMemoryGB: number;
+    usedMemoryGB: number;
+  };
+
+  let {
+    isOpen,
+    models,
+    selectedModelId,
+    favorites,
+    existingModelIds,
+    canModelFit,
+    onSelect,
+    onClose,
+    onToggleFavorite,
+    onAddModel,
+    onDeleteModel,
+    totalMemoryGB,
+    usedMemoryGB,
+  }: ModelPickerModalProps = $props();
+
+  // Local state
+  let searchQuery = $state("");
+  let selectedFamily = $state<string | null>(null);
+  let expandedGroups = $state<Set<string>>(new Set());
+  let showFilters = $state(false);
+  let filters = $state<FilterState>({ capabilities: [], sizeRange: null });
+  let infoGroup = $state<ModelGroup | null>(null);
+
+  // HuggingFace Hub state
+  let hfSearchQuery = $state("");
+  let hfSearchResults = $state<HuggingFaceModel[]>([]);
+  let hfTrendingModels = $state<HuggingFaceModel[]>([]); // listed while the query is under 2 chars
+  let hfIsSearching = $state(false);
+  let hfIsLoadingTrending = $state(false);
+  let addingModelId = $state<string | null>(null); // id of the add currently in flight
+  let hfSearchDebounceTimer: ReturnType<typeof setTimeout> | null = null;
+  let manualModelId = $state("");
+  let addModelError = $state<string | null>(null); // message from the last failed add
+
+  // Reset state when modal opens (also dismisses a leftover info dialog)
+  $effect(() => {
+    if (!isOpen) return;
+    searchQuery = "";
+    selectedFamily = null;
+    expandedGroups = new Set();
+    showFilters = false;
+    infoGroup = null;
+    hfSearchQuery = "";
+    hfSearchResults = [];
+    manualModelId = "";
+    addModelError = null;
+  });
+
+  // Fetch trending models the first time the HuggingFace tab is shown. The
+  // plain (non-reactive) flag keeps a failed or empty response from
+  // re-triggering the effect in an endless refetch loop; leaving the tab re-arms it.
+  let hfTrendingRequested = false;
+  $effect(() => {
+    if (selectedFamily !== "huggingface") {
+      hfTrendingRequested = false;
+    } else if (!hfTrendingRequested && hfTrendingModels.length === 0) {
+      hfTrendingRequested = true;
+      fetchTrendingModels();
+    }
+  });
+
+  async function fetchTrendingModels() {
+    hfIsLoadingTrending = true;
+    try {
+      const response = await fetch("/models/search?query=&limit=20");
+      if (response.ok) hfTrendingModels = await response.json();
+    } catch (error) {
+      console.error("Failed to fetch trending models:", error);
+    } finally {
+      hfIsLoadingTrending = false;
+    }
+  }
+
+  // Search the HF hub via the backend. A response only updates the UI if its
+  // query is still the current one, so slow replies cannot clobber newer ones.
+  async function searchHuggingFace(query: string) {
+    if (query.length < 2) {
+      hfSearchResults = [];
+      return;
+    }
+    hfIsSearching = true;
+    let results: HuggingFaceModel[] = [];
+    try {
+      const response = await fetch(
+        `/models/search?query=${encodeURIComponent(query)}&limit=20`,
+      );
+      if (response.ok) results = await response.json();
+    } catch (error) {
+      console.error("Failed to search models:", error);
+    }
+
+    const superseded = query !== hfSearchQuery;
+    if (!superseded) hfSearchResults = results;
+    // A newer search (query of 2+ chars) owns the spinner; otherwise stop it.
+    if (!superseded || hfSearchQuery.length < 2) hfIsSearching = false;
+  }
+
+  // Input handler for the hub search box. Stores the query, clears any
+  // previous add error, and restarts the 300 ms debounce before searching.
+  // Queries shorter than two characters only empty the result list.
+  function handleHfSearchInput(query: string) {
+    addModelError = null;
+    hfSearchQuery = query;
+    if (hfSearchDebounceTimer) clearTimeout(hfSearchDebounceTimer);
+
+    if (query.length < 2) {
+      hfSearchResults = [];
+      return;
+    }
+    hfSearchDebounceTimer = setTimeout(() => {
+      searchHuggingFace(query);
+    }, 300);
+  }
+
+  async function handleAddModel(modelId: string) {
+    addingModelId = modelId; // marks this id as the add currently in flight
+    addModelError = null;
+    try {
+      await onAddModel(modelId);
+    } catch (error) { // failures are stored for display, not rethrown
+      addModelError =
+        error instanceof Error ? error.message : "Failed to add model";
+    } finally {
+      addingModelId = null;
+    }
+  }
+
+  async function handleAddManualModel() {
+    const modelId = manualModelId.trim();
+    // Enter bypasses the disabled Add button, so re-check for an add in flight.
+    if (!modelId || addingModelId !== null) return;
+    await handleAddModel(modelId);
+    if (!addModelError) manualModelId = "";
+  }
+
+  function handleSelectHfModel(modelId: string) {
+    onSelect(modelId); // report the chosen hub model to the parent...
+    onClose(); // ...then dismiss the picker
+  }
+
+  // Models to display in HuggingFace view: search results once the query
+  // reaches two characters, the trending list before that.
+  const hfDisplayModels = $derived.by((): HuggingFaceModel[] => {
+    // Same 2-character threshold that gates the backend search.
+    const showSearch = hfSearchQuery.length >= 2;
+    return showSearch ? hfSearchResults : hfTrendingModels;
+  });
+
+  // Group models by base_model
+  const groupedModels = $derived.by((): ModelGroup[] => {
+    const groups = new Map<string, ModelGroup>();
+    // A missing (or zero) size is unknown: rank it after every known size so
+    // it can never displace a real smallest variant.
+    const knownSize = (m: ModelInfo) => m.storage_size_megabytes || Infinity;
+
+    for (const model of models) {
+      const groupId = model.base_model || model.id;
+      let group = groups.get(groupId);
+
+      if (!group) {
+        // First variant seen seeds the group; later variants may refine it.
+        group = {
+          id: groupId,
+          name: model.base_model || model.name || model.id,
+          capabilities: model.capabilities || ["text"],
+          family: model.family || "",
+          variants: [],
+          smallestVariant: model,
+          hasMultipleVariants: false,
+        };
+        groups.set(groupId, group);
+      }
+      group.variants.push(model);
+
+      // Track smallest variant
+      if (knownSize(model) < knownSize(group.smallestVariant)) {
+        group.smallestVariant = model;
+      }
+
+      // Update capabilities if not set
+      if (
+        group.capabilities.length <= 1 &&
+        model.capabilities &&
+        model.capabilities.length > 1
+      ) {
+        group.capabilities = model.capabilities;
+      }
+      if (!group.family && model.family) {
+        group.family = model.family;
+      }
+    }
+
+    // Sort variants within each group by size
+    for (const group of groups.values()) {
+      group.variants.sort(
+        (a, b) =>
+          (a.storage_size_megabytes || 0) - (b.storage_size_megabytes || 0),
+      );
+      group.hasMultipleVariants = group.variants.length > 1;
+    }
+
+    // Convert to array and sort by smallest variant size (biggest first)
+    return Array.from(groups.values()).sort((a, b) => {
+      return (
+        (b.smallestVariant.storage_size_megabytes || 0) -
+        (a.smallestVariant.storage_size_megabytes || 0)
+      );
+    });
+  });
+
+  // Get unique families: known families first in a fixed display order,
+  // anything else after them alphabetically.
+  const uniqueFamilies = $derived.by((): string[] => {
+    const preferredOrder = [
+      "kimi",
+      "qwen",
+      "glm",
+      "minimax",
+      "deepseek",
+      "gpt-oss",
+      "llama",
+    ];
+    // Position in preferredOrder, or Infinity for families not listed there.
+    const rank = (family: string) => {
+      const idx = preferredOrder.indexOf(family);
+      return idx === -1 ? Infinity : idx;
+    };
+    const seen = new Set(
+      groupedModels.map((g) => g.family).filter((family) => family),
+    );
+    return [...seen].sort((a, b) => {
+      const ra = rank(a);
+      const rb = rank(b);
+      return ra === rb ? a.localeCompare(b) : ra < rb ? -1 : 1;
+    });
+  });
+
+  // Filter models based on search, family, and filters. Each criterion is a
+  // small predicate; a group is kept only when all of them accept it, and the
+  // survivors are then ordered for display.
+  const filteredGroups = $derived.by((): ModelGroup[] => {
+    const query = searchQuery.toLowerCase().trim();
+    const { capabilities: requiredCaps, sizeRange } = filters;
+
+    // True when at least one variant of the group fits
+    const fitsAnywhere = (g: ModelGroup) =>
+      g.variants.some((v) => canModelFit(v.id));
+
+    // Size of the group's smallest variant, 0 when it is not known
+    const smallestMB = (g: ModelGroup) =>
+      g.smallestVariant.storage_size_megabytes || 0;
+
+    // Family: "favorites" is a pseudo-family; "huggingface" and null match all
+    const inFamily = (g: ModelGroup) => {
+      if (selectedFamily === "favorites") return favorites.has(g.id);
+      if (!selectedFamily || selectedFamily === "huggingface") return true;
+      return g.family === selectedFamily;
+    };
+
+    // Search: group name, or any variant's id / name, contains the query
+    const matchesQuery = (g: ModelGroup) =>
+      !query ||
+      g.name.toLowerCase().includes(query) ||
+      g.variants.some(
+        (v) =>
+          v.id.toLowerCase().includes(query) ||
+          (v.name || "").toLowerCase().includes(query),
+      );
+
+    // Capabilities: every requested capability must be present
+    const hasCaps = (g: ModelGroup) =>
+      requiredCaps.every((cap) => g.capabilities.includes(cap));
+
+    // Size: smallest variant (in GB) must fall inside the inclusive range
+    const inSizeRange = (g: ModelGroup) => {
+      if (!sizeRange) return true;
+      const sizeGB = smallestMB(g) / 1024;
+      return sizeGB >= sizeRange.min && sizeGB <= sizeRange.max;
+    };
+
+    // Sort: models that fit first, then by size (largest first)
+    return groupedModels
+      .filter(
+        (g) => inFamily(g) && matchesQuery(g) && hasCaps(g) && inSizeRange(g),
+      )
+      .sort((a, b) => {
+        const aFits = fitsAnywhere(a);
+        const bFits = fitsAnywhere(b);
+        // Exactly one of the two fits: that one goes first
+        if (aFits !== bFits) return aFits ? -1 : 1;
+        // Same fit status: larger smallest-variant first
+        return smallestMB(b) - smallestMB(a);
+      });
+  });
+
+  // Check if any favorites exist
+  const hasFavorites = $derived(favorites.size > 0);
+
+  // Flip one group's expanded flag. A copy of the set is modified and then
+  // assigned back, rather than mutating the current set in place.
+  function toggleGroupExpanded(groupId: string) {
+    const updated = new Set(expandedGroups);
+    // delete() reports whether the id was present; if it was not, this is
+    // an expand request and the id gets added instead.
+    if (!updated.delete(groupId)) updated.add(groupId);
+    expandedGroups = updated;
+  }
+
+  function handleSelect(modelId: string) {
+    onSelect(modelId); // notify the parent of the chosen variant...
+    onClose(); // ...then close the picker
+  }
+
+  function handleKeydown(e: KeyboardEvent) {
+    if (!isOpen || e.key !== "Escape") return; // window-level: idle while closed
+    if (infoGroup) infoGroup = null; // dismiss the topmost layer first
+    else onClose();
+  }
+
+  function handleFiltersChange(newFilters: FilterState) {
+    filters = newFilters; // wholesale replacement; passed to the popover as onChange
+  }
+
+  function clearFilters() {
+    filters = { capabilities: [], sizeRange: null }; // same shape as the initial state
+  }
+
+  const hasActiveFilters = $derived(
+    filters.capabilities.length > 0 || filters.sizeRange !== null, // search text is not counted
+  );
+</script>
+
+<svelte:window onkeydown={handleKeydown} />
+
+{#if isOpen}
+  <!-- Backdrop -->
+  <div
+    class="fixed inset-0 z-50 bg-black/80 backdrop-blur-sm"
+    transition:fade={{ duration: 200 }}
+    onclick={onClose}
+    role="presentation"
+  ></div>
+
+  <!-- Modal -->
+  <div
+    class="fixed z-50 top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[min(90vw,600px)] h-[min(80vh,700px)] bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-2xl overflow-hidden flex flex-col"
+    transition:fly={{ y: 20, duration: 300, easing: cubicOut }}
+    role="dialog"
+    aria-modal="true"
+    aria-label="Select a model"
+  >
+    <!-- Header with search -->
+    <div
+      class="flex items-center gap-2 p-3 border-b border-exo-yellow/10 bg-exo-medium-gray/30"
+    >
+      {#if selectedFamily === "huggingface"}
+        <!-- HuggingFace search -->
+        <svg
+          class="w-5 h-5 text-orange-400/60 flex-shrink-0"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+        >
+          <circle cx="11" cy="11" r="8" />
+          <path d="M21 21l-4.35-4.35" />
+        </svg>
+        <input
+          type="search"
+          class="flex-1 bg-transparent border-none outline-none text-sm font-mono text-white placeholder-white/40"
+          placeholder="Search mlx-community models..."
+          value={hfSearchQuery}
+          oninput={(e) => handleHfSearchInput(e.currentTarget.value)}
+        />
+        {#if hfIsSearching}
+          <div class="flex-shrink-0">
+            <span
+              class="w-4 h-4 border-2 border-orange-400 border-t-transparent rounded-full animate-spin block"
+            ></span>
+          </div>
+        {/if}
+      {:else}
+        <!-- Normal model search -->
+        <svg
+          class="w-5 h-5 text-white/40 flex-shrink-0"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+        >
+          <circle cx="11" cy="11" r="8" />
+          <path d="M21 21l-4.35-4.35" />
+        </svg>
+        <input
+          type="search"
+          class="flex-1 bg-transparent border-none outline-none text-sm font-mono text-white placeholder-white/40"
+          placeholder="Search models..."
+          bind:value={searchQuery}
+        />
+        <!-- Cluster memory -->
+        <span
+          class="text-xs font-mono flex-shrink-0"
+          title="Cluster memory usage"
+          ><span class="text-exo-yellow">{Math.round(usedMemoryGB)}GB</span
+          ><span class="text-white/40">/{Math.round(totalMemoryGB)}GB</span
+          ></span
+        >
+        <!-- Filter button -->
+        <div class="relative filter-toggle">
+          <button
+            type="button"
+            class="p-1.5 rounded hover:bg-white/10 transition-colors {hasActiveFilters
+              ? 'text-exo-yellow'
+              : 'text-white/50'}"
+            onclick={() => (showFilters = !showFilters)}
+            title="Filter by capability or size"
+          >
+            <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
+              <path d="M10 18h4v-2h-4v2zM3 6v2h18V6H3zm3 7h12v-2H6v2z" />
+            </svg>
+          </button>
+          {#if showFilters}
+            <ModelFilterPopover
+              {filters}
+              onChange={handleFiltersChange}
+              onClear={clearFilters}
+              onClose={() => (showFilters = false)}
+            />
+          {/if}
+        </div>
+      {/if}
+      <!-- Close button -->
+      <button
+        type="button"
+        class="p-1.5 rounded hover:bg-white/10 transition-colors text-white/50 hover:text-white/70"
+        onclick={onClose}
+        title="Close model picker"
+      >
+        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
+          <path
+            d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
+          />
+        </svg>
+      </button>
+    </div>
+
+    <!-- Body -->
+    <div class="flex flex-1 overflow-hidden">
+      <!-- Family sidebar -->
+      <FamilySidebar
+        families={uniqueFamilies}
+        {selectedFamily}
+        {hasFavorites}
+        onSelect={(family) => (selectedFamily = family)}
+      />
+
+      <!-- Model list -->
+      <div class="flex-1 overflow-y-auto flex flex-col">
+        {#if selectedFamily === "huggingface"}
+          <!-- HuggingFace Hub view -->
+          <div class="flex-1 flex flex-col min-h-0">
+            <!-- Section header -->
+            <div
+              class="sticky top-0 z-10 px-3 py-2 bg-exo-dark-gray/95 border-b border-exo-yellow/10"
+            >
+              <span class="text-xs font-mono text-white/40">
+                {#if hfSearchQuery.length >= 2}
+                  Search results for "{hfSearchQuery}"
+                {:else}
+                  Trending on mlx-community
+                {/if}
+              </span>
+            </div>
+
+            <!-- Results list -->
+            <div class="flex-1 overflow-y-auto">
+              {#if hfIsLoadingTrending && hfTrendingModels.length === 0}
+                <div
+                  class="flex items-center justify-center py-12 text-white/40"
+                >
+                  <span
+                    class="w-5 h-5 border-2 border-orange-400 border-t-transparent rounded-full animate-spin mr-2"
+                  ></span>
+                  <span class="font-mono text-sm"
+                    >Loading trending models...</span
+                  >
+                </div>
+              {:else if hfDisplayModels.length === 0}
+                <div
+                  class="flex flex-col items-center justify-center py-12 text-white/40"
+                >
+                  <svg
+                    class="w-10 h-10 mb-2"
+                    viewBox="0 0 24 24"
+                    fill="currentColor"
+                  >
+                    <path
+                      d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 13.5c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5zm4 0c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5zm2-4.5H8c0-2.21 1.79-4 4-4s4 1.79 4 4z"
+                    />
+                  </svg>
+                  <p class="font-mono text-sm">No models found</p>
+                  {#if hfSearchQuery}
+                    <p class="font-mono text-xs mt-1">
+                      Try a different search term
+                    </p>
+                  {/if}
+                </div>
+              {:else}
+                {#each hfDisplayModels as model}
+                  <HuggingFaceResultItem
+                    {model}
+                    isAdded={existingModelIds.has(model.id)}
+                    isAdding={addingModelId === model.id}
+                    onAdd={() => handleAddModel(model.id)}
+                    onSelect={() => handleSelectHfModel(model.id)}
+                  />
+                {/each}
+              {/if}
+            </div>
+
+            <!-- Manual input footer -->
+            <div
+              class="sticky bottom-0 border-t border-exo-yellow/10 bg-exo-dark-gray p-3"
+            >
+              {#if addModelError}
+                <div
+                  class="bg-red-500/10 border border-red-500/30 rounded px-3 py-2 mb-2"
+                >
+                  <p class="text-red-400 text-xs font-mono break-words">
+                    {addModelError}
+                  </p>
+                </div>
+              {/if}
+              <div class="flex gap-2">
+                <input
+                  type="text"
+                  class="flex-1 bg-exo-black/60 border border-exo-yellow/30 rounded px-3 py-1.5 text-xs font-mono text-white placeholder-white/30 focus:outline-none focus:border-exo-yellow/50"
+                  placeholder="Or paste model ID directly..."
+                  bind:value={manualModelId}
+                  onkeydown={(e) => {
+                    if (e.key === "Enter") handleAddManualModel();
+                  }}
+                />
+                <button
+                  type="button"
+                  onclick={handleAddManualModel}
+                  disabled={!manualModelId.trim() || addingModelId !== null}
+                  class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-orange-500/10 text-orange-400 border border-orange-400/30 hover:bg-orange-500/20 transition-colors rounded disabled:opacity-50 disabled:cursor-not-allowed"
+                >
+                  Add
+                </button>
+              </div>
+            </div>
+          </div>
+        {:else if filteredGroups.length === 0}
+          <div
+            class="flex flex-col items-center justify-center h-full text-white/40 p-8"
+          >
+            <svg class="w-12 h-12 mb-3" viewBox="0 0 24 24" fill="currentColor">
+              <path
+                d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"
+              />
+            </svg>
+            <p class="font-mono text-sm">No models found</p>
+            {#if hasActiveFilters || searchQuery}
+              <button
+                type="button"
+                class="mt-2 text-xs text-exo-yellow hover:underline"
+                onclick={() => {
+                  searchQuery = "";
+                  clearFilters();
+                }}
+              >
+                Clear filters
+              </button>
+            {/if}
+          </div>
+        {:else}
+          {#each filteredGroups as group}
+            <ModelPickerGroup
+              {group}
+              isExpanded={expandedGroups.has(group.id)}
+              isFavorite={favorites.has(group.id)}
+              {selectedModelId}
+              {canModelFit}
+              onToggleExpand={() => toggleGroupExpanded(group.id)}
+              onSelectModel={handleSelect}
+              {onToggleFavorite}
+              onShowInfo={(g) => (infoGroup = g)}
+            />
+          {/each}
+        {/if}
+      </div>
+    </div>
+
+    <!-- Footer with active filters indicator -->
+    {#if hasActiveFilters}
+      <div
+        class="flex items-center gap-2 px-3 py-2 border-t border-exo-yellow/10 bg-exo-medium-gray/20 text-xs font-mono text-white/50"
+      >
+        <span>Filters:</span>
+        {#each filters.capabilities as cap}
+          <span class="px-1.5 py-0.5 bg-exo-yellow/20 text-exo-yellow rounded"
+            >{cap}</span
+          >
+        {/each}
+        {#if filters.sizeRange}
+          <span class="px-1.5 py-0.5 bg-exo-yellow/20 text-exo-yellow rounded">
+            {filters.sizeRange.min}GB - {filters.sizeRange.max}GB
+          </span>
+        {/if}
+        <button
+          type="button"
+          class="ml-auto text-white/40 hover:text-white/60"
+          onclick={clearFilters}
+        >
+          Clear all
+        </button>
+      </div>
+    {/if}
+  </div>
+
+  <!-- Info modal -->
+  {#if infoGroup}
+    <div
+      class="fixed inset-0 z-[60] bg-black/60"
+      transition:fade={{ duration: 150 }}
+      onclick={() => (infoGroup = null)}
+      role="presentation"
+    ></div>
+    <div
+      class="fixed z-[60] top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[min(80vw,400px)] bg-exo-dark-gray border border-exo-yellow/10 rounded-lg shadow-2xl p-4"
+      transition:fly={{ y: 10, duration: 200, easing: cubicOut }}
+      role="dialog"
+      aria-modal="true"
+    >
+      <div class="flex items-start justify-between mb-3">
+        <h3 class="font-mono text-lg text-white">{infoGroup.name}</h3>
+        <button
+          type="button"
+          class="p-1 rounded hover:bg-white/10 transition-colors text-white/50"
+          onclick={() => (infoGroup = null)}
+          title="Close model details"
+          aria-label="Close info dialog"
+        >
+          <svg class="w-4 h-4" viewBox="0 0 24 24" fill="currentColor">
+            <path
+              d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
+            />
+          </svg>
+        </button>
+      </div>
+      <div class="space-y-2 text-xs font-mono">
+        <div class="flex items-center gap-2">
+          <span class="text-white/40">Family:</span>
+          <span class="text-white/70">{infoGroup.family || "Unknown"}</span>
+        </div>
+        <div class="flex items-center gap-2">
+          <span class="text-white/40">Capabilities:</span>
+          <span class="text-white/70">{infoGroup.capabilities.join(", ")}</span>
+        </div>
+        <div class="flex items-center gap-2">
+          <span class="text-white/40">Variants:</span>
+          <span class="text-white/70">{infoGroup.variants.length}</span>
+        </div>
+        {#if infoGroup.variants.length > 0}
+          <div class="mt-3 pt-3 border-t border-exo-yellow/10">
+            <span class="text-white/40">Available quantizations:</span>
+            <div class="flex flex-wrap gap-1 mt-1">
+              {#each infoGroup.variants as variant}
+                <span
+                  class="px-1.5 py-0.5 bg-white/10 text-white/60 rounded text-[10px]"
+                >
+                  {variant.quantization || "default"} ({Math.round(
+                    (variant.storage_size_megabytes || 0) / 1024,
+                  )}GB)
+                </span>
+              {/each}
+            </div>
+          </div>
+        {/if}
+      </div>
+    </div>
+  {/if}
+{/if}
--- a/dashboard/src/lib/components/index.ts
+++ b/dashboard/src/lib/components/index.ts
@@ -6,3 +6,9 @@ export { default as ChatSidebar } from "./ChatSidebar.svelte";
 export { default as ModelCard } from "./ModelCard.svelte";
 export { default as MarkdownContent } from "./MarkdownContent.svelte";
 export { default as ImageParamsPanel } from "./ImageParamsPanel.svelte";
+export { default as FamilyLogos } from "./FamilyLogos.svelte";
+export { default as FamilySidebar } from "./FamilySidebar.svelte";
+export { default as HuggingFaceResultItem } from "./HuggingFaceResultItem.svelte";
+export { default as ModelFilterPopover } from "./ModelFilterPopover.svelte";
+export { default as ModelPickerGroup } from "./ModelPickerGroup.svelte";
+export { default as ModelPickerModal } from "./ModelPickerModal.svelte";
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
@@ -173,6 +173,41 @@ export interface PlacementPreviewResponse {
  previews: PlacementPreview[];
 }

+interface ImageApiResponse {
+  created: number;
+  data: Array<{ b64_json?: string; url?: string }>; // non-streaming path keeps only entries with b64_json
+}
+
+// Trace API response types
+export interface TraceCategoryStats {
+  totalUs: number;
+  count: number;
+  minUs: number;
+  maxUs: number;
+  avgUs: number;
+}
+
+export interface TraceRankStats {
+  byCategory: Record<string, TraceCategoryStats>;
+}
+
+export interface TraceStatsResponse {
+  taskId: string;
+  totalWallTimeUs: number;
+  byCategory: Record<string, TraceCategoryStats>;
+  byRank: Record<number, TraceRankStats>;
+}
+
+export interface TraceListItem {
+  taskId: string;
+  createdAt: string;
+  fileSize: number;
+}
+
+export interface TraceListResponse {
+  traces: TraceListItem[];
+}
+
 interface RawStateResponse {
  topology?: RawTopology;
  instances?: Record<
@@ -2095,107 +2130,137 @@ class AppStore {
        throw new Error(`API error: ${response.status} - ${errorText}`);
      }

-      const reader = response.body?.getReader();
-      if (!reader) {
-        throw new Error("No response body");
-      }
+      // Streaming requires both stream=true AND partialImages > 0
+      const isStreaming = params.stream && params.partialImages > 0;

-      interface ImageGenerationChunk {
-        data?: { b64_json?: string };
-        format?: string;
-        type?: "partial" | "final";
-        image_index?: number;
-        partial_index?: number;
-        total_partials?: number;
-      }
+      if (!isStreaming) {
+        // Non-streaming: parse JSON response directly
+        const jsonResponse = (await response.json()) as ImageApiResponse;
+        const format = params.outputFormat || "png";
+        const mimeType = `image/${format}`;

-      const numImages = params.numImages;
+        const attachments: MessageAttachment[] = jsonResponse.data
+          .filter((img) => img.b64_json)
+          .map((img, index) => ({
+            type: "generated-image" as const,
+            name: `generated-image-${index + 1}.${format}`,
+            preview: `data:${mimeType};base64,${img.b64_json}`,
+            mimeType,
+          }));

-      await this.parseSSEStream<ImageGenerationChunk>(
-        reader,
-        targetConversationId,
-        (parsed) => {
-          const imageData = parsed.data?.b64_json;
+        this.updateConversationMessage(
+          targetConversationId,
+          assistantMessage.id,
+          (msg) => {
+            msg.content = "";
+            msg.attachments = attachments;
+          },
+        );
+        this.syncActiveMessagesIfNeeded(targetConversationId);
+      } else {
+        // Streaming mode: use SSE parser
+        const reader = response.body?.getReader();
+        if (!reader) {
+          throw new Error("No response body");
+        }

-          if (imageData) {
-            const format = parsed.format || "png";
-            const mimeType = `image/${format}`;
-            const imageIndex = parsed.image_index ?? 0;
+        interface ImageGenerationChunk {
+          data?: { b64_json?: string };
+          format?: string;
+          type?: "partial" | "final";
+          image_index?: number;
+          partial_index?: number;
+          total_partials?: number;
+        }

-            if (parsed.type === "partial") {
-              // Update with partial image and progress
-              const partialNum = (parsed.partial_index ?? 0) + 1;
-              const totalPartials = parsed.total_partials ?? 3;
-              const progressText =
-                numImages > 1
-                  ? `Generating image ${imageIndex + 1}/${numImages}... ${partialNum}/${totalPartials}`
-                  : `Generating... ${partialNum}/${totalPartials}`;
+        const numImages = params.numImages;

-              const partialAttachment: MessageAttachment = {
-                type: "generated-image",
-                name: `generated-image.${format}`,
-                preview: `data:${mimeType};base64,${imageData}`,
-                mimeType,
-              };
+        await this.parseSSEStream<ImageGenerationChunk>(
+          reader,
+          targetConversationId,
+          (parsed) => {
+            const imageData = parsed.data?.b64_json;

-              this.updateConversationMessage(
-                targetConversationId,
-                assistantMessage.id,
-                (msg) => {
-                  msg.content = progressText;
-                  if (imageIndex === 0) {
-                    // First image - safe to replace attachments with partial preview
-                    msg.attachments = [partialAttachment];
-                  } else {
-                    // Subsequent images - keep existing finals, show partial at current position
-                    const existingAttachments = msg.attachments || [];
-                    // Keep only the completed final images (up to current imageIndex)
-                    const finals = existingAttachments.slice(0, imageIndex);
-                    msg.attachments = [...finals, partialAttachment];
-                  }
-                },
-              );
-            } else if (parsed.type === "final") {
-              // Final image - replace partial at this position
-              const newAttachment: MessageAttachment = {
-                type: "generated-image",
-                name: `generated-image-${imageIndex + 1}.${format}`,
-                preview: `data:${mimeType};base64,${imageData}`,
-                mimeType,
-              };
+            if (imageData) {
+              const format = parsed.format || "png";
+              const mimeType = `image/${format}`;
+              const imageIndex = parsed.image_index ?? 0;

-              this.updateConversationMessage(
-                targetConversationId,
-                assistantMessage.id,
-                (msg) => {
-                  if (imageIndex === 0) {
-                    // First final image - replace any partial preview
-                    msg.attachments = [newAttachment];
-                  } else {
-                    // Subsequent images - keep previous finals, replace partial at current position
-                    const existingAttachments = msg.attachments || [];
-                    // Slice keeps indices 0 to imageIndex-1 (the previous final images)
-                    const previousFinals = existingAttachments.slice(
-                      0,
-                      imageIndex,
-                    );
-                    msg.attachments = [...previousFinals, newAttachment];
-                  }
+              if (parsed.type === "partial") {
+                // Update with partial image and progress
+                const partialNum = (parsed.partial_index ?? 0) + 1;
+                const totalPartials = parsed.total_partials ?? 3;
+                const progressText =
+                  numImages > 1
+                    ? `Generating image ${imageIndex + 1}/${numImages}... ${partialNum}/${totalPartials}`
+                    : `Generating... ${partialNum}/${totalPartials}`;

-                  // Update progress message for multiple images
-                  if (numImages > 1 && imageIndex < numImages - 1) {
-                    msg.content = `Generating image ${imageIndex + 2}/${numImages}...`;
-                  } else {
-                    msg.content = "";
-                  }
-                },
-              );
+                const partialAttachment: MessageAttachment = {
+                  type: "generated-image",
+                  name: `generated-image.${format}`,
+                  preview: `data:${mimeType};base64,${imageData}`,
+                  mimeType,
+                };
+
+                this.updateConversationMessage(
+                  targetConversationId,
+                  assistantMessage.id,
+                  (msg) => {
+                    msg.content = progressText;
+                    if (imageIndex === 0) {
+                      // First image - safe to replace attachments with partial preview
+                      msg.attachments = [partialAttachment];
+                    } else {
+                      // Subsequent images - keep existing finals, show partial at current position
+                      const existingAttachments = msg.attachments || [];
+                      // Keep only the completed final images (up to current imageIndex)
+                      const finals = existingAttachments.slice(0, imageIndex);
+                      msg.attachments = [...finals, partialAttachment];
+                    }
+                  },
+                );
+              } else if (parsed.type === "final") {
+                // Final image - replace partial at this position
+                const newAttachment: MessageAttachment = {
+                  type: "generated-image",
+                  name: `generated-image-${imageIndex + 1}.${format}`,
+                  preview: `data:${mimeType};base64,${imageData}`,
+                  mimeType,
+                };
+
+                this.updateConversationMessage(
+                  targetConversationId,
+                  assistantMessage.id,
+                  (msg) => {
+                    if (imageIndex === 0) {
+                      // First final image - replace any partial preview
+                      msg.attachments = [newAttachment];
+                    } else {
+                      // Subsequent images - keep previous finals, replace partial at current position
+                      const existingAttachments = msg.attachments || [];
+                      // Slice keeps indices 0 to imageIndex-1 (the previous final images)
+                      const previousFinals = existingAttachments.slice(
+                        0,
+                        imageIndex,
+                      );
+                      msg.attachments = [...previousFinals, newAttachment];
+                    }
+
+                    // Update progress message for multiple images
+                    if (numImages > 1 && imageIndex < numImages - 1) {
+                      msg.content = `Generating image ${imageIndex + 2}/${numImages}...`;
+                    } else {
+                      msg.content = "";
+                    }
+                  },
+                );
+              }
+
+              this.syncActiveMessagesIfNeeded(targetConversationId);
            }
-
-            this.syncActiveMessagesIfNeeded(targetConversationId);
-          }
-        },
-      );
+          },
+        );
+      }
    } catch (error) {
      console.error("Error generating image:", error);
      this.handleStreamingError(
@@ -2343,69 +2408,98 @@ class AppStore {
        throw new Error(`API error: ${apiResponse.status} - ${errorText}`);
      }

-      const reader = apiResponse.body?.getReader();
-      if (!reader) {
-        throw new Error("No response body");
-      }
+      // Streaming requires both stream=true AND partialImages > 0
+      const isStreaming = params.stream && params.partialImages > 0;

-      interface ImageEditChunk {
-        data?: { b64_json?: string };
-        format?: string;
-        type?: "partial" | "final";
-        partial_index?: number;
-        total_partials?: number;
-      }
+      if (!isStreaming) {
+        // Non-streaming: parse JSON response directly
+        const jsonResponse = (await apiResponse.json()) as ImageApiResponse;
+        const format = params.outputFormat || "png";
+        const mimeType = `image/${format}`;
+        const attachments: MessageAttachment[] = jsonResponse.data
+          .filter((img) => img.b64_json)
+          .map((img) => ({
+            type: "generated-image" as const,
+            name: `edited-image.${format}`,
+            preview: `data:${mimeType};base64,${img.b64_json}`,
+            mimeType,
+          }));

-      await this.parseSSEStream<ImageEditChunk>(
-        reader,
-        targetConversationId,
-        (parsed) => {
-          const imageData = parsed.data?.b64_json;
+        this.updateConversationMessage(
+          targetConversationId,
+          assistantMessage.id,
+          (msg) => {
+            msg.content = "";
+            msg.attachments = attachments;
+          },
+        );
+        this.syncActiveMessagesIfNeeded(targetConversationId);
+      } else {
+        // Streaming mode: use SSE parser
+        const reader = apiResponse.body?.getReader();
+        if (!reader) {
+          throw new Error("No response body");
+        }

-          if (imageData) {
-            const format = parsed.format || "png";
-            const mimeType = `image/${format}`;
-            if (parsed.type === "partial") {
-              // Update with partial image and progress
-              const partialNum = (parsed.partial_index ?? 0) + 1;
-              const totalPartials = parsed.total_partials ?? 3;
-              this.updateConversationMessage(
-                targetConversationId,
-                assistantMessage.id,
-                (msg) => {
-                  msg.content = `Editing... ${partialNum}/${totalPartials}`;
-                  msg.attachments = [
-                    {
-                      type: "generated-image",
-                      name: `edited-image.${format}`,
-                      preview: `data:${mimeType};base64,${imageData}`,
-                      mimeType,
-                    },
-                  ];
-                },
-              );
-            } else if (parsed.type === "final") {
-              // Final image
-              this.updateConversationMessage(
-                targetConversationId,
-                assistantMessage.id,
-                (msg) => {
-                  msg.content = "";
-                  msg.attachments = [
-                    {
-                      type: "generated-image",
-                      name: `edited-image.${format}`,
-                      preview: `data:${mimeType};base64,${imageData}`,
-                      mimeType,
-                    },
-                  ];
-                },
-              );
+        interface ImageEditChunk {
+          data?: { b64_json?: string };
+          format?: string;
+          type?: "partial" | "final";
+          partial_index?: number;
+          total_partials?: number;
+        }
+
+        await this.parseSSEStream<ImageEditChunk>(
+          reader,
+          targetConversationId,
+          (parsed) => {
+            const imageData = parsed.data?.b64_json;
+
+            if (imageData) {
+              const format = parsed.format || "png";
+              const mimeType = `image/${format}`;
+              if (parsed.type === "partial") {
+                // Update with partial image and progress
+                const partialNum = (parsed.partial_index ?? 0) + 1;
+                const totalPartials = parsed.total_partials ?? 3;
+                this.updateConversationMessage(
+                  targetConversationId,
+                  assistantMessage.id,
+                  (msg) => {
+                    msg.content = `Editing... ${partialNum}/${totalPartials}`;
+                    msg.attachments = [
+                      {
+                        type: "generated-image",
+                        name: `edited-image.${format}`,
+                        preview: `data:${mimeType};base64,${imageData}`,
+                        mimeType,
+                      },
+                    ];
+                  },
+                );
+              } else if (parsed.type === "final") {
+                // Final image
+                this.updateConversationMessage(
+                  targetConversationId,
+                  assistantMessage.id,
+                  (msg) => {
+                    msg.content = "";
+                    msg.attachments = [
+                      {
+                        type: "generated-image",
+                        name: `edited-image.${format}`,
+                        preview: `data:${mimeType};base64,${imageData}`,
+                        mimeType,
+                      },
+                    ];
+                  },
+                );
+              }
+              this.syncActiveMessagesIfNeeded(targetConversationId);
            }
-            this.syncActiveMessagesIfNeeded(targetConversationId);
-          }
-        },
-      );
+          },
+        );
+      }
    } catch (error) {
      console.error("Error editing image:", error);
      this.handleStreamingError(
@@ -2491,6 +2585,49 @@ class AppStore {
      throw error;
    }
  }
+
+  /**
+   * Fetch the trace listing from `GET /v1/traces`; throws on a non-OK status.
+   */
+  async listTraces(): Promise<TraceListResponse> {
+    const res = await fetch("/v1/traces");
+    if (res.ok) {
+      return (await res.json()) as TraceListResponse;
+    }
+    throw new Error(`Failed to list traces: ${res.status}`);
+  }
+
+  /** Resolves true when GET /v1/traces/{taskId} is OK; false on any thrown error. */
+  async checkTraceExists(taskId: string): Promise<boolean> {
+    let exists = false;
+    try {
+      const traceUrl = `/v1/traces/${encodeURIComponent(taskId)}`;
+      exists = (await fetch(traceUrl)).ok;
+    } catch {
+      // Best-effort probe: a failed request is reported as "no trace".
+    }
+    return exists;
+  }
+
+  /**
+   * Fetch `GET /v1/traces/{taskId}/stats` and return the parsed JSON body.
+   * Throws an Error carrying the HTTP status when the response is not OK.
+   */
+  async fetchTraceStats(taskId: string): Promise<TraceStatsResponse> {
+    const statsUrl = `/v1/traces/${encodeURIComponent(taskId)}/stats`;
+    const res = await fetch(statsUrl);
+    if (res.ok) {
+      return (await res.json()) as TraceStatsResponse;
+    }
+    throw new Error(`Failed to fetch trace stats: ${res.status}`);
+  }
+
+  /**
+   * Build the relative URL of a task's raw trace file (task ID is URL-encoded).
+   */
+  getTraceRawUrl(taskId: string): string {
+    return "/v1/traces/" + encodeURIComponent(taskId) + "/raw";
+  }
 }

 export const appStore = new AppStore();
@@ -2602,3 +2739,12 @@ export const startDownload = (nodeId: string, shardMetadata: object) =>
  appStore.startDownload(nodeId, shardMetadata);
 export const deleteDownload = (nodeId: string, modelId: string) =>
  appStore.deleteDownload(nodeId, modelId);
+
+// Trace actions: module-level wrappers that delegate to the appStore singleton
+export const listTraces = () => appStore.listTraces();
+export const checkTraceExists = (taskId: string) =>
+  appStore.checkTraceExists(taskId);
+export const fetchTraceStats = (taskId: string) =>
+  appStore.fetchTraceStats(taskId);
+export const getTraceRawUrl = (taskId: string) =>
+  appStore.getTraceRawUrl(taskId);
--- a/dashboard/src/lib/stores/favorites.svelte.ts
+++ b/dashboard/src/lib/stores/favorites.svelte.ts
@@ -0,0 +1,97 @@
+/**
+ * FavoritesStore - Manages favorite models with localStorage persistence
+ */
+
+import { browser } from "$app/environment";
+
+const FAVORITES_KEY = "exo-favorite-models";
+
+/** Favorite base-model IDs held in `$state` and mirrored to localStorage. */
+class FavoritesStore {
+  favorites = $state<Set<string>>(new Set());
+
+  constructor() {
+    if (browser) this.loadFromStorage();
+  }
+
+  /** Restore favorites from storage; non-array JSON and non-string entries are dropped. */
+  private loadFromStorage() {
+    try {
+      const stored = localStorage.getItem(FAVORITES_KEY);
+      if (!stored) return;
+      const parsed: unknown = JSON.parse(stored);
+      if (!Array.isArray(parsed)) return;
+      this.favorites = new Set(
+        parsed.filter((id): id is string => typeof id === "string"),
+      );
+    } catch (error) {
+      console.error("Failed to load favorites:", error);
+    }
+  }
+
+  /** Write the current favorites as a JSON array; failures are only logged. */
+  private saveToStorage() {
+    try {
+      const array = Array.from(this.favorites);
+      localStorage.setItem(FAVORITES_KEY, JSON.stringify(array));
+    } catch (error) {
+      console.error("Failed to save favorites:", error);
+    }
+  }
+
+  add(baseModelId: string) {
+    this.favorites = new Set(this.favorites).add(baseModelId);
+    this.saveToStorage();
+  }
+
+  remove(baseModelId: string) {
+    const kept = Array.from(this.favorites).filter((id) => id !== baseModelId);
+    this.favorites = new Set(kept);
+    this.saveToStorage();
+  }
+
+  toggle(baseModelId: string) {
+    if (this.favorites.has(baseModelId)) {
+      this.remove(baseModelId);
+    } else {
+      this.add(baseModelId);
+    }
+  }
+
+  isFavorite(baseModelId: string): boolean {
+    return this.favorites.has(baseModelId);
+  }
+
+  getAll(): string[] {
+    return Array.from(this.favorites);
+  }
+
+  getSet(): Set<string> {
+    return new Set(this.favorites);
+  }
+
+  hasAny(): boolean {
+    return this.favorites.size > 0;
+  }
+
+  clearAll() {
+    this.favorites = new Set();
+    this.saveToStorage();
+  }
+}
+
+export const favoritesStore = new FavoritesStore();
+// Function-style accessors that delegate to the favoritesStore singleton.
+export const favorites = () => favoritesStore.favorites;
+export const hasFavorites = () => favoritesStore.hasAny();
+export const isFavorite = (baseModelId: string) =>
+  favoritesStore.isFavorite(baseModelId);
+export const toggleFavorite = (baseModelId: string) =>
+  favoritesStore.toggle(baseModelId);
+export const addFavorite = (baseModelId: string) =>
+  favoritesStore.add(baseModelId);
+export const removeFavorite = (baseModelId: string) =>
+  favoritesStore.remove(baseModelId);
+export const getFavorites = () => favoritesStore.getAll();
+export const getFavoritesSet = () => favoritesStore.getSet();
+export const clearFavorites = () => favoritesStore.clearAll();
--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -5,7 +5,13 @@
    ChatMessages,
    ChatSidebar,
    ModelCard,
+    ModelPickerModal,
  } from "$lib/components";
+  import {
+    favorites,
+    toggleFavorite,
+    getFavoritesSet,
+  } from "$lib/stores/favorites.svelte";
  import {
    hasStartedChat,
    isTopologyMinimized,
@@ -100,6 +106,11 @@
      storage_size_megabytes?: number;
      tasks?: string[];
      hugging_face_id?: string;
+      is_custom?: boolean;
+      family?: string;
+      quantization?: string;
+      base_model?: string;
+      capabilities?: string[];
    }>
  >([]);

@@ -211,9 +222,11 @@
  let launchingModelId = $state<string | null>(null);
  let instanceDownloadExpandedNodes = $state<Set<string>>(new Set());

-  // Custom dropdown state
-  let isModelDropdownOpen = $state(false);
-  let modelDropdownSearch = $state("");
+  // Model picker modal state
+  let isModelPickerOpen = $state(false);
+
+  // Favorites state (reactive)
+  const favoritesSet = $derived(getFavoritesSet());

  // Slider dragging state
  let isDraggingSlider = $state(false);
@@ -530,6 +543,47 @@
    }
  }

+  // POST the model ID to /models/add, then re-fetch the model list.
+  // Throws on a non-OK response, preferring the server's string `detail`.
+  async function addModelFromPicker(modelId: string) {
+    const response = await fetch("/models/add", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ model_id: modelId }),
+    });
+    if (!response.ok) {
+      let message = `Failed to add model (${response.status}: ${response.statusText})`;
+      try {
+        const detail: unknown = (await response.json())?.detail;
+        if (typeof detail === "string" && detail) message = detail;
+      } catch {
+        // Body was not JSON: keep the default message.
+      }
+      throw new Error(message);
+    }
+    await fetchModels();
+  }
+
+  // DELETE the custom model, then refresh the model list.
+  // Best-effort: failures are logged to the console and never rethrown.
+  async function deleteCustomModel(modelId: string) {
+    try {
+      const url = `/models/custom/${encodeURIComponent(modelId)}`;
+      const response = await fetch(url, { method: "DELETE" });
+      // Route HTTP failures to the same log path as network errors.
+      if (!response.ok) throw new Error(`HTTP ${response.status}`);
+      await fetchModels();
+    } catch (error) {
+      console.error("Failed to delete custom model", error);
+    }
+  }
+  // Picker selection: run selectPreviewModel and saveLaunchDefaults, then close the modal.
+  function handleModelPickerSelect(modelId: string) {
+    selectPreviewModel(modelId);
+    saveLaunchDefaults();
+    isModelPickerOpen = false;
+  }
+
  async function launchInstance(
    modelId: string,
    specificPreview?: PlacementPreview | null,
@@ -2360,14 +2414,12 @@
              >
            </div>

-            <!-- Model Dropdown (Custom) -->
-            <div class="flex-shrink-0 mb-3 relative">
+            <!-- Model Picker Button -->
+            <div class="flex-shrink-0 mb-3">
              <button
                type="button"
-                onclick={() => (isModelDropdownOpen = !isModelDropdownOpen)}
-                class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-2.5 text-sm font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 {isModelDropdownOpen
-                  ? 'border-exo-yellow/70'
-                  : ''}"
+                onclick={() => (isModelPickerOpen = true)}
+                class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-2.5 text-sm font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 relative"
              >
                {#if selectedModelId}
                  {@const foundModel = models.find(
@@ -2375,54 +2427,12 @@
                  )}
                  {#if foundModel}
                    {@const sizeGB = getModelSizeGB(foundModel)}
-                    {@const isImageModel = modelSupportsImageGeneration(
-                      foundModel.id,
-                    )}
-                    {@const isImageEditModel = modelSupportsImageEditing(
-                      foundModel.id,
-                    )}
                    <span
                      class="flex items-center justify-between gap-2 w-full pr-4"
                    >
                      <span
                        class="flex items-center gap-2 text-exo-light-gray truncate"
                      >
-                        {#if isImageModel}
-                          <svg
-                            class="w-4 h-4 flex-shrink-0 text-exo-yellow"
-                            fill="none"
-                            viewBox="0 0 24 24"
-                            stroke="currentColor"
-                            stroke-width="2"
-                          >
-                            <rect
-                              x="3"
-                              y="3"
-                              width="18"
-                              height="18"
-                              rx="2"
-                              ry="2"
-                            />
-                            <circle cx="8.5" cy="8.5" r="1.5" />
-                            <polyline points="21 15 16 10 5 21" />
-                          </svg>
-                        {/if}
-                        {#if isImageEditModel}
-                          <svg
-                            class="w-4 h-4 flex-shrink-0 text-exo-yellow"
-                            fill="none"
-                            viewBox="0 0 24 24"
-                            stroke="currentColor"
-                            stroke-width="2"
-                          >
-                            <path
-                              d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
-                            />
-                            <path
-                              d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
-                            />
-                          </svg>
-                        {/if}
                        <span class="truncate"
                          >{foundModel.name || foundModel.id}</span
                        >
@@ -2439,142 +2449,24 @@
                {:else}
                  <span class="text-white/50">— SELECT MODEL —</span>
                {/if}
-              </button>
-              <div
-                class="absolute right-3 top-1/2 -translate-y-1/2 pointer-events-none transition-transform duration-200 {isModelDropdownOpen
-                  ? 'rotate-180'
-                  : ''}"
-              >
-                <svg
-                  class="w-4 h-4 text-exo-yellow/60"
-                  fill="none"
-                  viewBox="0 0 24 24"
-                  stroke="currentColor"
-                >
-                  <path
-                    stroke-linecap="round"
-                    stroke-linejoin="round"
-                    stroke-width="2"
-                    d="M19 9l-7 7-7-7"
-                  />
-                </svg>
-              </div>
-
-              {#if isModelDropdownOpen}
-                <!-- Backdrop to close dropdown -->
-                <button
-                  type="button"
-                  class="fixed inset-0 z-40 cursor-default"
-                  onclick={() => (isModelDropdownOpen = false)}
-                  aria-label="Close dropdown"
-                ></button>
-
-                <!-- Dropdown Panel -->
                <div
-                  class="absolute top-full left-0 right-0 mt-1 bg-exo-dark-gray border border-exo-yellow/30 rounded shadow-lg shadow-black/50 z-50 max-h-64 overflow-y-auto"
+                  class="absolute right-3 top-1/2 -translate-y-1/2 pointer-events-none"
                >
-                  <!-- Search within dropdown -->
-                  <div
-                    class="sticky top-0 bg-exo-dark-gray border-b border-exo-medium-gray/30 p-2"
+                  <svg
+                    class="w-4 h-4 text-exo-yellow/60"
+                    fill="none"
+                    viewBox="0 0 24 24"
+                    stroke="currentColor"
                  >
-                    <input
-                      type="text"
-                      placeholder="Search models..."
-                      bind:value={modelDropdownSearch}
-                      class="w-full bg-exo-dark-gray/60 border border-exo-medium-gray/30 rounded px-2 py-1.5 text-xs font-mono text-white/80 placeholder:text-white/40 focus:outline-none focus:border-exo-yellow/50"
+                    <path
+                      stroke-linecap="round"
+                      stroke-linejoin="round"
+                      stroke-width="2"
+                      d="M19 9l-7 7-7-7"
                    />
-                  </div>
-
-                  <!-- Options -->
-                  <div class="py-1">
-                    {#each sortedModels().filter((m) => !modelDropdownSearch || (m.name || m.id)
-                          .toLowerCase()
-                          .includes(modelDropdownSearch.toLowerCase())) as model}
-                      {@const sizeGB = getModelSizeGB(model)}
-                      {@const modelCanFit = hasEnoughMemory(model)}
-                      {@const isImageModel = modelSupportsImageGeneration(
-                        model.id,
-                      )}
-                      {@const isImageEditModel = modelSupportsImageEditing(
-                        model.id,
-                      )}
-                      <button
-                        type="button"
-                        onclick={() => {
-                          if (modelCanFit) {
-                            selectPreviewModel(model.id);
-                            saveLaunchDefaults();
-                            isModelDropdownOpen = false;
-                            modelDropdownSearch = "";
-                          }
-                        }}
-                        disabled={!modelCanFit}
-                        class="w-full px-3 py-2 text-left text-sm font-mono tracking-wide transition-colors duration-100 flex items-center justify-between gap-2 {selectedModelId ===
-                        model.id
-                          ? 'bg-transparent text-exo-yellow cursor-pointer'
-                          : modelCanFit
-                            ? 'text-white/80 hover:text-exo-yellow cursor-pointer'
-                            : 'text-white/30 cursor-default'}"
-                      >
-                        <span class="flex items-center gap-2 truncate flex-1">
-                          {#if isImageModel}
-                            <svg
-                              class="w-4 h-4 flex-shrink-0 text-exo-yellow"
-                              fill="none"
-                              viewBox="0 0 24 24"
-                              stroke="currentColor"
-                              stroke-width="2"
-                              aria-label="Image generation model"
-                            >
-                              <rect
-                                x="3"
-                                y="3"
-                                width="18"
-                                height="18"
-                                rx="2"
-                                ry="2"
-                              />
-                              <circle cx="8.5" cy="8.5" r="1.5" />
-                              <polyline points="21 15 16 10 5 21" />
-                            </svg>
-                          {/if}
-                          {#if isImageEditModel}
-                            <svg
-                              class="w-4 h-4 flex-shrink-0 text-exo-yellow"
-                              fill="none"
-                              viewBox="0 0 24 24"
-                              stroke="currentColor"
-                              stroke-width="2"
-                              aria-label="Image editing model"
-                            >
-                              <path
-                                d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
-                              />
-                              <path
-                                d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
-                              />
-                            </svg>
-                          {/if}
-                          <span class="truncate">{model.name || model.id}</span>
-                        </span>
-                        <span
-                          class="flex-shrink-0 text-xs {modelCanFit
-                            ? 'text-white/50'
-                            : 'text-red-400/60'}"
-                        >
-                          {sizeGB >= 1
-                            ? sizeGB.toFixed(0)
-                            : sizeGB.toFixed(1)}GB
-                        </span>
-                      </button>
-                    {:else}
-                      <div class="px-3 py-2 text-xs text-white/50 font-mono">
-                        No models found
-                      </div>
-                    {/each}
-                  </div>
+                  </svg>
                </div>
-              {/if}
+              </button>
            </div>

            <!-- Configuration Options -->
@@ -3354,3 +3246,22 @@
    {/if}
  </main>
 </div>
+
+<ModelPickerModal
+  isOpen={isModelPickerOpen}
+  {models}
+  {selectedModelId}
+  favorites={favoritesSet}
+  existingModelIds={new Set(models.map((m) => m.id))}
+  canModelFit={(modelId) => {
+    const model = models.find((m) => m.id === modelId);
+    return model ? hasEnoughMemory(model) : false;
+  }}
+  onSelect={handleModelPickerSelect}
+  onClose={() => (isModelPickerOpen = false)}
+  onToggleFavorite={toggleFavorite}
+  onAddModel={addModelFromPicker}
+  onDeleteModel={deleteCustomModel}
+  totalMemoryGB={clusterMemory().total / (1024 * 1024 * 1024)}
+  usedMemoryGB={clusterMemory().used / (1024 * 1024 * 1024)}
+/>
--- a/dashboard/src/routes/traces/+page.svelte
+++ b/dashboard/src/routes/traces/+page.svelte
@@ -0,0 +1,190 @@
+<script lang="ts">
+  import { onMount } from "svelte";
+  import {
+    listTraces,
+    getTraceRawUrl,
+    type TraceListItem,
+  } from "$lib/stores/app.svelte";
+  import HeaderNav from "$lib/components/HeaderNav.svelte";
+
+  let traces = $state<TraceListItem[]>([]);
+  let loading = $state(true);
+  let error = $state<string | null>(null);
+
+  // Format a byte count with 1024-based units (B to GB); "0B" for falsy or <= 0.
+  function formatBytes(bytes: number): string {
+    if (!bytes || bytes <= 0) return "0B";
+    const units = ["B", "KB", "MB", "GB"];
+    const exponent = Math.floor(Math.log(bytes) / Math.log(1024));
+    // Clamp both ends: sub-byte values give -1, huge values exceed the table.
+    const unitIndex = Math.max(0, Math.min(exponent, units.length - 1));
+    const scaled = bytes / Math.pow(1024, unitIndex);
+    return `${scaled.toFixed(scaled >= 10 ? 0 : 1)}${units[unitIndex]}`;
+  }
+
+  // Render an ISO timestamp using the runtime's default locale formatting.
+  function formatDate(isoString: string): string {
+    return new Date(isoString).toLocaleString();
+  }
+
+  // Fetch a task's raw trace; alerts and resolves to null on a non-OK status.
+  async function fetchRawTrace(taskId: string): Promise<Response | null> {
+    const response = await fetch(getTraceRawUrl(taskId));
+    if (!response.ok) alert(`Failed to load trace (${response.status}).`);
+    return response.ok ? response : null;
+  }
+
+  // Save the raw trace as trace_<taskId>.json through a temporary <a> element.
+  async function downloadTrace(taskId: string) {
+    const response = await fetchRawTrace(taskId);
+    if (!response) return;
+    const url = URL.createObjectURL(await response.blob());
+    const a = document.createElement("a");
+    a.href = url;
+    a.download = `trace_${taskId}.json`;
+    a.click();
+    URL.revokeObjectURL(url);
+  }
+
+  // Open the raw trace in a new Perfetto tab via a PING/PONG postMessage handshake.
+  async function openInPerfetto(taskId: string) {
+    const perfettoOrigin = "https://ui.perfetto.dev";
+    const response = await fetchRawTrace(taskId);
+    if (!response) return;
+    const traceData = await response.arrayBuffer();
+    const perfettoWindow = window.open(perfettoOrigin);
+    if (!perfettoWindow) {
+      alert("Failed to open Perfetto. Please allow popups.");
+      return;
+    }
+
+    // Ends the handshake: called on the first PONG and by the timeout below.
+    const stopHandshake = () => {
+      clearInterval(pingInterval);
+      window.removeEventListener("message", onMessage);
+    };
+    const onMessage = (e: MessageEvent) => {
+      // Only a PONG coming from the Perfetto origin may trigger the hand-off.
+      if (e.origin !== perfettoOrigin || e.data !== "PONG") return;
+      stopHandshake();
+      perfettoWindow.postMessage(
+        { perfetto: { buffer: traceData, title: `Trace ${taskId}` } },
+        perfettoOrigin,
+      );
+    };
+    window.addEventListener("message", onMessage);
+    const pingInterval = setInterval(() => {
+      perfettoWindow.postMessage("PING", perfettoOrigin);
+    }, 50);
+    setTimeout(stopHandshake, 10000); // give up after 10 seconds
+  }
+
+  // Reload the trace list, updating the loading/error state around the request.
+  async function refresh() {
+    loading = true;
+    error = null;
+    try {
+      traces = (await listTraces()).traces;
+    } catch (e) {
+      error = e instanceof Error ? e.message : "Failed to load traces";
+    } finally {
+      loading = false;
+    }
+  }
+  // Kick off the initial trace load once the component has mounted.
+  onMount(() => {
+    refresh();
+  });
+</script>
+
+<div class="min-h-screen bg-exo-dark-gray text-white">
+  <HeaderNav showHome={true} />
+  <div class="max-w-7xl mx-auto px-4 lg:px-8 py-6 space-y-6">
+    <div class="flex items-center justify-between gap-4 flex-wrap">
+      <div>
+        <h1
+          class="text-2xl font-mono tracking-[0.2em] uppercase text-exo-yellow"
+        >
+          Traces
+        </h1>
+      </div>
+      <div class="flex items-center gap-3">
+        <button
+          type="button"
+          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
+          onclick={refresh}
+          disabled={loading}
+        >
+          Refresh
+        </button>
+      </div>
+    </div>
+
+    {#if loading}
+      <div
+        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray"
+      >
+        <div class="text-sm">Loading traces...</div>
+      </div>
+    {:else if error}
+      <div
+        class="rounded border border-red-500/30 bg-red-500/10 p-6 text-center text-red-400"
+      >
+        <div class="text-sm">{error}</div>
+      </div>
+    {:else if traces.length === 0}
+      <div
+        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray space-y-2"
+      >
+        <div class="text-sm">No traces found.</div>
+        <div class="text-xs text-exo-light-gray/70">
+          Run exo with EXO_TRACING_ENABLED=1 to collect traces.
+        </div>
+      </div>
+    {:else}
+      <div class="space-y-3">
+        {#each traces as trace}
+          <div
+            class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 flex items-center justify-between gap-4"
+          >
+            <div class="min-w-0 flex-1">
+              <a
+                href="#/traces/{trace.taskId}"
+                class="text-sm font-mono text-white hover:text-exo-yellow transition-colors truncate block"
+              >
+                {trace.taskId}
+              </a>
+              <div class="text-xs text-exo-light-gray font-mono mt-1">
+                {formatDate(trace.createdAt)} &bull; {formatBytes(
+                  trace.fileSize,
+                )}
+              </div>
+            </div>
+            <div class="flex items-center gap-2 shrink-0">
+              <a
+                href="#/traces/{trace.taskId}"
+                class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
+              >
+                View Stats
+              </a>
+              <button
+                type="button"
+                class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-2 py-1 rounded"
+                onclick={() => downloadTrace(trace.taskId)}
+              >
+                Download
+              </button>
+              <button
+                type="button"
+                class="text-xs font-mono text-exo-dark-gray bg-exo-yellow hover:bg-exo-yellow/90 transition-colors uppercase px-2 py-1 rounded font-semibold"
+                onclick={() => openInPerfetto(trace.taskId)}
+              >
+                View Trace
+              </button>
+            </div>
+          </div>
+        {/each}
+      </div>
+    {/if}
+  </div>
+</div>
--- a/dashboard/src/routes/traces/[taskId]/+page.svelte
+++ b/dashboard/src/routes/traces/[taskId]/+page.svelte
@@ -0,0 +1,367 @@
+<script lang="ts">
+  import { page } from "$app/stores";
+  import { onMount } from "svelte";
+  import {
+    fetchTraceStats,
+    getTraceRawUrl,
+    type TraceStatsResponse,
+    type TraceCategoryStats,
+  } from "$lib/stores/app.svelte";
+  import HeaderNav from "$lib/components/HeaderNav.svelte";
+
+  const taskId = $derived($page.params.taskId);
+
+  let stats = $state<TraceStatsResponse | null>(null);
+  let loading = $state(true);
+  let error = $state<string | null>(null);
+
+  /**
+   * Renders a duration given in microseconds using the largest unit that
+   * keeps it readable: whole "us" below one millisecond, otherwise "ms" or
+   * "s" with two decimal places.
+   */
+  function formatDuration(us: number): string {
+    const MICROS_PER_MILLI = 1000;
+    const MICROS_PER_SECOND = 1_000_000;
+
+    let rendered: string;
+    if (us < MICROS_PER_MILLI) {
+      rendered = us.toFixed(0) + "us";
+    } else if (us < MICROS_PER_SECOND) {
+      rendered = (us / MICROS_PER_MILLI).toFixed(2) + "ms";
+    } else {
+      rendered = (us / MICROS_PER_SECOND).toFixed(2) + "s";
+    }
+    return rendered;
+  }
+
+  /**
+   * Expresses `part` as a percentage of `total` with one decimal place.
+   * A zero `total` yields "0.0%" rather than dividing by zero.
+   */
+  function formatPercentage(part: number, total: number): string {
+    return total === 0
+      ? "0.0%"
+      : ((part / total) * 100).toFixed(1) + "%";
+  }
+
+  // Parse hierarchical categories like "sync/compute" into phases
+  type PhaseData = {
+    name: string;
+    subcategories: { name: string; stats: TraceCategoryStats }[];
+    totalUs: number; // From outer span (e.g., "sync" category)
+    stepCount: number; // Count of outer span events
+  };
+
+  /**
+   * Groups flat trace categories into phases.
+   *
+   * A category without "/" (e.g. "sync") is the outer span of a phase and
+   * supplies the phase total and step count. A category with "/" (e.g.
+   * "sync/compute") is a sub-category of the phase named before the first
+   * slash. Phases that have sub-categories but no outer span are dropped,
+   * because there is no total to report them against.
+   *
+   * @param byCategory stats keyed by category name
+   * @returns phases sorted by total time (descending), each with its
+   *          sub-categories sorted by total time (descending)
+   */
+  function parsePhases(
+    byCategory: Record<string, TraceCategoryStats>,
+  ): PhaseData[] {
+    type PhaseEntry = {
+      subcats: Map<string, TraceCategoryStats>;
+      outerStats: TraceCategoryStats | null;
+    };
+    const phases = new Map<string, PhaseEntry>();
+
+    // Returns the accumulator for a phase, creating it on first use.
+    const entryFor = (phaseName: string): PhaseEntry => {
+      let entry = phases.get(phaseName);
+      if (!entry) {
+        entry = { subcats: new Map(), outerStats: null };
+        phases.set(phaseName, entry);
+      }
+      return entry;
+    };
+
+    for (const [category, catStats] of Object.entries(byCategory)) {
+      const slash = category.indexOf("/");
+      if (slash === -1) {
+        // Outer span - this IS the phase total
+        entryFor(category).outerStats = catStats;
+      } else {
+        // Split on the FIRST "/" only and keep the remainder intact.
+        // `split("/", 2)` truncates in JavaScript, so "a/b/c" would become
+        // sub-category "b" and silently overwrite the real "a/b" entry.
+        const phaseName = category.slice(0, slash);
+        const subName = category.slice(slash + 1);
+        entryFor(phaseName).subcats.set(subName, catStats);
+      }
+    }
+
+    const result: PhaseData[] = [];
+    for (const [name, { subcats, outerStats }] of phases) {
+      if (outerStats === null) continue; // Only phases with outer spans
+      result.push({
+        name,
+        subcategories: Array.from(subcats, ([subName, subStats]) => ({
+          name: subName,
+          stats: subStats,
+        })).sort((a, b) => b.stats.totalUs - a.stats.totalUs),
+        totalUs: outerStats.totalUs, // Outer span total
+        stepCount: outerStats.count, // Number of steps
+      });
+    }
+    return result.sort((a, b) => b.totalUs - a.totalUs);
+  }
+
+  /**
+   * Downloads the raw trace of the current task as `trace_<taskId>.json`.
+   *
+   * A failed request is reported to the user instead of being saved:
+   * without the status check the server's error body would be written to
+   * disk under the trace file name.
+   */
+  async function downloadTrace() {
+    if (!taskId) return;
+
+    let blob: Blob;
+    try {
+      const response = await fetch(getTraceRawUrl(taskId));
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status} ${response.statusText}`);
+      }
+      blob = await response.blob();
+    } catch (e) {
+      const reason = e instanceof Error ? e.message : String(e);
+      alert(`Failed to download trace: ${reason}`);
+      return;
+    }
+
+    const url = URL.createObjectURL(blob);
+    try {
+      // A detached anchor is enough to trigger the browser's download flow.
+      const a = document.createElement("a");
+      a.href = url;
+      a.download = `trace_${taskId}.json`;
+      a.click();
+    } finally {
+      // Release the blob URL even if triggering the download throws.
+      URL.revokeObjectURL(url);
+    }
+  }
+
+  /**
+   * Opens the current task's trace in the hosted Perfetto UI.
+   *
+   * The trace is fetched from the local API, a Perfetto window is opened,
+   * and the buffer is handed over using Perfetto's postMessage handshake:
+   * "PING" is sent repeatedly until the UI answers "PONG", then the trace
+   * is posted once. The handshake is abandoned after 10 seconds.
+   *
+   * Note: the window is opened after an `await`, so browsers with strict
+   * popup blocking may refuse it; that case is reported via `alert`.
+   */
+  async function openInPerfetto() {
+    if (!taskId) return;
+
+    const PERFETTO_ORIGIN = "https://ui.perfetto.dev";
+    const PING_INTERVAL_MS = 50;
+    const HANDSHAKE_TIMEOUT_MS = 10000;
+
+    // Fetch trace data from our local API. Reject HTTP errors so an error
+    // body is never forwarded to Perfetto as if it were a trace.
+    let traceData: ArrayBuffer;
+    try {
+      const response = await fetch(getTraceRawUrl(taskId));
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status} ${response.statusText}`);
+      }
+      traceData = await response.arrayBuffer();
+    } catch (e) {
+      const reason = e instanceof Error ? e.message : String(e);
+      alert(`Failed to load trace: ${reason}`);
+      return;
+    }
+
+    // Open Perfetto UI
+    const perfettoWindow = window.open(PERFETTO_ORIGIN);
+    if (!perfettoWindow) {
+      alert("Failed to open Perfetto. Please allow popups.");
+      return;
+    }
+
+    const title = `Trace ${taskId}`;
+
+    // Tears down every handshake resource; safe to call more than once.
+    const stopHandshake = () => {
+      clearInterval(pingInterval);
+      clearTimeout(giveUpTimer);
+      window.removeEventListener("message", onMessage);
+    };
+
+    // Wait for Perfetto to be ready, then send trace via postMessage.
+    // Only a "PONG" coming from the Perfetto origin completes the handshake.
+    const onMessage = (e: MessageEvent) => {
+      if (e.origin !== PERFETTO_ORIGIN || e.data !== "PONG") return;
+      // Stop pinging immediately instead of letting the interval run until
+      // the timeout fires.
+      stopHandshake();
+      perfettoWindow.postMessage(
+        {
+          perfetto: {
+            buffer: traceData,
+            title,
+          },
+        },
+        PERFETTO_ORIGIN,
+      );
+    };
+    window.addEventListener("message", onMessage);
+
+    // Ping Perfetto until it responds
+    const pingInterval = setInterval(() => {
+      perfettoWindow.postMessage("PING", PERFETTO_ORIGIN);
+    }, PING_INTERVAL_MS);
+
+    // Give up if Perfetto never answers
+    const giveUpTimer = setTimeout(stopHandshake, HANDSHAKE_TIMEOUT_MS);
+  }
+
+  // Load the stats once on mount. Whatever the outcome, `loading` ends up
+  // false and either `stats` or `error` is populated.
+  onMount(async () => {
+    if (taskId) {
+      try {
+        stats = await fetchTraceStats(taskId);
+      } catch (err) {
+        error = err instanceof Error ? err.message : "Failed to load trace";
+      }
+    } else {
+      error = "No task ID provided";
+    }
+    loading = false;
+  });
+
+  // Phase breakdown over all categories; empty until the stats have loaded.
+  const phases = $derived(!stats ? [] : parsePhases(stats.byCategory));
+  // Rank ids from `stats.byRank`, in ascending numeric order.
+  const sortedRanks = $derived(
+    !stats
+      ? []
+      : Object.keys(stats.byRank)
+          .map((key) => Number(key))
+          .sort((lhs, rhs) => lhs - rhs),
+  );
+  // Divisor for per-node averages; 1 when no ranks are known so the
+  // template never divides by zero nodes.
+  const nodeCount = $derived(
+    sortedRanks.length > 0 ? sortedRanks.length : 1,
+  );
+</script>
+
+<div class="min-h-screen bg-exo-dark-gray text-white">
+  <HeaderNav showHome={true} />
+  <div class="max-w-7xl mx-auto px-4 lg:px-8 py-6 space-y-6">
+    <div class="flex items-center justify-between gap-4 flex-wrap">
+      <div>
+        <h1
+          class="text-2xl font-mono tracking-[0.2em] uppercase text-exo-yellow"
+        >
+          Trace
+        </h1>
+        <p class="text-sm text-exo-light-gray font-mono truncate max-w-lg">
+          {taskId}
+        </p>
+      </div>
+      <div class="flex items-center gap-3">
+        <a
+          href="#/traces"
+          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-3 py-1.5 rounded"
+        >
+          All Traces
+        </a>
+        <button
+          type="button"
+          class="text-xs font-mono text-exo-light-gray hover:text-exo-yellow transition-colors uppercase border border-exo-medium-gray/40 px-3 py-1.5 rounded"
+          onclick={downloadTrace}
+          disabled={loading || !!error}
+        >
+          Download
+        </button>
+        <button
+          type="button"
+          class="text-xs font-mono text-exo-dark-gray bg-exo-yellow hover:bg-exo-yellow/90 transition-colors uppercase px-3 py-1.5 rounded font-semibold"
+          onclick={openInPerfetto}
+          disabled={loading || !!error}
+        >
+          View Trace
+        </button>
+      </div>
+    </div>
+
+    {#if loading}
+      <div
+        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-6 text-center text-exo-light-gray"
+      >
+        <div class="text-sm">Loading trace data...</div>
+      </div>
+    {:else if error}
+      <div
+        class="rounded border border-red-500/30 bg-red-500/10 p-6 text-center text-red-400"
+      >
+        <div class="text-sm">{error}</div>
+      </div>
+    {:else if stats}
+      <!-- Wall Time Summary -->
+      <div
+        class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-2"
+      >
+        <h2
+          class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
+        >
+          Summary
+        </h2>
+        <div class="text-3xl font-mono text-exo-yellow">
+          {formatDuration(stats.totalWallTimeUs)}
+        </div>
+        <div class="text-xs text-exo-light-gray">Total wall time</div>
+      </div>
+
+      <!-- By Phase -->
+      {#if phases.length > 0}
+        <div
+          class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-4"
+        >
+          <h2
+            class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
+          >
+            By Phase <span class="text-exo-light-gray/50">(avg per node)</span>
+          </h2>
+          <div class="space-y-4">
+            {#each phases as phase}
+              {@const normalizedTotal = phase.totalUs / nodeCount}
+              {@const normalizedStepCount = phase.stepCount / nodeCount}
+              <div class="space-y-2">
+                <div class="flex items-center justify-between">
+                  <span class="text-sm font-mono text-white">{phase.name}</span>
+                  <span class="text-sm font-mono">
+                    <span class="text-exo-yellow"
+                      >{formatDuration(normalizedTotal)}</span
+                    >
+                    <span class="text-exo-light-gray ml-2">
+                      ({normalizedStepCount} steps, {formatDuration(
+                        normalizedTotal / normalizedStepCount,
+                      )}/step)
+                    </span>
+                  </span>
+                </div>
+                {#if phase.subcategories.length > 0}
+                  <div class="pl-4 space-y-1.5">
+                    {#each phase.subcategories as subcat}
+                      {@const normalizedSubcat =
+                        subcat.stats.totalUs / nodeCount}
+                      {@const pct = formatPercentage(
+                        normalizedSubcat,
+                        normalizedTotal,
+                      )}
+                      {@const perStep = normalizedSubcat / normalizedStepCount}
+                      <div
+                        class="flex items-center justify-between text-xs font-mono"
+                      >
+                        <span class="text-exo-light-gray">{subcat.name}</span>
+                        <span class="text-white">
+                          {formatDuration(normalizedSubcat)}
+                          <span class="text-exo-light-gray ml-2">({pct})</span>
+                          <span class="text-exo-light-gray/60 ml-2"
+                            >{formatDuration(perStep)}/step</span
+                          >
+                        </span>
+                      </div>
+                      <!-- Progress bar -->
+                      <div
+                        class="relative h-1.5 bg-exo-black/60 rounded-sm overflow-hidden"
+                      >
+                        <div
+                          class="absolute inset-y-0 left-0 bg-gradient-to-r from-exo-yellow to-exo-yellow/70 transition-all duration-300"
+                          style="width: {pct}"
+                        ></div>
+                      </div>
+                    {/each}
+                  </div>
+                {/if}
+              </div>
+            {/each}
+          </div>
+        </div>
+      {/if}
+
+      <!-- By Rank -->
+      {#if sortedRanks.length > 0}
+        <div
+          class="rounded border border-exo-medium-gray/30 bg-exo-black/30 p-4 space-y-4"
+        >
+          <h2
+            class="text-sm font-mono uppercase tracking-wider text-exo-light-gray"
+          >
+            By Rank
+          </h2>
+          <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
+            {#each sortedRanks as rank}
+              {@const rankStats = stats.byRank[rank]}
+              {@const rankPhases = parsePhases(rankStats.byCategory)}
+              <div
+                class="rounded border border-exo-medium-gray/20 bg-exo-dark-gray/60 p-3 space-y-3"
+              >
+                <div class="text-sm font-mono text-exo-yellow">
+                  Rank {rank}
+                </div>
+                <div class="space-y-2">
+                  {#each rankPhases as phase}
+                    <div class="space-y-1">
+                      <div class="flex items-center justify-between text-xs">
+                        <span class="font-mono text-exo-light-gray"
+                          >{phase.name}</span
+                        >
+                        <span class="font-mono text-white">
+                          {formatDuration(phase.totalUs)}
+                          <span class="text-exo-light-gray/50 ml-1">
+                            ({phase.stepCount}x)
+                          </span>
+                        </span>
+                      </div>
+                      {#if phase.subcategories.length > 0}
+                        <div class="pl-2 space-y-0.5">
+                          {#each phase.subcategories as subcat}
+                            {@const pct = formatPercentage(
+                              subcat.stats.totalUs,
+                              phase.totalUs,
+                            )}
+                            {@const perStep =
+                              subcat.stats.totalUs / phase.stepCount}
+                            <div
+                              class="flex items-center justify-between text-[10px] font-mono"
+                            >
+                              <span class="text-exo-light-gray/70"
+                                >{subcat.name}</span
+                              >
+                              <span class="text-exo-light-gray">
+                                {formatDuration(subcat.stats.totalUs)}
+                                <span class="text-exo-light-gray/50"
+                                  >({pct})</span
+                                >
+                                <span class="text-exo-light-gray/30 ml-1"
+                                  >{formatDuration(perStep)}/step</span
+                                >
+                              </span>
+                            </div>
+                          {/each}
+                        </div>
+                      {/if}
+                    </div>
+                  {/each}
+                </div>
+              </div>
+            {/each}
+          </div>
+        </div>
+      {/if}
+    {/if}
+  </div>
+</div>
--- a/flake.lock
+++ b/flake.lock
@@ -21,7 +21,9 @@
          "nixpkgs"
        ],
        "purescript-overlay": "purescript-overlay",
-        "pyproject-nix": "pyproject-nix"
+        "pyproject-nix": [
+          "pyproject-nix"
+        ]
      },
      "locked": {
        "lastModified": 1765953015,
@@ -149,19 +151,44 @@
        "type": "github"
      }
    },
+    "pyproject-build-systems": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "pyproject-nix": [
+          "pyproject-nix"
+        ],
+        "uv2nix": [
+          "uv2nix"
+        ]
+      },
+      "locked": {
+        "lastModified": 1763662255,
+        "narHash": "sha256-4bocaOyLa3AfiS8KrWjZQYu+IAta05u3gYZzZ6zXbT0=",
+        "owner": "pyproject-nix",
+        "repo": "build-system-pkgs",
+        "rev": "042904167604c681a090c07eb6967b4dd4dae88c",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "build-system-pkgs",
+        "type": "github"
+      }
+    },
    "pyproject-nix": {
      "inputs": {
        "nixpkgs": [
-          "dream2nix",
          "nixpkgs"
        ]
      },
      "locked": {
-        "lastModified": 1763017646,
-        "narHash": "sha256-Z+R2lveIp6Skn1VPH3taQIuMhABg1IizJd8oVdmdHsQ=",
+        "lastModified": 1764134915,
+        "narHash": "sha256-xaKvtPx6YAnA3HQVp5LwyYG1MaN4LLehpQI8xEdBvBY=",
        "owner": "pyproject-nix",
        "repo": "pyproject.nix",
-        "rev": "47bd6f296502842643078d66128f7b5e5370790c",
+        "rev": "2c8df1383b32e5443c921f61224b198a2282a657",
        "type": "github"
      },
      "original": {
@@ -178,7 +205,10 @@
        "flake-parts": "flake-parts",
        "nixpkgs": "nixpkgs",
        "nixpkgs-swift": "nixpkgs-swift",
-        "treefmt-nix": "treefmt-nix"
+        "pyproject-build-systems": "pyproject-build-systems",
+        "pyproject-nix": "pyproject-nix",
+        "treefmt-nix": "treefmt-nix",
+        "uv2nix": "uv2nix"
      }
    },
    "rust-analyzer-src": {
@@ -239,6 +269,29 @@
        "repo": "treefmt-nix",
        "type": "github"
      }
+    },
+    "uv2nix": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "pyproject-nix": [
+          "pyproject-nix"
+        ]
+      },
+      "locked": {
+        "lastModified": 1767701098,
+        "narHash": "sha256-CJhKZnWb3gumR9oTRjFvCg/6lYTGbZRU7xtvcyWIRwU=",
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "rev": "9d357f0d2ce6f5f35ec7959d7e704452352eb4da",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "type": "github"
+      }
    }
  },
  "root": "root",
--- a/flake.nix
+++ b/flake.nix
@@ -24,6 +24,26 @@
    dream2nix = {
      url = "github:nix-community/dream2nix";
      inputs.nixpkgs.follows = "nixpkgs";
+      inputs.pyproject-nix.follows = "pyproject-nix";
+    };
+
+    # Python packaging with uv2nix
+    pyproject-nix = {
+      url = "github:pyproject-nix/pyproject.nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+
+    uv2nix = {
+      url = "github:pyproject-nix/uv2nix";
+      inputs.pyproject-nix.follows = "pyproject-nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+
+    pyproject-build-systems = {
+      url = "github:pyproject-nix/build-system-pkgs";
+      inputs.pyproject-nix.follows = "pyproject-nix";
+      inputs.uv2nix.follows = "uv2nix";
+      inputs.nixpkgs.follows = "nixpkgs";
    };

    # Pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
@@ -48,6 +68,8 @@
        inputs.treefmt-nix.flakeModule
        ./dashboard/parts.nix
        ./rust/parts.nix
+        ./python/parts.nix
+        ./resources/parts.nix
      ];

      perSystem =
@@ -58,6 +80,11 @@
          pkgsSwift = import inputs.nixpkgs-swift { inherit system; };
        in
        {
+          # Allow unfree for metal-toolchain (needed for Darwin Metal packages)
+          _module.args.pkgs = import inputs.nixpkgs {
+            inherit system;
+            config.allowUnfreePredicate = pkg: (pkg.pname or "") == "metal-toolchain";
+          };
          treefmt = {
            projectRootFile = "flake.nix";
            programs = {
@@ -79,14 +106,24 @@
                enable = true;
                package = pkgsSwift.swiftPackages.swift-format;
              };
+              shfmt.enable = true;
            };
          };

-          checks.lint = pkgs.runCommand "lint-check" { } ''
-            export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
-            ${pkgs.ruff}/bin/ruff check ${inputs.self}/
-            touch $out
-          '';
+          # Darwin-only packages: the Metal toolchain and an MLX build that uses it.
+          # On other platforms `lib.optionalAttrs` yields an empty attribute set.
+          packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin (
+            let
+              # Read the MLX version pinned in uv.lock so nix/mlx.nix can compare it
+              # with its own hard-coded version.
+              uvLock = builtins.fromTOML (builtins.readFile ./uv.lock);
+              # NOTE: `builtins.head` aborts evaluation if uv.lock has no package
+              # named "mlx".
+              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx") uvLock.package);
+              uvLockMlxVersion = mlxPackage.version;
+            in
+            {
+              metal-toolchain = pkgs.callPackage ./nix/metal-toolchain.nix { };
+              # MLX is built against the metal-toolchain package defined just above.
+              mlx = pkgs.callPackage ./nix/mlx.nix {
+                metal-toolchain = self'.packages.metal-toolchain;
+                inherit uvLockMlxVersion;
+              };
+            }
+          );

          devShells.default = with pkgs; pkgs.mkShell {
            inputsFrom = [ self'.checks.cargo-build ];
--- a/justfile
+++ b/justfile
@@ -1,7 +1,7 @@
 export NIX_CONFIG := "extra-experimental-features = nix-command flakes"

 fmt:
-    nix fmt
+    treefmt || nix fmt

 lint:
    uv run ruff check --fix
--- a/nix/darwin-build-fixes.patch
+++ b/nix/darwin-build-fixes.patch
@@ -0,0 +1,79 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 0ed30932..d8528132 100644
+--- a/CMakeLists.txt
+++ b/CMakeLists.txt
+@@ -177,11 +177,7 @@ if(MLX_BUILD_METAL)
+     add_compile_definitions(MLX_METAL_DEBUG)
+   endif()
+
+-  # Throw an error if xcrun not found
+-  execute_process(
+-    COMMAND zsh "-c" "/usr/bin/xcrun -sdk macosx --show-sdk-version"
+-    OUTPUT_VARIABLE MACOS_SDK_VERSION
+-    OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ERROR_IS_FATAL ANY)
++  set(MACOS_SDK_VERSION @sdkVersion@)
+
+   if(${MACOS_SDK_VERSION} LESS 14.0)
+     message(
+@@ -199,11 +195,8 @@ if(MLX_BUILD_METAL)
+     endif()
+     set(XCRUN_FLAGS "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
+   endif()
+-  execute_process(
+-    COMMAND
+-      zsh "-c"
+-      "echo \"__METAL_VERSION__\" | xcrun -sdk macosx metal ${XCRUN_FLAGS} -E -x metal -P - | tail -1 | tr -d '\n'"
+-    OUTPUT_VARIABLE MLX_METAL_VERSION COMMAND_ERROR_IS_FATAL ANY)
++  set(
++    MLX_METAL_VERSION @metalVersion@)
+   FetchContent_Declare(metal_cpp URL ${METAL_CPP_URL})
+   FetchContent_MakeAvailable(metal_cpp)
+   target_include_directories(
+diff --git a/cmake/extension.cmake b/cmake/extension.cmake
+index 13db804a..5b385132 100644
+--- a/cmake/extension.cmake
+++ b/cmake/extension.cmake
+@@ -36,7 +36,7 @@ macro(mlx_build_metallib)
+   add_custom_command(
+     OUTPUT ${MTLLIB_BUILD_TARGET}
+     COMMAND
+-      xcrun -sdk macosx metal
++      metal -fmodules-cache-path=${CMAKE_BINARY_DIR}/metal-cache
+       "$<LIST:TRANSFORM,${MTLLIB_INCLUDE_DIRS},PREPEND,-I>"
+       ${MTLLIB_COMPILE_OPTIONS} ${MTLLIB_SOURCES} -o ${MTLLIB_BUILD_TARGET}
+     DEPENDS ${MTLLIB_DEPS} ${MTLLIB_SOURCES}
+diff --git a/mlx/backend/metal/kernels/CMakeLists.txt b/mlx/backend/metal/kernels/CMakeLists.txt
+index 262b0495..5c7446ad 100644
+--- a/mlx/backend/metal/kernels/CMakeLists.txt
+++ b/mlx/backend/metal/kernels/CMakeLists.txt
+@@ -29,7 +29,7 @@ function(build_kernel_base TARGET SRCFILE DEPS)
+                     "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
+   endif()
+   add_custom_command(
+-    COMMAND xcrun -sdk macosx metal ${METAL_FLAGS} -c ${SRCFILE}
++    COMMAND metal -fmodules-cache-path=${CMAKE_BINARY_DIR}/metal-cache ${METAL_FLAGS} -c ${SRCFILE}
+             -I${PROJECT_SOURCE_DIR} -o ${TARGET}.air
+     DEPENDS ${SRCFILE} ${DEPS} ${BASE_HEADERS}
+     OUTPUT ${TARGET}.air
+@@ -170,7 +170,7 @@ endif()
+
+ add_custom_command(
+   OUTPUT ${MLX_METAL_PATH}/mlx.metallib
+-  COMMAND xcrun -sdk macosx metallib ${KERNEL_AIR} -o
++  COMMAND metallib ${KERNEL_AIR} -o
+           ${MLX_METAL_PATH}/mlx.metallib
+   DEPENDS ${KERNEL_AIR}
+   COMMENT "Building mlx.metallib"
+diff --git a/mlx/backend/metal/make_compiled_preamble.sh b/mlx/backend/metal/make_compiled_preamble.sh
+index bb55ed3a..94ea7dd7 100644
+--- a/mlx/backend/metal/make_compiled_preamble.sh
+++ b/mlx/backend/metal/make_compiled_preamble.sh
+@@ -31,7 +31,7 @@ OUTPUT_FILE=${OUTPUT_DIR}/${SRC_NAME}.cpp
+ mkdir -p "$OUTPUT_DIR"
+
+ # Use the metal compiler to get a list of headers (with depth)
+-CCC="xcrun -sdk macosx metal -x metal"
++CCC="metal -x metal -fmodules-cache-path=${OUTPUT_DIR}/metal-cache"
+ HDRS=$( $CCC -I"$SRC_DIR" -I"$JIT_INCLUDES" -DMLX_METAL_JIT -E -P -CC -C -H "$INPUT_FILE" $CFLAGS -w 2>&1 1>/dev/null )
+
+ # Remove any included system frameworks (for MetalPerformancePrimitive headers)
--- a/nix/metal-toolchain.nix
+++ b/nix/metal-toolchain.nix
@@ -0,0 +1,56 @@
+# Packages Apple's Metal compiler toolchain (`metal`, `metallib`) from a
+# pre-exported NAR archive, exposing both tools under $out/bin.
+{ lib, stdenvNoCC, requireFile, nix }:
+
+let
+  # The NAR is not downloaded by this expression: `requireFile` expects a file
+  # with this name and hash to already be available, and prints `message`
+  # with the manual export steps otherwise.
+  narFile = requireFile {
+    name = "metal-toolchain-17C48.nar";
+    message = ''
+      The Metal Toolchain NAR must be available.
+
+      If you have cachix configured for exo.cachix.org, this should be automatic.
+
+      Otherwise:
+        1. Install Xcode 26+ from the App Store
+        2. Run: xcodebuild -downloadComponent MetalToolchain
+        3. Export the toolchain:
+           hdiutil attach "$(find /System/Library/AssetsV2/com_apple_MobileAsset_MetalToolchain -name '*.dmg' | head -1)" -mountpoint /tmp/metal-dmg
+           cp -R /tmp/metal-dmg/Metal.xctoolchain /tmp/metal-export
+           hdiutil detach /tmp/metal-dmg
+        4. Create NAR and add to store:
+           nix nar pack /tmp/metal-export > /tmp/metal-toolchain-17C48.nar
+           nix store add --mode flat /tmp/metal-toolchain-17C48.nar
+    '';
+    hash = "sha256-ayR5mXN4sZAddwKEG2OszGRF93k9ZFc7H0yi2xbylQw=";
+  };
+in
+stdenvNoCC.mkDerivation {
+  pname = "metal-toolchain";
+  version = "17C48";
+
+  # There is no source tree to unpack or compile; the install phase below
+  # restores the NAR contents directly, and the fixup phase is skipped so the
+  # restored files are left untouched.
+  dontUnpack = true;
+  dontBuild = true;
+  dontFixup = true;
+
+  # Provides `nix-store`, used below to unpack the NAR.
+  nativeBuildInputs = [ nix ];
+
+  installPhase = ''
+    runHook preInstall
+
+    nix-store --restore $out < ${narFile}
+
+    # Create bin directory with symlinks for PATH
+    mkdir -p $out/bin
+    ln -s $out/usr/bin/metal $out/bin/metal
+    ln -s $out/usr/bin/metallib $out/bin/metallib
+
+    runHook postInstall
+  '';
+
+  # Metal language version for CMake (from: echo __METAL_VERSION__ | metal -E -x metal -P -)
+  # NOTE(review): hard-coded; presumably must be re-derived with the command
+  # above whenever the toolchain NAR is updated.
+  passthru.metalVersion = "400";
+
+  meta = {
+    description = "Apple Metal compiler toolchain";
+    platforms = [ "aarch64-darwin" ];
+    # Unfree: consumers must allow it explicitly (e.g. via an allowUnfreePredicate).
+    license = lib.licenses.unfree;
+  };
+}
--- a/nix/mlx.nix
+++ b/nix/mlx.nix
@@ -0,0 +1,158 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, replaceVars
+, fetchzip
+, cmake
+, nlohmann_json
+, apple-sdk_26
+, metal-toolchain
+, runCommand
+, fmt
+, python313Packages
+, uvLockMlxVersion
+}:
+
+assert stdenv.isDarwin;
+
+let
+  python = python313Packages.python;
+
+  # Static dependencies included directly during compilation
+  gguf-tools = fetchFromGitHub {
+    owner = "antirez";
+    repo = "gguf-tools";
+    rev = "8fa6eb65236618e28fd7710a0fba565f7faa1848";
+    hash = "sha256-15FvyPOFqTOr5vdWQoPnZz+mYH919++EtghjozDlnSA=";
+  };
+
+  metal_cpp = fetchzip {
+    url = "https://developer.apple.com/metal/cpp/files/metal-cpp_26.zip";
+    hash = "sha256-7n2eI2lw/S+Us6l7YPAATKwcIbRRpaQ8VmES7S8ZjY8=";
+  };
+
+  nanobind = fetchFromGitHub {
+    owner = "wjakob";
+    repo = "nanobind";
+    rev = "v2.10.2";
+    hash = "sha256-io44YhN+VpfHFWyvvLWSanRgbzA0whK8WlDNRi3hahU=";
+    fetchSubmodules = true;
+  };
+
+  # MLX built from source with the Metal backend enabled, using the Nix-provided
+  # Metal toolchain and Apple SDK.
+  mlx = stdenv.mkDerivation rec {
+    pname = "mlx";
+    # The version is pinned here, and evaluation fails with an explanatory
+    # message if it differs from the version passed in as `uvLockMlxVersion`.
+    version = let v = "0.30.4"; in
+      assert v == uvLockMlxVersion || throw "MLX version mismatch: nix/mlx.nix has ${v} but uv.lock has ${uvLockMlxVersion}. Update both the version and hash in nix/mlx.nix.";
+      v;
+    pyproject = true;
+
+    src = fetchFromGitHub {
+      owner = "ml-explore";
+      repo = "mlx";
+      tag = "v${version}";
+      hash = "sha256-OJk6jPlbaSlsUdk3ADz3tWcRzTWXRof3/q8Soe1AO6w=";
+    };
+
+    # The patch is a template: `replaceVars` fills in its @sdkVersion@ and
+    # @metalVersion@ placeholders before it is applied.
+    patches = [
+      (replaceVars ./darwin-build-fixes.patch {
+        sdkVersion = apple-sdk_26.version;
+        metalVersion = metal-toolchain.metalVersion;
+      })
+    ];
+
+    # Replace the "g++" string in the CPU JIT compiler source with the build's
+    # $CXX; `--replace-fail` aborts the build if that string is not found.
+    postPatch = ''
+      substituteInPlace mlx/backend/cpu/jit_compiler.cpp \
+        --replace-fail "g++" "$CXX"
+    '';
+
+    # Skip the cmake hook's configure phase.
+    # NOTE(review): presumably CMake is driven by the Python build instead,
+    # configured through CMAKE_ARGS below - confirm.
+    dontUseCmakeConfigure = true;
+
+    enableParallelBuilding = true;
+
+    # Allows multiple cores to be used in Python builds.
+    postUnpack = ''
+      export MAKEFLAGS+="''${enableParallelBuilding:+-j$NIX_BUILD_CORES}"
+    '';
+
+    # Updates the wrong fetcher rev attribute
+    passthru.skipBulkUpdate = true;
+
+    env = {
+      DEV_RELEASE = 1;
+      # CMake options: the FETCHCONTENT_SOURCE_DIR_* entries point at the
+      # pre-fetched sources defined in this file, FETCHCONTENT_FULLY_DISCONNECTED
+      # is turned on, the Metal backend is enabled, and the deployment target
+      # and sysroot are taken from apple-sdk_26.
+      CMAKE_ARGS = toString [
+        (lib.cmakeBool "USE_SYSTEM_FMT" true)
+        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_GGUFLIB" "${gguf-tools}")
+        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_JSON" "${nlohmann_json.src}")
+        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_NANOBIND" "${nanobind}")
+        (lib.cmakeBool "FETCHCONTENT_FULLY_DISCONNECTED" true)
+        (lib.cmakeBool "MLX_BUILD_METAL" true)
+        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_METAL_CPP" "${metal_cpp}")
+        (lib.cmakeOptionType "string" "CMAKE_OSX_DEPLOYMENT_TARGET" "${apple-sdk_26.version}")
+        (lib.cmakeOptionType "filepath" "CMAKE_OSX_SYSROOT" "${apple-sdk_26.passthru.sdkroot}")
+      ];
+      SDKROOT = apple-sdk_26.passthru.sdkroot;
+      MACOSX_DEPLOYMENT_TARGET = apple-sdk_26.version;
+    };
+
+    build-system = [
+      python313Packages.setuptools
+    ];
+
+    # Build-time tools; metal-toolchain puts `metal` and `metallib` on PATH,
+    # which the patched build invokes directly instead of going through xcrun.
+    nativeBuildInputs = [
+      cmake
+      metal-toolchain
+      python313Packages.pypaBuildHook
+      python313Packages.pypaInstallHook
+      python313Packages.setuptools
+      python313Packages.typing-extensions
+      python313Packages.wheel
+      python313Packages.cmake
+      python313Packages.ninja
+    ];
+
+    buildInputs = [
+      fmt
+      gguf-tools
+      python313Packages.nanobind
+      python313Packages.pybind11
+      apple-sdk_26
+    ];
+
+    # Tests require Metal GPU access which isn't available in the Nix sandbox.
+    # To run tests, build with: nix build --option sandbox false .#mlx.passthru.tests.mlxTest
+    doCheck = false;
+
+    pythonImportsCheck = [ "mlx" ];
+
+    passthru.tests = {
+      # Runs example scripts to verify MLX works. Requires --option sandbox false
+      # since Metal GPU access is needed.
+      mlxTest =
+        runCommand "run-mlx-examples"
+          {
+            buildInputs = [ mlx ];
+            nativeBuildInputs = [ python ];
+          }
+          ''
+            cp ${src}/examples/python/logistic_regression.py .
+            ${python.interpreter} logistic_regression.py
+            rm logistic_regression.py
+
+            cp ${src}/examples/python/linear_regression.py .
+            ${python.interpreter} linear_regression.py
+            rm linear_regression.py
+
+            touch $out
+          '';
+    };
+
+    meta = {
+      homepage = "https://github.com/ml-explore/mlx";
+      description = "Array framework for Apple silicon";
+      changelog = "https://github.com/ml-explore/mlx/releases/tag/${src.tag}";
+      license = lib.licenses.mit;
+      platforms = [ "aarch64-darwin" ];
+    };
+  };
+in
+mlx
--- a/packaging/pyinstaller/exo.spec
+++ b/packaging/pyinstaller/exo.spec
@@ -10,6 +10,7 @@ PROJECT_ROOT = Path.cwd()
 SOURCE_ROOT = PROJECT_ROOT / "src"
 ENTRYPOINT = SOURCE_ROOT / "exo" / "__main__.py"
 DASHBOARD_DIR = PROJECT_ROOT / "dashboard" / "build"
+RESOURCES_DIR = PROJECT_ROOT / "resources"
 EXO_SHARED_MODELS_DIR = SOURCE_ROOT / "exo" / "shared" / "models"

 if not ENTRYPOINT.is_file():
@@ -18,6 +19,9 @@ if not ENTRYPOINT.is_file():
 if not DASHBOARD_DIR.is_dir():
    raise SystemExit(f"Dashboard assets are missing: {DASHBOARD_DIR}")

+if not RESOURCES_DIR.is_dir():
+    raise SystemExit(f"Resource assets are missing: {RESOURCES_DIR}")
+
 if not EXO_SHARED_MODELS_DIR.is_dir():
    raise SystemExit(f"Shared model assets are missing: {EXO_SHARED_MODELS_DIR}")

@@ -58,6 +62,7 @@ HIDDEN_IMPORTS = sorted(

 DATAS: list[tuple[str, str]] = [
    (str(DASHBOARD_DIR), "dashboard"),
+    (str(RESOURCES_DIR), "resources"),
    (str(MLX_LIB_DIR), "mlx/lib"),
    (str(EXO_SHARED_MODELS_DIR), "exo/shared/models"),
 ]
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,9 +17,9 @@ dependencies = [
    "loguru>=0.7.3",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
-    "mlx @ git+https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git; sys_platform == 'darwin'",
-    "mlx[cpu]==0.30.3; sys_platform == 'linux'",
-    "mlx-lm==0.30.5",
+    "mlx==0.30.4; sys_platform == 'darwin'",
+    "mlx[cpu]==0.30.4; sys_platform == 'linux'",
+    "mlx-lm",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
    "hypercorn>=0.18.0",
    "openai-harmony>=0.0.8",
@@ -63,6 +63,7 @@ members = [

 [tool.uv.sources]
 exo_pyo3_bindings = { workspace = true }
+mlx-lm = { git = "https://github.com/ml-explore/mlx-lm", branch = "main" }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
 # mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -0,0 +1,95 @@
+{ inputs, ... }:
+{
+  perSystem =
+    { config, self', pkgs, lib, system, ... }:
+    let
+      # Load the uv workspace (uv.lock) rooted at this flake's own source tree
+      workspace = inputs.uv2nix.lib.workspace.loadWorkspace {
+        workspaceRoot = inputs.self;
+      };
+
+      # Turn the workspace into a package overlay, preferring prebuilt wheels from PyPI
+      # (mlx is the exception: buildSystemsOverlay swaps in our pure Nix Metal build)
+      overlay = workspace.mkPyprojectOverlay { sourcePreference = "wheel"; };
+
+      # Overlay that substitutes Nix-built components for their workspace versions
+      exoOverlay = final: prev: {
+        # Replace the workspace's exo_pyo3_bindings with the wheel from self'.packages.exo_pyo3_bindings
+        exo-pyo3-bindings = pkgs.stdenv.mkDerivation {
+          pname = "exo-pyo3-bindings";
+          version = "0.1.0";
+          src = self'.packages.exo_pyo3_bindings;
+          # src is a pre-built wheel, so install it with pyprojectWheelHook
+          nativeBuildInputs = [ final.pyprojectWheelHook ];
+          dontStrip = true;
+        };
+      };
+
+      python = pkgs.python313;
+
+      # Overlay providing custom packages and the build systems they need
+      buildSystemsOverlay = final: prev: {
+        # Use our pure Nix-built MLX (Metal support) instead of the workspace's mlx
+        mlx = self'.packages.mlx;
+
+        # mlx-lm is a git dependency, so setuptools has to be added as a build input
+        mlx-lm = prev.mlx-lm.overrideAttrs (old: {
+          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
+            final.setuptools
+          ];
+        });
+      };
+
+      pythonSet = (pkgs.callPackage inputs.pyproject-nix.build.packages {
+        inherit python;
+      }).overrideScope (
+        lib.composeManyExtensions [
+          inputs.pyproject-build-systems.overlays.default
+          overlay
+          exoOverlay
+          buildSystemsOverlay
+        ]
+      );
+      exoVenv = pythonSet.mkVirtualEnv "exo-env" workspace.deps.default;
+
+      # Same dependency set as exoVenv, plus exo's "dev" dependencies for testing
+      testVenv = pythonSet.mkVirtualEnv "exo-test-env" (
+        workspace.deps.default // {
+          exo = [ "dev" ]; # "dev" brings in pytest, pytest-asyncio, pytest-env
+        }
+      );
+
+      exoPackage = pkgs.runCommand "exo"
+        {
+          nativeBuildInputs = [ pkgs.makeWrapper ];
+        }
+        ''
+          mkdir -p $out/bin
+
+          # Wrap each venv entry point so it can find the dashboard and resource assets (plus macmon on Darwin)
+          for script in exo exo-master exo-worker; do
+            makeWrapper ${exoVenv}/bin/$script $out/bin/$script \
+              --set EXO_DASHBOARD_DIR ${self'.packages.dashboard} \
+              --set EXO_RESOURCES_DIR ${self'.packages.resources} \
+              ${lib.optionalString pkgs.stdenv.hostPlatform.isDarwin "--prefix PATH : ${pkgs.macmon}/bin"}
+          done
+        '';
+    in
+    {
+      # exo packages are only exposed on macOS hosts (they require MLX/Metal)
+      packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin {
+        exo = exoPackage;
+        # Test venv exported so pytest can run outside of the Nix sandbox (needs GPU access)
+        exo-test-env = testVenv;
+      };
+
+      checks = {
+        # Ruff lint over the flake source (works on all platforms)
+        lint = pkgs.runCommand "ruff-lint" { } ''
+          export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
+          ${pkgs.ruff}/bin/ruff check ${inputs.self}/
+          touch $out
+        '';
+      };
+    };
+}
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-Krea-dev-4bit"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 15475325472
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 5950704160
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-Krea-dev-8bit"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 21426029632
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 11901408320
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-Krea-dev"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 33327437952
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 23802816640
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-dev-4bit"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 15475325472
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 5950704160
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-dev-8bit"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 21426029632
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 11901408320
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-dev"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 33327437952
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 23802816640
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-schnell-4bit"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 15470210592
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 5945589280
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-schnell-8bit"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 21415799872
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 11891178560
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
@@ -0,0 +1,45 @@
+model_id = "exolabs/FLUX.1-schnell"
+n_layers = 57
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 33306978432
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
+
+[[components]]
+component_name = "text_encoder_2"
+component_path = "text_encoder_2/"
+n_layers = 24
+can_shard = false
+safetensors_index_filename = "model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 9524621312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 57
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 23782357120
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
@@ -0,0 +1,35 @@
+model_id = "exolabs/Qwen-Image-4bit"
+n_layers = 60
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 26799533856
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 16584333312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 60
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 10215200544
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
@@ -0,0 +1,35 @@
+model_id = "exolabs/Qwen-Image-8bit"
+n_layers = 60
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 37014734400
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 16584333312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 60
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 20430401088
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
@@ -0,0 +1,35 @@
+model_id = "exolabs/Qwen-Image-Edit-2509-4bit"
+n_layers = 60
+hidden_size = 1
+supports_tensor = false
+tasks = ["ImageToImage"]
+
+[storage_size]
+in_bytes = 26799533856
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 16584333312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 60
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 10215200544
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
@@ -0,0 +1,35 @@
+model_id = "exolabs/Qwen-Image-Edit-2509-8bit"
+n_layers = 60
+hidden_size = 1
+supports_tensor = false
+tasks = ["ImageToImage"]
+
+[storage_size]
+in_bytes = 37014734400
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 16584333312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 60
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 20430401088
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
@@ -0,0 +1,35 @@
+model_id = "exolabs/Qwen-Image-Edit-2509"
+n_layers = 60
+hidden_size = 1
+supports_tensor = false
+tasks = ["ImageToImage"]
+
+[storage_size]
+in_bytes = 57445135488
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 16584333312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 60
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 40860802176
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/image_model_cards/exolabs--Qwen-Image.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image.toml
@@ -0,0 +1,35 @@
+model_id = "exolabs/Qwen-Image"
+n_layers = 60
+hidden_size = 1
+supports_tensor = false
+tasks = ["TextToImage"]
+
+[storage_size]
+in_bytes = 57445135488
+
+[[components]]
+component_name = "text_encoder"
+component_path = "text_encoder/"
+n_layers = 12
+can_shard = false
+
+[components.storage_size]
+in_bytes = 16584333312
+
+[[components]]
+component_name = "transformer"
+component_path = "transformer/"
+n_layers = 60
+can_shard = true
+safetensors_index_filename = "diffusion_pytorch_model.safetensors.index.json"
+
+[components.storage_size]
+in_bytes = 40860802176
+
+[[components]]
+component_name = "vae"
+component_path = "vae/"
+can_shard = false
+
+[components.storage_size]
+in_bytes = 0
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/DeepSeek-V3.1-4bit"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "deepseek"
+quantization = "4bit"
+base_model = "DeepSeek V3.1"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 405874409472
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/DeepSeek-V3.1-8bit"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "deepseek"
+quantization = "8bit"
+base_model = "DeepSeek V3.1"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 765577920512
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.5-Air-8bit"
+n_layers = 46
+hidden_size = 4096
+supports_tensor = false
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "8bit"
+base_model = "GLM 4.5 Air"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 122406567936
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.5-Air-bf16"
+n_layers = 46
+hidden_size = 4096
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "bf16"
+base_model = "GLM 4.5 Air"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 229780750336
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.7-4bit"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "4bit"
+base_model = "GLM 4.7"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 198556925568
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.7-6bit"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "6bit"
+base_model = "GLM 4.7"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 286737579648
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.7-8bit-gs32"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "8bit"
+base_model = "GLM 4.7"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 396963397248
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.7-Flash-4bit"
+n_layers = 47
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "4bit"
+base_model = "GLM 4.7 Flash"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 19327352832
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.7-Flash-5bit"
+n_layers = 47
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "5bit"
+base_model = "GLM 4.7 Flash"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 22548578304
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.7-Flash-6bit"
+n_layers = 47
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "6bit"
+base_model = "GLM 4.7 Flash"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 26843545600
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/GLM-4.7-Flash-8bit"
+n_layers = 47
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "8bit"
+base_model = "GLM 4.7 Flash"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 34359738368
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Kimi-K2-Instruct-4bit"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "kimi"
+quantization = "4bit"
+base_model = "Kimi K2"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 620622774272
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Kimi-K2-Thinking"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "kimi"
+quantization = ""
+base_model = "Kimi K2"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 706522120192
--- a/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Kimi-K2.5"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "kimi"
+quantization = ""
+base_model = "Kimi K2.5"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 662498705408
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
+n_layers = 16
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "4bit"
+base_model = "Llama 3.2 1B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 729808896
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
+n_layers = 28
+hidden_size = 3072
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "4bit"
+base_model = "Llama 3.2 3B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 1863319552
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
+n_layers = 28
+hidden_size = 3072
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "8bit"
+base_model = "Llama 3.2 3B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 3501195264
--- a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "4bit"
+base_model = "Llama 3.3 70B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 40652242944
--- a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "8bit"
+base_model = "Llama 3.3 70B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 76799803392
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "4bit"
+base_model = "Llama 3.1 70B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 40652242944
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "4bit"
+base_model = "Llama 3.1 8B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 4637851648
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "8bit"
+base_model = "Llama 3.1 8B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 8954839040
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "bf16"
+base_model = "Llama 3.1 8B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 16882073600
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/MiniMax-M2.1-3bit"
+n_layers = 61
+hidden_size = 3072
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "minimax"
+quantization = "3bit"
+base_model = "MiniMax M2.1"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 100086644736
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/MiniMax-M2.1-8bit"
+n_layers = 61
+hidden_size = 3072
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "minimax"
+quantization = "8bit"
+base_model = "MiniMax M2.1"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 242986745856
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-0.6B-4bit"
+n_layers = 28
+hidden_size = 1024
+supports_tensor = false
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3 0.6B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 342884352
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-0.6B-8bit"
+n_layers = 28
+hidden_size = 1024
+supports_tensor = false
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3 0.6B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 698351616
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
+n_layers = 94
+hidden_size = 4096
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3 235B"
+capabilities = ["text"] # Instruct-2507 is a non-thinking-only release
+
+[storage_size]
+in_bytes = 141733920768
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
+n_layers = 94
+hidden_size = 4096
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3 235B"
+capabilities = ["text"] # Instruct-2507 is a non-thinking-only release
+
+[storage_size]
+in_bytes = 268435456000
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-30B-A3B-4bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3 30B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 17612931072
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-30B-A3B-8bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3 30B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 33279705088
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
+n_layers = 62
+hidden_size = 6144
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3 Coder 480B"
+capabilities = ["text", "code"]
+
+[storage_size]
+in_bytes = 289910292480
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
+n_layers = 62
+hidden_size = 6144
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3 Coder 480B"
+capabilities = ["text", "code"]
+
+[storage_size]
+in_bytes = 579820584960
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3 Next 80B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 46976204800
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3 Next 80B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 88814387200
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3 Next 80B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 47080074240
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3 Next 80B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 88814387200
--- a/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
+n_layers = 36
+hidden_size = 2880
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "gpt-oss"
+quantization = "MXFP4-Q8"
+base_model = "GPT-OSS 120B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 70652212224
--- a/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/gpt-oss-20b-MXFP4-Q8"
+n_layers = 24
+hidden_size = 2880
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "gpt-oss"
+quantization = "MXFP4-Q8"
+base_model = "GPT-OSS 20B"
+capabilities = ["text", "thinking"]
+
+[storage_size]
+in_bytes = 12025908224
--- a/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml
+++ b/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml
@@ -0,0 +1,12 @@
+model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "llama"
+quantization = "fp16"
+base_model = "Llama 3.3 70B"
+capabilities = ["text"]
+
+[storage_size]
+in_bytes = 144383672320
--- a/resources/parts.nix
+++ b/resources/parts.nix
@@ -0,0 +1,17 @@
+{ inputs, ... }:
+{
+  perSystem =
+    { pkgs, lib, ... }:
+    let
+      # Filter source to only include resources directory
+      resourcesSrc = lib.cleanSourceWith {
+        src = inputs.self + "/resources";
+      };
+    in
+    {
+      packages.resources = pkgs.runCommand "exo-resources" { } ''
+        cp -r ${resourcesSrc} $out
+      '';
+    };
+}
+
--- a/rust/clippy.toml
+++ b/rust/clippy.toml
@@ -1,2 +0,0 @@
-# we can manually exclude false-positive lint errors for dual packages (if in dependencies)
-#allowed-duplicate-crates = ["hashbrown"]
--- a/rust/exo_pyo3_bindings/Cargo.toml
+++ b/rust/exo_pyo3_bindings/Cargo.toml
@@ -25,44 +25,25 @@ workspace = true
 networking = { workspace = true }

 # interop
-pyo3 = { version = "0.27.1", features = [
-    # "abi3-py311", # tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.11
-    "nightly", # enables better-supported GIL integration
-    "experimental-async", # async support in #[pyfunction] & #[pymethods]
-    #"experimental-inspect", # inspection of generated binary => easier to automate type-hint generation
-    #"py-clone", # adding Clone-ing of `Py<T>` without GIL (may cause panics - remove if panics happen)
-    "multiple-pymethods", # allows multiple #[pymethods] sections per class
+pyo3 = { version = "0.27.2", features = [
+    "abi3-py313", # tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.13
+    # "nightly", # enables better-supported GIL integration
+    "experimental-async" # async support in #[pyfunction] & #[pymethods]
+    # "experimental-inspect", # inspection of generated binary => easier to automate type-hint generation
+    # "py-clone", # adding Clone-ing of `Py<T>` without GIL (may cause panics - remove if panics happen)
+    # "multiple-pymethods", # allows multiple #[pymethods] sections per class

    # integrations with other libraries
-    "arc_lock", "bigdecimal", "either", "hashbrown", "indexmap", "num-bigint", "num-complex", "num-rational",
-    "ordered-float", "rust_decimal", "smallvec",
+    # "arc_lock", "bigdecimal", "either", "hashbrown", "indexmap", "num-bigint", "num-complex", "num-rational",
+    # "ordered-float", "rust_decimal", "smallvec",
    # "anyhow", "chrono", "chrono-local", "chrono-tz", "eyre", "jiff-02", "lock_api", "parking-lot", "time",  "serde",
 ] }
 pyo3-stub-gen = { version = "0.17.2" }
 pyo3-async-runtimes = { version = "0.27.0", features = ["attributes", "tokio-runtime", "testing"] }
 pyo3-log = "0.13.2"

-# macro dependencies
-extend = { workspace = true }
-delegate = { workspace = true }
-impl-trait-for-tuples = { workspace = true }
-derive_more = { workspace = true }
-pin-project = { workspace = true }
-
 # async runtime
 tokio = { workspace = true, features = ["full", "tracing"] }
-futures = { workspace = true }
-
-# utility dependencies
-once_cell = "1.21.3"
-thread_local = "1.1.9"
-util = { workspace = true }
-thiserror = { workspace = true }
-#internment = { workspace = true }
-#recursion = { workspace = true }
-#generativity = { workspace = true }
-#itertools = { workspace = true }
-

 # Tracing
 #tracing = "0.1"
@@ -75,3 +56,4 @@ env_logger = "0.11"

 # Networking
 libp2p = { workspace = true, features = ["full"] }
+futures-lite = "2.6.1"
--- a/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi
+++ b/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi
@@ -2,220 +2,39 @@
 # ruff: noqa: E501, F401

 import builtins
-import enum
 import typing

-@typing.final
-class AllQueuesFullError(builtins.Exception):
-    def __new__(cls, *args: typing.Any) -> AllQueuesFullError: ...
-    def __repr__(self) -> builtins.str: ...
-    def __str__(self) -> builtins.str: ...
-
-@typing.final
-class ConnectionUpdate:
-    @property
-    def update_type(self) -> ConnectionUpdateType:
-        r"""
-        Whether this is a connection or disconnection event
-        """
-    @property
-    def peer_id(self) -> PeerId:
-        r"""
-        Identity of the peer that we have connected to or disconnected from.
-        """
-    @property
-    def remote_ipv4(self) -> builtins.str:
-        r"""
-        Remote connection's IPv4 address.
-        """
-    @property
-    def remote_tcp_port(self) -> builtins.int:
-        r"""
-        Remote connection's TCP port.
-        """
-
@typing.final
 class Keypair:
-    r"""
-    Identity keypair of a node.
-    """
    @staticmethod
-    def generate_ed25519() -> Keypair:
+    def generate() -> Keypair:
        r"""
-        Generate a new Ed25519 keypair.
-        """
-    @staticmethod
-    def generate_ecdsa() -> Keypair:
-        r"""
-        Generate a new ECDSA keypair.
-        """
-    @staticmethod
-    def generate_secp256k1() -> Keypair:
-        r"""
-        Generate a new Secp256k1 keypair.
+        Generate a new ed25519 keypair
        """
    @staticmethod
    def from_protobuf_encoding(bytes: bytes) -> Keypair:
        r"""
        Decode a private key from a protobuf structure and parse it as a `Keypair`.
        """
-    @staticmethod
-    def rsa_from_pkcs8(bytes: bytes) -> Keypair:
-        r"""
-        Decode an keypair from a DER-encoded secret key in PKCS#8 `PrivateKeyInfo`
-        format (i.e. unencrypted) as defined in [RFC5208].
-        
-        [RFC5208]: https://tools.ietf.org/html/rfc5208#section-5
-        """
-    @staticmethod
-    def secp256k1_from_der(bytes: bytes) -> Keypair:
-        r"""
-        Decode a keypair from a DER-encoded Secp256k1 secret key in an `ECPrivateKey`
-        structure as defined in [RFC5915].
-        
-        [RFC5915]: https://tools.ietf.org/html/rfc5915
-        """
-    @staticmethod
-    def ed25519_from_bytes(bytes: bytes) -> Keypair: ...
    def to_protobuf_encoding(self) -> bytes:
        r"""
-        Encode a private key as protobuf structure.
-        """
-    def to_peer_id(self) -> PeerId:
-        r"""
-        Convert the `Keypair` into the corresponding `PeerId`.
+        Encode a private key to a protobuf structure.
        """
+    def to_string(self) -> builtins.str: ...

@typing.final
-class Multiaddr:
-    r"""
-    Representation of a Multiaddr.
-    """
+class PyPeer:
    @staticmethod
-    def empty() -> Multiaddr:
-        r"""
-        Create a new, empty multiaddress.
-        """
-    @staticmethod
-    def with_capacity(n: builtins.int) -> Multiaddr:
-        r"""
-        Create a new, empty multiaddress with the given capacity.
-        """
-    @staticmethod
-    def from_bytes(bytes: bytes) -> Multiaddr:
-        r"""
-        Parse a `Multiaddr` value from its byte slice representation.
-        """
-    @staticmethod
-    def from_string(string: builtins.str) -> Multiaddr:
-        r"""
-        Parse a `Multiaddr` value from its string representation.
-        """
-    def len(self) -> builtins.int:
-        r"""
-        Return the length in bytes of this multiaddress.
-        """
-    def is_empty(self) -> builtins.bool:
-        r"""
-        Returns true if the length of this multiaddress is 0.
-        """
-    def to_bytes(self) -> bytes:
-        r"""
-        Return a copy of this [`Multiaddr`]'s byte representation.
-        """
-    def to_string(self) -> builtins.str:
-        r"""
-        Convert a Multiaddr to a string.
-        """
+    def new(kp: Keypair, namespace: builtins.str) -> PyPeer: ...
+    async def subscribe(self, topic: builtins.str) -> None: ...
+    async def unsubscribe(self, topic: builtins.str) -> None: ...
+    async def send(self, topic: builtins.str, payload: bytes) -> None: ...
+    async def run(self) -> None: ...
+    async def recv(self) -> PySwarmEvent: ...

@typing.final
-class NetworkingHandle:
-    def __new__(cls, identity: Keypair) -> NetworkingHandle: ...
-    async def connection_update_recv(self) -> ConnectionUpdate:
-        r"""
-        Receives the next `ConnectionUpdate` from networking.
-        """
-    async def connection_update_recv_many(self, limit: builtins.int) -> builtins.list[ConnectionUpdate]:
-        r"""
-        Receives at most `limit` `ConnectionUpdate`s from networking and returns them.
-        
-        For `limit = 0`, an empty collection of `ConnectionUpdate`s will be returned immediately.
-        For `limit > 0`, if there are no `ConnectionUpdate`s in the channel's queue this method
-        will sleep until a `ConnectionUpdate`s is sent.
-        """
-    async def gossipsub_subscribe(self, topic: builtins.str) -> builtins.bool:
-        r"""
-        Subscribe to a `GossipSub` topic.
-        
-        Returns `True` if the subscription worked. Returns `False` if we were already subscribed.
-        """
-    async def gossipsub_unsubscribe(self, topic: builtins.str) -> builtins.bool:
-        r"""
-        Unsubscribes from a `GossipSub` topic.
-        
-        Returns `True` if we were subscribed to this topic. Returns `False` if we were not subscribed.
-        """
-    async def gossipsub_publish(self, topic: builtins.str, data: bytes) -> None:
-        r"""
-        Publishes a message with multiple topics to the `GossipSub` network.
-        
-        If no peers are found that subscribe to this topic, throws `NoPeersSubscribedToTopicError` exception.
-        """
-    async def gossipsub_recv(self) -> tuple[builtins.str, bytes]:
-        r"""
-        Receives the next message from the `GossipSub` network.
-        """
-    async def gossipsub_recv_many(self, limit: builtins.int) -> builtins.list[tuple[builtins.str, bytes]]:
-        r"""
-        Receives at most `limit` messages from the `GossipSub` network and returns them.
-        
-        For `limit = 0`, an empty collection of messages will be returned immediately.
-        For `limit > 0`, if there are no messages in the channel's queue this method
-        will sleep until a message is sent.
-        """
-
-@typing.final
-class NoPeersSubscribedToTopicError(builtins.Exception):
-    def __new__(cls, *args: typing.Any) -> NoPeersSubscribedToTopicError: ...
-    def __repr__(self) -> builtins.str: ...
-    def __str__(self) -> builtins.str: ...
-
-@typing.final
-class PeerId:
-    r"""
-    Identifier of a peer of the network.
-    
-    The data is a `CIDv0` compatible multihash of the protobuf encoded public key of the peer
-    as specified in [specs/peer-ids](https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md).
-    """
-    @staticmethod
-    def random() -> PeerId:
-        r"""
-        Generates a random peer ID from a cryptographically secure PRNG.
-        
-        This is useful for randomly walking on a DHT, or for testing purposes.
-        """
-    @staticmethod
-    def from_bytes(bytes: bytes) -> PeerId:
-        r"""
-        Parses a `PeerId` from bytes.
-        """
-    def to_bytes(self) -> bytes:
-        r"""
-        Returns a raw bytes representation of this `PeerId`.
-        """
-    def to_base58(self) -> builtins.str:
-        r"""
-        Returns a base-58 encoded string of this `PeerId`.
-        """
-    def __repr__(self) -> builtins.str: ...
-    def __str__(self) -> builtins.str: ...
-
-@typing.final
-class ConnectionUpdateType(enum.Enum):
-    r"""
-    Connection or disconnection event discriminant type.
-    """
-    Connected = ...
-    Disconnected = ...
+class PySwarmEvent:
+    def downcast_discovered(self) -> typing.Optional[builtins.str]: ...
+    def downcast_expired(self) -> typing.Optional[builtins.str]: ...
+    def downcast_message(self) -> typing.Optional[tuple[builtins.str, builtins.str, bytes]]: ...

--- a/rust/exo_pyo3_bindings/src/allow_threading.rs
+++ b/rust/exo_pyo3_bindings/src/allow_threading.rs
@@ -1,8 +1,4 @@
-//! SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-//!
-
-use pin_project::pin_project;
-use pyo3::marker::Ungil;
+//! See: <https://pyo3.rs/v0.27.2/async-await.html#detaching-from-the-interpreter-across-await>
 use pyo3::prelude::*;
 use std::{
    future::Future,
@@ -10,31 +6,17 @@ use std::{
    task::{Context, Poll},
 };

-/// SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-#[pin_project]
-#[repr(transparent)]
-pub(crate) struct AllowThreads<F>(#[pin] F);
-
-impl<F> AllowThreads<F>
-where
-    Self: Future,
-{
-    pub fn new(f: F) -> Self {
-        Self(f)
-    }
-}
+pub struct AllowThreads<F>(pub(crate) F);

 impl<F> Future for AllowThreads<F>
 where
-    F: Future + Ungil,
-    F::Output: Ungil,
+    F: Future + Unpin + Send,
+    F::Output: Send,
 {
    type Output = F::Output;

-    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let waker = cx.waker();
-        Python::with_gil(|py| {
-            py.allow_threads(|| self.project().0.poll(&mut Context::from_waker(waker)))
-        })
+        Python::attach(|py| py.detach(|| pin!(&mut self.0).poll(&mut Context::from_waker(waker))))
    }
 }
--- a/rust/exo_pyo3_bindings/src/examples/mod.rs
+++ b/rust/exo_pyo3_bindings/src/examples/mod.rs
@@ -1,240 +0,0 @@
-//! This module exists to hold examples of some pyo3 patterns that may be too complex to
-//! re-create from scratch, but too inhomogenous to create an abstraction/wrapper around.
-//!
-//! Pattern examples include:
-//!  - Async task handles: with GC-integrated cleanup
-//!  - Sync/async callbacks from python: with propper eventloop handling
-//!
-//! Mutability pattern: https://pyo3.rs/v0.26.0/async-await.html#send--static-constraint
-//!  - Store mutable fields in tokio's `Mutex<T>`
-//!  - For async code: take `&self` and `.lock().await`
-//!  - For sync code: take `&mut self` and `.get_mut()`
-
-use crate::ext::{PyResultExt as _, ResultExt as _, TokioRuntimeExt as _};
-use futures::FutureExt as _;
-use futures::future::BoxFuture;
-use pyo3::exceptions::PyRuntimeError;
-use pyo3::prelude::{PyModule, PyModuleMethods as _};
-use pyo3::{
-    Bound, Py, PyAny, PyErr, PyResult, PyTraverseError, PyVisit, Python, pyclass, pymethods,
-};
-use std::time::Duration;
-use tokio::sync::mpsc;
-use tokio::sync::mpsc::error::TryRecvError;
-
-fn needs_tokio_runtime() {
-    tokio::runtime::Handle::current();
-}
-
-type SyncCallback = Box<dyn Fn() + Send + Sync>;
-type AsyncCallback = Box<dyn Fn() -> BoxFuture<'static, ()> + Send + Sync>;
-
-enum AsyncTaskMessage {
-    SyncCallback(SyncCallback),
-    AsyncCallback(AsyncCallback),
-}
-
-async fn async_task(
-    sender: mpsc::UnboundedSender<()>,
-    mut receiver: mpsc::UnboundedReceiver<AsyncTaskMessage>,
-) {
-    log::info!("RUST: async task started");
-
-    // task state
-    let mut interval = tokio::time::interval(Duration::from_secs(1));
-
-    let mut sync_cbs: Vec<SyncCallback> = vec![];
-    let mut async_cbs: Vec<AsyncCallback> = vec![];
-
-    loop {
-        tokio::select! {
-            // handle incoming messages from task-handle
-            message = receiver.recv() => {
-                // handle closed channel by exiting
-                let Some(message) = message else {
-                    log::info!("RUST: channel closed");
-                    break;
-                };
-
-                // dispatch incoming event
-                match message {
-                    AsyncTaskMessage::SyncCallback(cb) => {
-                        sync_cbs.push(cb);
-                    }
-                    AsyncTaskMessage::AsyncCallback(cb) => {
-                        async_cbs.push(cb);
-                    }
-                }
-            }
-
-            // handle all other events
-            _ = interval.tick() => {
-                log::info!("RUST: async task tick");
-
-                // call back all sync callbacks
-                for cb in &sync_cbs {
-                    cb();
-                }
-
-                // call back all async callbacks
-                for cb in &async_cbs {
-                    cb().await;
-                }
-
-                // send event on unbounded channel
-                sender.send(()).expect("handle receiver cannot be closed/dropped");
-            }
-        }
-    }
-
-    log::info!("RUST: async task stopped");
-}
-
-// #[gen_stub_pyclass]
-#[pyclass(name = "AsyncTaskHandle")]
-#[derive(Debug)]
-struct PyAsyncTaskHandle {
-    sender: Option<mpsc::UnboundedSender<AsyncTaskMessage>>,
-    receiver: mpsc::UnboundedReceiver<()>,
-}
-
-#[allow(clippy::expect_used)]
-impl PyAsyncTaskHandle {
-    const fn sender(&self) -> &mpsc::UnboundedSender<AsyncTaskMessage> {
-        self.sender
-            .as_ref()
-            .expect("The sender should only be None after de-initialization.")
-    }
-
-    const fn sender_mut(&mut self) -> &mpsc::UnboundedSender<AsyncTaskMessage> {
-        self.sender
-            .as_mut()
-            .expect("The sender should only be None after de-initialization.")
-    }
-
-    const fn new(
-        sender: mpsc::UnboundedSender<AsyncTaskMessage>,
-        receiver: mpsc::UnboundedReceiver<()>,
-    ) -> Self {
-        Self {
-            sender: Some(sender),
-            receiver,
-        }
-    }
-}
-
-// #[gen_stub_pymethods]
-#[pymethods]
-impl PyAsyncTaskHandle {
-    #[new]
-    fn py_new(py: Python<'_>) -> PyResult<Self> {
-        use pyo3_async_runtimes::tokio::get_runtime;
-
-        // create communication channel TOWARDS our task
-        let (h_sender, t_receiver) = mpsc::unbounded_channel::<AsyncTaskMessage>();
-
-        // create communication channel FROM our task
-        let (t_sender, h_receiver) = mpsc::unbounded_channel::<()>();
-
-        // perform necessary setup within tokio context - or it crashes
-        let () = get_runtime().block_on(async { needs_tokio_runtime() });
-
-        // spawn tokio task with this thread's task-locals - without this, async callbacks on the new threads will not work!!
-        _ = get_runtime().spawn_with_scope(py, async move {
-            async_task(t_sender, t_receiver).await;
-        });
-        Ok(Self::new(h_sender, h_receiver))
-    }
-
-    /// NOTE: exceptions in callbacks are silently ignored until end of execution
-    fn add_sync_callback(
-        &self,
-        // #[gen_stub(override_type(
-        //     type_repr="collections.abc.Callable[[], None]",
-        //     imports=("collections.abc")
-        // ))]
-        callback: Py<PyAny>,
-    ) -> PyResult<()> {
-        // blocking call to async method -> can do non-blocking if needed
-        self.sender()
-            .send(AsyncTaskMessage::SyncCallback(Box::new(move || {
-                _ = Python::with_gil(|py| callback.call0(py).write_unraisable_with(py));
-            })))
-            .pyerr()?;
-        Ok(())
-    }
-
-    /// NOTE: exceptions in callbacks are silently ignored until end of execution
-    fn add_async_callback(
-        &self,
-        // #[gen_stub(override_type(
-        //     type_repr="collections.abc.Callable[[], collections.abc.Awaitable[None]]",
-        //     imports=("collections.abc")
-        // ))]
-        callback: Py<PyAny>,
-    ) -> PyResult<()> {
-        // blocking call to async method -> can do non-blocking if needed
-        self.sender()
-            .send(AsyncTaskMessage::AsyncCallback(Box::new(move || {
-                let c = Python::with_gil(|py| callback.clone_ref(py));
-                async move {
-                    if let Some(f) = Python::with_gil(|py| {
-                        let coroutine = c.call0(py).write_unraisable_with(py)?;
-                        pyo3_async_runtimes::tokio::into_future(coroutine.into_bound(py))
-                            .write_unraisable_with(py)
-                    }) {
-                        _ = f.await.write_unraisable();
-                    }
-                }
-                .boxed()
-            })))
-            .pyerr()?;
-        Ok(())
-    }
-
-    async fn receive_unit(&mut self) -> PyResult<()> {
-        self.receiver
-            .recv()
-            .await
-            .ok_or(PyErr::new::<PyRuntimeError, _>(
-                "cannot receive unit on closed channel",
-            ))
-    }
-
-    fn drain_units(&mut self) -> PyResult<i32> {
-        let mut cnt = 0;
-        loop {
-            match self.receiver.try_recv() {
-                Err(TryRecvError::Disconnected) => {
-                    return Err(PyErr::new::<PyRuntimeError, _>(
-                        "cannot receive unit on closed channel",
-                    ));
-                }
-                Err(TryRecvError::Empty) => return Ok(cnt),
-                Ok(()) => {
-                    cnt += 1;
-                    continue;
-                }
-            }
-        }
-    }
-
-    // #[gen_stub(skip)]
-    const fn __traverse__(&self, _visit: PyVisit<'_>) -> Result<(), PyTraverseError> {
-        Ok(()) // This is needed purely so `__clear__` can work
-    }
-
-    // #[gen_stub(skip)]
-    fn __clear__(&mut self) {
-        // TODO: may or may not need to await a "kill-signal" oneshot channel message,
-        //       to ensure that the networking task is done BEFORE exiting the clear function...
-        //       but this may require GIL?? and it may not be safe to call GIL here??
-        self.sender = None; // Using Option<T> as a trick to force `sender` channel to be dropped
-    }
-}
-
-pub fn examples_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_class::<PyAsyncTaskHandle>()?;
-
-    Ok(())
-}
--- a/rust/exo_pyo3_bindings/src/lib.rs
+++ b/rust/exo_pyo3_bindings/src/lib.rs
@@ -1,216 +1,42 @@
 //! TODO: crate documentation
-//!
-//! this is here as a placeholder documentation
-//!
-//!
+pub(crate) mod allow_threading;

-// enable Rust-unstable features for convenience
-#![feature(trait_alias)]
-#![feature(tuple_trait)]
-#![feature(unboxed_closures)]
-// #![feature(stmt_expr_attributes)]
-// #![feature(assert_matches)]
-// #![feature(async_fn_in_dyn_trait)]
-// #![feature(async_for_loop)]
-// #![feature(auto_traits)]
-// #![feature(negative_impls)]
-
-extern crate core;
-mod allow_threading;
-mod examples;
 pub(crate) mod networking;
-pub(crate) mod pylibp2p;
+pub(crate) mod take_once { // one-shot cell: holds a value that can be moved out at most once
+    use tokio::sync::Mutex;
+
+    pub struct TakeOnce<T>(Mutex<Option<T>>); // inner `Option` is `None` once the value is taken
+    impl<T> TakeOnce<T> {
+        pub fn new(t: T) -> Self { // wraps `t` so that it is available to the first `take`
+            Self(Mutex::new(Some(t)))
+        }
+        pub fn take(&self) -> Option<T> { // moves the value out via `&self`; `None` if unavailable
+            match self.0.try_lock() { // non-blocking: never waits for the lock
+                Ok(mut o) => o.take(),
+                Err(_) => None, // lock held elsewhere: reported as `None`, same as "already taken"
+            }
+        }
+    }
+}

-use crate::networking::networking_submodule;
-use crate::pylibp2p::ident::ident_submodule;
-use crate::pylibp2p::multiaddr::multiaddr_submodule;
-use pyo3::prelude::PyModule;
 use pyo3::prelude::*;
-use pyo3::{Bound, PyResult, pyclass, pymodule};
+
 use pyo3_stub_gen::define_stub_info_gatherer;

-/// Namespace for all the constants used by this crate.
-pub(crate) mod r#const {
-    pub const MPSC_CHANNEL_SIZE: usize = 1024;
-}
-
-/// Namespace for all the type/trait aliases used by this crate.
-pub(crate) mod alias {
-    use std::error::Error;
-    use std::marker::Tuple;
-
-    pub trait SendFn<Args: Tuple + Send + 'static, Output> =
-        Fn<Args, Output = Output> + Send + 'static;
-
-    pub type AnyError = Box<dyn Error + Send + Sync + 'static>;
-    pub type AnyResult<T> = Result<T, AnyError>;
-}
-
-/// Namespace for crate-wide extension traits/methods
-pub(crate) mod ext {
-    use crate::allow_threading::AllowThreads;
-    use extend::ext;
-    use pyo3::exceptions::{PyConnectionError, PyRuntimeError};
-    use pyo3::marker::Ungil;
-    use pyo3::types::PyBytes;
-    use pyo3::{Py, PyErr, PyResult, Python};
-    use tokio::runtime::Runtime;
-    use tokio::sync::mpsc;
-    use tokio::sync::mpsc::error::TryRecvError;
-    use tokio::task::JoinHandle;
-
-    #[ext(pub, name = ByteArrayExt)]
-    impl [u8] {
-        fn pybytes(&self) -> Py<PyBytes> {
-            Python::with_gil(|py| PyBytes::new(py, self).unbind())
-        }
-    }
-
-    #[ext(pub, name = ResultExt)]
-    impl<T, E> Result<T, E>
-    where
-        E: ToString,
-    {
-        fn pyerr(self) -> PyResult<T> {
-            self.map_err(|e| PyRuntimeError::new_err(e.to_string()))
-        }
-    }
-
-    pub trait FutureExt: Future + Sized {
-        /// SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-        fn allow_threads_py(self) -> AllowThreads<Self>
-        where
-            AllowThreads<Self>: Future,
-        {
-            AllowThreads::new(self)
-        }
-    }
-
-    impl<T: Future> FutureExt for T {}
-
-    #[ext(pub, name = PyErrExt)]
-    impl PyErr {
-        fn receiver_channel_closed() -> Self {
-            PyConnectionError::new_err("Receiver channel closed unexpectedly")
-        }
-    }
-
-    #[ext(pub, name = PyResultExt)]
-    impl<T> PyResult<T> {
-        fn write_unraisable(self) -> Option<T> {
-            Python::with_gil(|py| self.write_unraisable_with(py))
-        }
-
-        fn write_unraisable_with(self, py: Python<'_>) -> Option<T> {
-            match self {
-                Ok(v) => Some(v),
-                Err(e) => {
-                    // write error back to python
-                    e.write_unraisable(py, None);
-                    None
-                }
-            }
-        }
-    }
-
-    #[ext(pub, name = TokioRuntimeExt)]
-    impl Runtime {
-        fn spawn_with_scope<F>(&self, py: Python<'_>, future: F) -> PyResult<JoinHandle<F::Output>>
-        where
-            F: Future + Send + 'static,
-            F::Output: Send + 'static,
-        {
-            let locals = pyo3_async_runtimes::tokio::get_current_locals(py)?;
-            Ok(self.spawn(pyo3_async_runtimes::tokio::scope(locals, future)))
-        }
-    }
-
-    #[ext(pub, name = TokioMpscSenderExt)]
-    impl<T> mpsc::Sender<T> {
-        /// Sends a value, waiting until there is capacity.
-        ///
-        /// A successful send occurs when it is determined that the other end of the
-        /// channel has not hung up already. An unsuccessful send would be one where
-        /// the corresponding receiver has already been closed.
-        async fn send_py(&self, value: T) -> PyResult<()> {
-            self.send(value)
-                .await
-                .map_err(|_| PyErr::receiver_channel_closed())
-        }
-    }
-
-    #[ext(pub, name = TokioMpscReceiverExt)]
-    impl<T> mpsc::Receiver<T> {
-        /// Receives the next value for this receiver.
-        async fn recv_py(&mut self) -> PyResult<T> {
-            self.recv().await.ok_or_else(PyErr::receiver_channel_closed)
-        }
-
-        /// Receives at most `limit` values for this receiver and returns them.
-        ///
-        /// For `limit = 0`, an empty collection of messages will be returned immediately.
-        /// For `limit > 0`, if there are no messages in the channel's queue this method
-        /// will sleep until a message is sent.
-        async fn recv_many_py(&mut self, limit: usize) -> PyResult<Vec<T>> {
-            // get updates from receiver channel
-            let mut updates = Vec::with_capacity(limit);
-            let received = self.recv_many(&mut updates, limit).await;
-
-            // if we received zero items, then the channel was unexpectedly closed
-            if limit != 0 && received == 0 {
-                return Err(PyErr::receiver_channel_closed());
-            }
-
-            Ok(updates)
-        }
-
-        /// Tries to receive the next value for this receiver.
-        fn try_recv_py(&mut self) -> PyResult<Option<T>> {
-            match self.try_recv() {
-                Ok(v) => Ok(Some(v)),
-                Err(TryRecvError::Empty) => Ok(None),
-                Err(TryRecvError::Disconnected) => Err(PyErr::receiver_channel_closed()),
-            }
-        }
-    }
-}
-
-pub(crate) mod private {
-    use std::marker::Sized;
-
-    /// Sealed traits support
-    pub trait Sealed {}
-    impl<T: ?Sized> Sealed for T {}
-}
-
-/// A wrapper around [`Py`] that implements [`Clone`] using [`Python::with_gil`].
-#[repr(transparent)]
-pub(crate) struct ClonePy<T>(pub Py<T>);
-
-impl<T> Clone for ClonePy<T> {
-    fn clone(&self) -> Self {
-        Python::with_gil(|py| Self(self.0.clone_ref(py)))
-    }
-}
-
 /// A Python module implemented in Rust. The name of this function must match
 /// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
 /// import the module.
 #[pymodule(name = "exo_pyo3_bindings")]
-fn main_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
+pub fn networking_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // install logger
    pyo3_log::init();
+    // setup runtime
+    let mut builder = tokio::runtime::Builder::new_multi_thread();
+    builder.enable_all();
+    pyo3_async_runtimes::tokio::init(builder);

-    // TODO: for now this is all NOT a submodule, but figure out how to make the submodule system
-    //       work with maturin, where the types generate correctly, in the right folder, without
-    //       too many importing issues...
-    ident_submodule(m)?;
-    multiaddr_submodule(m)?;
-    networking_submodule(m)?;
-
-    // top-level constructs
-    // TODO: ...
-
+    m.add_class::<networking::PyPeer>()?;
+    m.add_class::<networking::PyKeypair>()?;
    Ok(())
 }

--- a/rust/exo_pyo3_bindings/src/networking.rs
+++ b/rust/exo_pyo3_bindings/src/networking.rs
@@ -1,571 +1,214 @@
-#![allow(
-    clippy::multiple_inherent_impl,
-    clippy::unnecessary_wraps,
-    clippy::unused_self,
-    clippy::needless_pass_by_value
-)]
+use crate::allow_threading::AllowThreads;
+use crate::take_once::TakeOnce;

-use crate::r#const::MPSC_CHANNEL_SIZE;
-use crate::ext::{ByteArrayExt as _, FutureExt, PyErrExt as _};
-use crate::ext::{ResultExt as _, TokioMpscReceiverExt as _, TokioMpscSenderExt as _};
-use crate::pyclass;
-use crate::pylibp2p::ident::{PyKeypair, PyPeerId};
-use libp2p::futures::StreamExt as _;
-use libp2p::gossipsub::{IdentTopic, Message, MessageId, PublishError};
-use libp2p::swarm::SwarmEvent;
-use libp2p::{gossipsub, mdns};
-use networking::discovery;
-use networking::swarm::create_swarm;
-use pyo3::prelude::{PyModule, PyModuleMethods as _};
-use pyo3::types::PyBytes;
-use pyo3::{Bound, Py, PyErr, PyResult, PyTraverseError, PyVisit, Python, pymethods};
-use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum, gen_stub_pymethods};
-use std::net::IpAddr;
-use tokio::sync::{Mutex, mpsc, oneshot};
-use util::ext::VecExt as _;
+use std::pin::pin;

-mod exception {
-    use pyo3::types::PyTuple;
-    use pyo3::{PyErrArguments, exceptions::PyException, prelude::*};
-    use pyo3_stub_gen::derive::*;
-
-    #[gen_stub_pyclass]
-    #[pyclass(frozen, extends=PyException, name="NoPeersSubscribedToTopicError")]
-    pub struct PyNoPeersSubscribedToTopicError {}
-
-    impl PyNoPeersSubscribedToTopicError {
-        const MSG: &'static str = "\
-        No peers are currently subscribed to receive messages on this topic. \
-        Wait for peers to subscribe or check your network connectivity.";
-
-        ///   Creates a new  [ `PyErr` ]  of this type.
-        ///
-        ///   [`PyErr`] :  https://docs.rs/pyo3/latest/pyo3/struct.PyErr.html   "PyErr in pyo3"
-        pub(crate) fn new_err() -> PyErr {
-            PyErr::new::<Self, _>(()) // TODO: check if this needs to be replaced???
-        }
-    }
-
-    #[gen_stub_pymethods]
-    #[pymethods]
-    impl PyNoPeersSubscribedToTopicError {
-        #[new]
-        #[pyo3(signature = (*args))]
-        #[allow(unused_variables)]
-        pub(crate) fn new(args: &Bound<'_, PyTuple>) -> Self {
-            Self {}
-        }
-
-        fn __repr__(&self) -> String {
-            format!("PeerId(\"{}\")", Self::MSG)
-        }
-
-        fn __str__(&self) -> String {
-            Self::MSG.to_string()
-        }
-    }
-
-    #[gen_stub_pyclass]
-    #[pyclass(frozen, extends=PyException, name="AllQueuesFullError")]
-    pub struct PyAllQueuesFullError {}
-
-    impl PyAllQueuesFullError {
-        const MSG: &'static str =
-            "All libp2p peers are unresponsive, resend the message or reconnect.";
-
-        ///   Creates a new  [ `PyErr` ]  of this type.
-        ///
-        ///   [`PyErr`] :  https://docs.rs/pyo3/latest/pyo3/struct.PyErr.html   "PyErr in pyo3"
-        pub(crate) fn new_err() -> PyErr {
-            PyErr::new::<Self, _>(()) // TODO: check if this needs to be replaced???
-        }
-    }
-
-    #[gen_stub_pymethods]
-    #[pymethods]
-    impl PyAllQueuesFullError {
-        #[new]
-        #[pyo3(signature = (*args))]
-        #[allow(unused_variables)]
-        pub(crate) fn new(args: &Bound<'_, PyTuple>) -> Self {
-            Self {}
-        }
-
-        fn __repr__(&self) -> String {
-            format!("PeerId(\"{}\")", Self::MSG)
-        }
-
-        fn __str__(&self) -> String {
-            Self::MSG.to_string()
-        }
-    }
-}
-
-/// Connection or disconnection event discriminant type.
-#[gen_stub_pyclass_enum]
-#[pyclass(eq, eq_int, name = "ConnectionUpdateType")]
-#[derive(Debug, Clone, PartialEq)]
-enum PyConnectionUpdateType {
-    Connected = 0,
-    Disconnected,
-}
+use futures_lite::FutureExt;
+use libp2p::{gossipsub::PublishError, identity::Keypair};
+use networking::{FromSwarm, Peer, ToSwarm};
+use pyo3::{
+    coroutine::CancelHandle,
+    exceptions::{PyConnectionError, PyRuntimeError, PyValueError},
+    prelude::*,
+    types::PyBytes,
+};
+use pyo3_stub_gen::{
+    derive::{gen_methods_from_python, gen_stub_pyclass, gen_stub_pymethods},
+    inventory::submit,
+};
+use tokio::sync::{Mutex, mpsc};

 #[gen_stub_pyclass]
-#[pyclass(frozen, name = "ConnectionUpdate")]
-#[derive(Debug, Clone)]
-struct PyConnectionUpdate {
-    /// Whether this is a connection or disconnection event
-    #[pyo3(get)]
-    update_type: PyConnectionUpdateType,
+/// Wrapper around a libp2p identity keypair, exposed to Python under the name `Keypair`.
+#[pyclass(name = "Keypair", frozen)]
+#[derive(Clone)]
+pub struct PyKeypair(Keypair);

-    /// Identity of the peer that we have connected to or disconnected from.
-    #[pyo3(get)]
-    peer_id: PyPeerId,
+#[gen_stub_pymethods]
+#[pymethods]
+impl PyKeypair {
+    /// Create a fresh keypair using the ed25519 scheme.
+    #[staticmethod]
+    fn generate() -> Self {
+        let keypair = Keypair::generate_ed25519();
+        Self(keypair)
+    }

-    /// Remote connection's IPv4 address.
-    #[pyo3(get)]
-    remote_ipv4: String,
+    /// Decode a private key from a protobuf structure and parse it as a `Keypair`.
+    ///
+    /// Raises `ValueError` if `bytes` cannot be decoded as a protobuf-encoded keypair.
+    #[staticmethod]
+    fn from_protobuf_encoding(bytes: &Bound<'_, PyBytes>) -> PyResult<Self> {
+        // The input comes from the Python caller, so a decoding failure is reported as
+        // an exception instead of panicking. `as_bytes` borrows the buffer, no copy needed.
+        Keypair::from_protobuf_encoding(bytes.as_bytes())
+            .map(Self)
+            .map_err(|e| PyValueError::new_err(e.to_string()))
+    }

-    /// Remote connection's TCP port.
-    #[pyo3(get)]
-    remote_tcp_port: u16,
-}
-
-enum ToTask {
-    GossipsubSubscribe {
-        topic: String,
-        result_tx: oneshot::Sender<PyResult<bool>>,
-    },
-    GossipsubUnsubscribe {
-        topic: String,
-        result_tx: oneshot::Sender<bool>,
-    },
-    GossipsubPublish {
-        topic: String,
-        data: Vec<u8>,
-        result_tx: oneshot::Sender<PyResult<MessageId>>,
-    },
-}
-
-#[allow(clippy::enum_glob_use)]
-async fn networking_task(
-    mut swarm: networking::swarm::Swarm,
-    mut to_task_rx: mpsc::Receiver<ToTask>,
-    connection_update_tx: mpsc::Sender<PyConnectionUpdate>,
-    gossipsub_message_tx: mpsc::Sender<(String, Vec<u8>)>,
-) {
-    use SwarmEvent::*;
-    use ToTask::*;
-    use mdns::Event::*;
-    use networking::swarm::BehaviourEvent::*;
-
-    log::info!("RUST: networking task started");
-
-    loop {
-        tokio::select! {
-            message = to_task_rx.recv() => {
-                // handle closed channel
-                let Some(message) = message else {
-                    log::info!("RUST: channel closed");
-                    break;
-                };
-
-                // dispatch incoming messages
-                match message {
-                    GossipsubSubscribe { topic, result_tx } => {
-                        // try to subscribe
-                        let result = swarm.behaviour_mut()
-                            .gossipsub.subscribe(&IdentTopic::new(topic));
-
-                        // send response oneshot
-                        if let Err(e) = result_tx.send(result.pyerr()) {
-                            log::error!("RUST: could not subscribe to gossipsub topic since channel already closed: {e:?}");
-                            continue;
-                        }
-                    }
-                    GossipsubUnsubscribe { topic, result_tx } => {
-                        // try to unsubscribe from the topic
-                        let result = swarm.behaviour_mut()
-                            .gossipsub.unsubscribe(&IdentTopic::new(topic));
-
-                        // send response oneshot (or exit if connection closed)
-                        if let Err(e) = result_tx.send(result) {
-                            log::error!("RUST: could not unsubscribe from gossipsub topic since channel already closed: {e:?}");
-                            continue;
-                        }
-                    }
-                    GossipsubPublish { topic, data, result_tx } => {
-                        // try to publish the data -> catch NoPeersSubscribedToTopic error & convert to correct exception
-                        let result = swarm.behaviour_mut().gossipsub.publish(
-                            IdentTopic::new(topic), data);
-                        let pyresult: PyResult<MessageId> = if let Err(PublishError::NoPeersSubscribedToTopic) = result {
-                            Err(exception::PyNoPeersSubscribedToTopicError::new_err())
-                        } else if let Err(PublishError::AllQueuesFull(_)) = result {
-                            Err(exception::PyAllQueuesFullError::new_err())
-                        } else {
-                            result.pyerr()
-                        };
-
-                        // send response oneshot (or exit if connection closed)
-                        if let Err(e) = result_tx.send(pyresult) {
-                            log::error!("RUST: could not publish gossipsub message since channel already closed: {e:?}");
-                            continue;
-                        }
-                    }
-                }
-            }
-
-            // architectural solution to this problem:
-            // create keep_alive behavior who's job it is to dial peers discovered by mDNS (and drop when expired)
-            //   -> it will emmit TRUE connected/disconnected events consumable elsewhere
-            //
-            // gossipsub will feed off-of dial attempts created by networking, and that will bootstrap its' peers list
-            // then for actual communication it will dial those peers if need-be
-            swarm_event = swarm.select_next_some() => {
-                match swarm_event {
-                    Behaviour(Gossipsub(gossipsub::Event::Message {
-                        message: Message {
-                            topic,
-                            data,
-                            ..
-                        },
-                        ..
-                    })) => {
-                        // topic-ID is just the topic hash!!! (since we used identity hasher)
-                        let message = (topic.into_string(), data);
-
-                        // send incoming message to channel (or exit if connection closed)
-                        if let Err(e) = gossipsub_message_tx.send(message).await {
-                            log::error!("RUST: could not send incoming gossipsub message since channel already closed: {e}");
-                            continue;
-                        }
-                    },
-                    Behaviour(Discovery(discovery::Event::ConnectionEstablished { peer_id, remote_ip, remote_tcp_port, .. })) => {
-                        // grab IPv4 string
-                        let remote_ipv4 = match remote_ip {
-                            IpAddr::V4(ip) => ip.to_string(),
-                            IpAddr::V6(ip) => {
-                                log::warn!("RUST: ignoring connection to IPv6 address: {ip}");
-                                continue;
-                            }
-                        };
-
-                        // send connection event to channel (or exit if connection closed)
-                        if let Err(e) = connection_update_tx.send(PyConnectionUpdate {
-                            update_type: PyConnectionUpdateType::Connected,
-                            peer_id: PyPeerId(peer_id),
-                            remote_ipv4,
-                            remote_tcp_port,
-                        }).await {
-                            log::error!("RUST: could not send connection update since channel already closed: {e}");
-                            continue;
-                        }
-                    },
-                    Behaviour(Discovery(discovery::Event::ConnectionClosed { peer_id, remote_ip, remote_tcp_port, .. })) => {
-                        // grab IPv4 string
-                        let remote_ipv4 = match remote_ip {
-                            IpAddr::V4(ip) => ip.to_string(),
-                            IpAddr::V6(ip) => {
-                                log::warn!("RUST: ignoring disconnection from IPv6 address: {ip}");
-                                continue;
-                            }
-                        };
-
-                        // send disconnection event to channel (or exit if connection closed)
-                        if let Err(e) = connection_update_tx.send(PyConnectionUpdate {
-                            update_type: PyConnectionUpdateType::Disconnected,
-                            peer_id: PyPeerId(peer_id),
-                            remote_ipv4,
-                            remote_tcp_port,
-                        }).await {
-                            log::error!("RUST: could not send connection update since channel already closed: {e}");
-                            continue;
-                        }
-                    },
-                    e => {
-                        log::info!("RUST: other event {e:?}");
-                    }
-                }
-            }
+    /// Encode a private key to a protobuf structure.
+    ///
+    /// Returns the encoded key as Python `bytes`. Raises `ValueError` carrying the
+    /// encoder's error message if the keypair cannot be encoded.
+    fn to_protobuf_encoding<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
+        match self.0.to_protobuf_encoding() {
+            Ok(bytes) => Ok(PyBytes::new(py, &bytes)),
+            Err(e) => Err(PyValueError::new_err(e.to_string())),
        }
    }

-    log::info!("RUST: networking task stopped");
+    /// Return the base58 form of the peer ID derived from this keypair's public key.
+    fn to_string(&self) -> String {
+        let peer_id = self.0.public().to_peer_id();
+        peer_id.to_base58()
+    }
 }

+/// The four arguments that `PyPeer::run` forwards, in this order, to `Peer::new`.
+/// They are collected by `PyPeer::new` and stored until `run` takes them.
+struct PeerBuilder(
+    // namespace string supplied by the Python caller
+    String,
+    // identity keypair unwrapped from `PyKeypair`
+    Keypair,
+    // sending half of the `FromSwarm` channel (its receiver stays in `PyPeer::from_swarm`)
+    mpsc::Sender<FromSwarm>,
+    // receiving half of the `ToSwarm` channel (its sender stays in `PyPeer::to_swarm`)
+    mpsc::Receiver<ToSwarm>,
+);
+
 #[gen_stub_pyclass]
-#[pyclass(name = "NetworkingHandle")]
-#[derive(Debug)]
-struct PyNetworkingHandle {
-    // channels
-    to_task_tx: Option<mpsc::Sender<ToTask>>,
-    connection_update_rx: Mutex<mpsc::Receiver<PyConnectionUpdate>>,
-    gossipsub_message_rx: Mutex<mpsc::Receiver<(String, Vec<u8>)>>,
-}
-
-impl Drop for PyNetworkingHandle {
-    fn drop(&mut self) {
-        // TODO: may or may not need to await a "kill-signal" oneshot channel message,
-        //       to ensure that the networking task is done BEFORE exiting the clear function...
-        //       but this may require GIL?? and it may not be safe to call GIL here??
-        self.to_task_tx = None; // Using Option<T> as a trick to force channel to be dropped
-    }
-}
-
-#[allow(clippy::expect_used)]
-impl PyNetworkingHandle {
-    fn new(
-        to_task_tx: mpsc::Sender<ToTask>,
-        connection_update_rx: mpsc::Receiver<PyConnectionUpdate>,
-        gossipsub_message_rx: mpsc::Receiver<(String, Vec<u8>)>,
-    ) -> Self {
-        Self {
-            to_task_tx: Some(to_task_tx),
-            connection_update_rx: Mutex::new(connection_update_rx),
-            gossipsub_message_rx: Mutex::new(gossipsub_message_rx),
-        }
-    }
-
-    const fn to_task_tx(&self) -> &mpsc::Sender<ToTask> {
-        self.to_task_tx
-            .as_ref()
-            .expect("The sender should only be None after de-initialization.")
-    }
+/// Python-facing handle to a networking peer: `run` drives it, while `subscribe`,
+/// `unsubscribe` and `send` queue commands and `recv` yields events.
+#[pyclass]
+pub struct PyPeer {
+    /// Construction arguments for the peer; taken by `run`, which refuses a second call.
+    peer: TakeOnce<PeerBuilder>,
+    /// Sender for `ToSwarm` commands (subscribe / unsubscribe / message).
+    to_swarm: mpsc::Sender<ToSwarm>,
+    /// Receiver for `FromSwarm` events; `recv` uses `try_lock` on this mutex.
+    from_swarm: Mutex<mpsc::Receiver<FromSwarm>>,
 }

 #[gen_stub_pymethods]
 #[pymethods]
-impl PyNetworkingHandle {
-    // NOTE: `async fn`s here that use `.await` will wrap the future in `.allow_threads_py()`
-    //       immediately beforehand to release the interpreter.
-    //       SEE: https://pyo3.rs/v0.26.0/async-await.html#detaching-from-the-interpreter-across-await
-
-    // ---- Lifecycle management methods ----
-
-    #[new]
-    fn py_new(identity: Bound<'_, PyKeypair>) -> PyResult<Self> {
-        use pyo3_async_runtimes::tokio::get_runtime;
-
-        // create communication channels
-        let (to_task_tx, to_task_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
-        let (connection_update_tx, connection_update_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
-        let (gossipsub_message_tx, gossipsub_message_rx) = mpsc::channel(MPSC_CHANNEL_SIZE);
-
-        // get identity
-        let identity = identity.borrow().0.clone();
-
-        // create networking swarm (within tokio context!! or it crashes)
-        let swarm = get_runtime()
-            .block_on(async { create_swarm(identity) })
-            .pyerr()?;
-
-        // spawn tokio task running the networking logic
-        get_runtime().spawn(async move {
-            networking_task(
-                swarm,
-                to_task_rx,
-                connection_update_tx,
-                gossipsub_message_tx,
-            )
-            .await;
-        });
-        Ok(Self::new(
-            to_task_tx,
-            connection_update_rx,
-            gossipsub_message_rx,
-        ))
+impl PyPeer {
+    /// Build a peer handle from a keypair and a namespace.
+    ///
+    /// Only the two channels are created here; the arguments are stored in a
+    /// `PeerBuilder` and the peer itself is not constructed until `run` is awaited.
+    #[staticmethod]
+    fn new(kp: PyKeypair, namespace: String) -> PyResult<Self> {
+        // events flowing back from the swarm
+        let (event_tx, event_rx) = mpsc::channel(1024);
+        // commands flowing towards the swarm
+        let (command_tx, command_rx) = mpsc::channel(1024);
+        let builder = PeerBuilder(namespace, kp.0, event_tx, command_rx);
+        Ok(Self {
+            peer: TakeOnce::new(builder),
+            to_swarm: command_tx,
+            from_swarm: Mutex::new(event_rx),
+        })
+    }

    #[gen_stub(skip)]
-    const fn __traverse__(&self, _visit: PyVisit<'_>) -> Result<(), PyTraverseError> {
-        Ok(()) // This is needed purely so `__clear__` can work
+    /// Run the peer until it finishes or the awaiting Python task is cancelled.
+    ///
+    /// Raises `RuntimeError` if called a second time (the builder can only be taken
+    /// once) or if the spawned task panics, and `ConnectionError` if the peer cannot
+    /// be created or its run loop ends with an error. Cancellation returns normally.
+    async fn run(&self, #[pyo3(cancel_handle)] mut cancel: CancelHandle) -> PyResult<()> {
+        // the builder is single-use: a second call finds nothing left to take
+        let builder = self
+            .peer
+            .take()
+            .ok_or_else(|| PyRuntimeError::new_err("tried to run peer twice"))?;
+        // construct and drive the peer inside a task on the tokio runtime
+        let jh = pyo3_async_runtimes::tokio::get_runtime()
+            .spawn(async move {
+                let mut peer =
+                    Peer::new(builder.0, builder.1, builder.2, builder.3).map_err(|_| {
+                        PyConnectionError::new_err("peer failed to listen on default address")
+                    })?;
+                peer.run()
+                    .await
+                    .map_err(|()| PyConnectionError::new_err("peer communication closed"))
+            })
+            // race the task against Python-side cancellation; a cancellation is
+            // reported as a successful result
+            .or(async {
+                cancel.cancelled().await;
+                Ok(Ok(()))
+            });
+        // NOTE(review): when the cancel branch wins, nothing visible here aborts the
+        // spawned task, so it may keep running detached - confirm this is intended.
+        match AllowThreads(pin!(jh)).await {
+            Err(e) if e.is_cancelled() => Ok(()),
+            Err(e) if e.is_panic() => Err(PyRuntimeError::new_err(format!("tokio panic {e}"))),
+            // the two guards above are presumed to cover every join error
+            Err(_) => unreachable!(),
+            Ok(res) => res,
+        }
+    }
+
+    /// Queue a request for the swarm to subscribe to `topic`.
+    ///
+    /// Raises `RuntimeError` if the command channel to the swarm is closed.
+    async fn subscribe(&self, topic: String) -> PyResult<()> {
+        let command = ToSwarm::Subscribe(topic);
+        match self.to_swarm.send(command).await {
+            Ok(()) => Ok(()),
+            Err(_) => Err(PyRuntimeError::new_err("swarm communication closed")),
+        }
+    }
+    /// Queue a request for the swarm to unsubscribe from `topic`.
+    ///
+    /// Raises `RuntimeError` if the command channel to the swarm is closed.
+    async fn unsubscribe(&self, topic: String) -> PyResult<()> {
+        let command = ToSwarm::Unsubscribe(topic);
+        match self.to_swarm.send(command).await {
+            Ok(()) => Ok(()),
+            Err(_) => Err(PyRuntimeError::new_err("swarm communication closed")),
+        }
+    }
+    /// Queue `payload` for publication on `topic`.
+    ///
+    /// Raises `RuntimeError` if the command channel to the swarm is closed.
+    async fn send(&self, topic: String, payload: Py<PyBytes>) -> PyResult<()> {
+        // copy the payload out in one short synchronous attach, before the await below,
+        // so the interpreter is not held while waiting on the channel
+        let data = Python::attach(|py| payload.bind(py).as_bytes().to_vec());
+        let command = ToSwarm::Message(topic, data);
+        match self.to_swarm.send(command).await {
+            Ok(()) => Ok(()),
+            Err(_) => Err(PyRuntimeError::new_err("swarm communication closed")),
+        }
    }

    #[gen_stub(skip)]
-    fn __clear__(&mut self) {
-        // TODO: may or may not need to await a "kill-signal" oneshot channel message,
-        //       to ensure that the networking task is done BEFORE exiting the clear function...
-        //       but this may require GIL?? and it may not be safe to call GIL here??
-        self.to_task_tx = None; // Using Option<T> as a trick to force channel to be dropped
+    /// Wait for the next event from the swarm.
+    ///
+    /// Publish errors arriving on the channel are translated here: `AllQueuesFull`
+    /// raises `ConnectionError`, `MessageTooLarge` raises `ValueError`, and every
+    /// other publish error is skipped so the call keeps waiting. Raises
+    /// `RuntimeError` if the receiver is already locked (a concurrent `recv`) or
+    /// the channel from the swarm is closed.
+    async fn recv(
+        &self,
+        #[pyo3(cancel_handle)] mut cancel: CancelHandle,
+    ) -> PyResult<PySwarmEvent> {
+        // loop so that skipped publish errors fall through to the next channel item
+        loop {
+            return match AllowThreads(pin!(
+                self.from_swarm
+                    // non-blocking lock: a second concurrent caller fails immediately
+                    .try_lock()
+                    .map_err(|_| PyRuntimeError::new_err("tried to recv twice"))?
+                    .recv()
+                    // cancellation resolves to `None`, i.e. the same arm as a closed channel
+                    .or(async {
+                        cancel.cancelled().await;
+                        None
+                    })
+            ))
+            .await
+            {
+                Some(FromSwarm::PublishError(p)) => match p {
+                    PublishError::AllQueuesFull(_) => {
+                        Err(PyConnectionError::new_err("swarm overloaded"))
+                    }
+                    PublishError::MessageTooLarge => {
+                        Err(PyValueError::new_err("message too large"))
+                    }
+                    // not surfaced to Python: keep waiting for the next item
+                    PublishError::NoPeersSubscribedToTopic => {
+                        continue;
+                    }
+                    // TODO(evan): logs here
+                    _ => continue,
+                },
+                None => Err(PyRuntimeError::new_err("swarm communication closed")),
+                // every remaining `FromSwarm` variant is handed to Python as-is
+                Some(fs) => Ok(PySwarmEvent(fs)),
+            };
+        }
    }
-
-    // ---- Connection update receiver methods ----
-
-    /// Receives the next `ConnectionUpdate` from networking.
-    async fn connection_update_recv(&self) -> PyResult<PyConnectionUpdate> {
-        self.connection_update_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_py()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-    }
-
-    /// Receives at most `limit` `ConnectionUpdate`s from networking and returns them.
-    ///
-    /// For `limit = 0`, an empty collection of `ConnectionUpdate`s will be returned immediately.
-    /// For `limit > 0`, if there are no `ConnectionUpdate`s in the channel's queue this method
-    /// will sleep until a `ConnectionUpdate`s is sent.
-    async fn connection_update_recv_many(&self, limit: usize) -> PyResult<Vec<PyConnectionUpdate>> {
-        self.connection_update_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_many_py(limit)
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-    }
-
-    // TODO: rn this blocks main thread if anything else is awaiting the channel (bc its a mutex)
-    //       so its too dangerous to expose just yet. figure out a better semantics for handling this,
-    //       so things don't randomly block
-    // /// Tries to receive the next `ConnectionUpdate` from networking.
-    // fn connection_update_try_recv(&self) -> PyResult<Option<PyConnectionUpdate>> {
-    //     self.connection_update_rx.blocking_lock().try_recv_py()
-    // }
-    //
-    // /// Checks if the `ConnectionUpdate` channel is empty.
-    // fn connection_update_is_empty(&self) -> bool {
-    //     self.connection_update_rx.blocking_lock().is_empty()
-    // }
-    //
-    // /// Returns the number of `ConnectionUpdate`s in the channel.
-    // fn connection_update_len(&self) -> usize {
-    //     self.connection_update_rx.blocking_lock().len()
-    // }
-
-    // ---- Gossipsub management methods ----
-
-    /// Subscribe to a `GossipSub` topic.
-    ///
-    /// Returns `True` if the subscription worked. Returns `False` if we were already subscribed.
-    async fn gossipsub_subscribe(&self, topic: String) -> PyResult<bool> {
-        let (tx, rx) = oneshot::channel();
-
-        // send off request to subscribe
-        self.to_task_tx()
-            .send_py(ToTask::GossipsubSubscribe {
-                topic,
-                result_tx: tx,
-            })
-            .allow_threads_py() // allow-threads-aware async call
-            .await?;
-
-        // wait for response & return any errors
-        rx.allow_threads_py() // allow-threads-aware async call
-            .await
-            .map_err(|_| PyErr::receiver_channel_closed())?
-    }
-
-    /// Unsubscribes from a `GossipSub` topic.
-    ///
-    /// Returns `True` if we were subscribed to this topic. Returns `False` if we were not subscribed.
-    async fn gossipsub_unsubscribe(&self, topic: String) -> PyResult<bool> {
-        let (tx, rx) = oneshot::channel();
-
-        // send off request to unsubscribe
-        self.to_task_tx()
-            .send_py(ToTask::GossipsubUnsubscribe {
-                topic,
-                result_tx: tx,
-            })
-            .allow_threads_py() // allow-threads-aware async call
-            .await?;
-
-        // wait for response & convert any errors
-        rx.allow_threads_py() // allow-threads-aware async call
-            .await
-            .map_err(|_| PyErr::receiver_channel_closed())
-    }
-
-    /// Publishes a message with multiple topics to the `GossipSub` network.
-    ///
-    /// If no peers are found that subscribe to this topic, throws `NoPeersSubscribedToTopicError` exception.
-    async fn gossipsub_publish(&self, topic: String, data: Py<PyBytes>) -> PyResult<()> {
-        let (tx, rx) = oneshot::channel();
-
-        // send off request to subscribe
-        let data = Python::with_gil(|py| Vec::from(data.as_bytes(py)));
-        self.to_task_tx()
-            .send_py(ToTask::GossipsubPublish {
-                topic,
-                data,
-                result_tx: tx,
-            })
-            .allow_threads_py() // allow-threads-aware async call
-            .await?;
-
-        // wait for response & return any errors => ignore messageID for now!!!
-        let _ = rx
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .map_err(|_| PyErr::receiver_channel_closed())??;
-        Ok(())
-    }
-
-    // ---- Gossipsub message receiver methods ----
-
-    /// Receives the next message from the `GossipSub` network.
-    async fn gossipsub_recv(&self) -> PyResult<(String, Py<PyBytes>)> {
-        self.gossipsub_message_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_py()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .map(|(t, d)| (t, d.pybytes()))
-    }
-
-    /// Receives at most `limit` messages from the `GossipSub` network and returns them.
-    ///
-    /// For `limit = 0`, an empty collection of messages will be returned immediately.
-    /// For `limit > 0`, if there are no messages in the channel's queue this method
-    /// will sleep until a message is sent.
-    async fn gossipsub_recv_many(&self, limit: usize) -> PyResult<Vec<(String, Py<PyBytes>)>> {
-        Ok(self
-            .gossipsub_message_rx
-            .lock()
-            .allow_threads_py() // allow-threads-aware async call
-            .await
-            .recv_many_py(limit)
-            .allow_threads_py() // allow-threads-aware async call
-            .await?
-            .map(|(t, d)| (t, d.pybytes())))
-    }
-
-    // TODO: rn this blocks main thread if anything else is awaiting the channel (bc its a mutex)
-    //       so its too dangerous to expose just yet. figure out a better semantics for handling this,
-    //       so things don't randomly block
-    // /// Tries to receive the next message from the `GossipSub` network.
-    // fn gossipsub_try_recv(&self) -> PyResult<Option<(String, Py<PyBytes>)>> {
-    //     Ok(self
-    //         .gossipsub_message_rx
-    //         .blocking_lock()
-    //         .try_recv_py()?
-    //         .map(|(t, d)| (t, d.pybytes())))
-    // }
-    //
-    // /// Checks if the `GossipSub` message channel is empty.
-    // fn gossipsub_is_empty(&self) -> bool {
-    //     self.gossipsub_message_rx.blocking_lock().is_empty()
-    // }
-    //
-    // /// Returns the number of `GossipSub` messages in the channel.
-    // fn gossipsub_len(&self) -> usize {
-    //     self.gossipsub_message_rx.blocking_lock().len()
-    // }
 }

-pub fn networking_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_class::<exception::PyNoPeersSubscribedToTopicError>()?;
-    m.add_class::<exception::PyAllQueuesFullError>()?;
-
-    m.add_class::<PyConnectionUpdateType>()?;
-    m.add_class::<PyConnectionUpdate>()?;
-    m.add_class::<PyConnectionUpdateType>()?;
-    m.add_class::<PyNetworkingHandle>()?;
-
-    Ok(())
+// Hand-written stub signatures for `run()` and `recv()`. Both methods are marked
+// `#[gen_stub(skip)]`, presumably because the stub generator does not cope with their
+// `#[pyo3(cancel_handle)]` parameter, so their Python signatures are submitted here.
+submit! {
+    gen_methods_from_python! {
+        r#"
+        class PyPeer:
+            async def run(self): ...
+            async def recv(self) -> PySwarmEvent: ...
+        "#
+    }
+}
+
+/// An event received from the swarm, wrapping a `FromSwarm` value.
+/// Use the `downcast_*` methods to find out which kind of event it is.
+#[gen_stub_pyclass]
+#[pyclass]
+pub struct PySwarmEvent(FromSwarm);
+
+#[gen_stub_pymethods]
+#[pymethods]
+impl PySwarmEvent {
+    // Each accessor returns `None` unless the wrapped event is the matching variant.
+
+    /// Base58 peer ID if this is a `Discovered` event, otherwise `None`.
+    fn downcast_discovered(&self) -> Option<String> {
+        match self.0 {
+            FromSwarm::Discovered(peer_id) => Some(peer_id.to_base58()),
+            _ => None,
+        }
+    }
+    /// Base58 peer ID if this is an `Expired` event, otherwise `None`.
+    fn downcast_expired(&self) -> Option<String> {
+        match self.0 {
+            FromSwarm::Expired(peer_id) => Some(peer_id.to_base58()),
+            _ => None,
+        }
+    }
+    /// `(base58 peer ID, topic, payload bytes)` if this is a `Message` event, otherwise `None`.
+    fn downcast_message<'py>(
+        &self,
+        py: Python<'py>,
+    ) -> Option<(String, String, Bound<'py, PyBytes>)> {
+        match &self.0 {
+            FromSwarm::Message(peer_id, topic, data) => {
+                let payload = PyBytes::new(py, data);
+                Some((peer_id.to_base58(), topic.clone(), payload))
+            }
+            _ => None,
+        }
+    }
 }
--- a/Show More
+++ b/Show More