Compare commits


1 Commit

Author SHA1 Message Date
Evan
7ccb233373 nix mlx compilation for better portability 2026-01-13 17:13:28 +00:00
56 changed files with 1344 additions and 2014 deletions

View File

@@ -113,22 +113,11 @@ jobs:
uv python install
uv sync --locked
- name: Install Nix
uses: cachix/install-nix-action@v31
with:
nix_path: nixpkgs=channel:nixos-unstable
- name: Configure Cachix
uses: cachix/cachix-action@v14
with:
name: exo
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
- name: Build dashboard
run: |
DASHBOARD_OUT=$(nix build .#dashboard --print-build-logs --no-link --print-out-paths)
mkdir -p dashboard/build
cp -r "$DASHBOARD_OUT"/* dashboard/build/
cd dashboard
npm ci
npm run build
- name: Install Sparkle CLI
run: |

View File

@@ -113,6 +113,28 @@ jobs:
with:
lfs: false
- name: Select Xcode
if: startsWith(matrix.runner, 'macos-')
run: |
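# Pick the newest Xcode preinstalled on the runner (version-sorted, last entry wins)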
XCODE_BASEDIR="$(printf '%s\n' /Applications/Xcode_*.app | sort -V | tail -n 1)"
[[ -d "$XCODE_BASEDIR" ]] || exit 1
sudo mv "$XCODE_BASEDIR" /Applications/Xcode.app
ls -ld "/Applications/Xcode.app"
sudo /usr/bin/xcode-select -s "/Applications/Xcode.app"
/usr/bin/xcode-select -p || true
/usr/bin/xcrun --toolchain default --find xcodebuild || true
- name: Install Metal toolchain component
if: startsWith(matrix.runner, 'macos-')
run: |
set -e
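# On recent Xcode the Metal toolchain ships as a separate downloadable component; fetch it only if `metal` is missing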
if ! xcrun --find metal >/dev/null 2>&1; then
sudo xcodebuild -downloadComponent MetalToolchain
fi
xcrun --find metal
xcrun --find metallib
- uses: cachix/install-nix-action@v31
with:
nix_path: nixpkgs=channel:nixos-unstable

Cargo.lock generated
View File

@@ -4340,6 +4340,25 @@ dependencies = [
"libc",
]
[[package]]
name = "system_custodian"
version = "0.0.1"
dependencies = [
"delegate",
"derive_more",
"either",
"extend",
"futures",
"futures-timer",
"impl-trait-for-tuples",
"keccak-const",
"log",
"thiserror 2.0.17",
"tokio",
"tracing-subscriber",
"util",
]
[[package]]
name = "tagptr"
version = "0.2.0"

View File

@@ -3,6 +3,7 @@ resolver = "3"
members = [
"rust/networking",
"rust/exo_pyo3_bindings",
"rust/system_custodian",
"rust/util",
]
@@ -24,6 +25,7 @@ opt-level = 3
[workspace.dependencies]
## Crate members as common dependencies
networking = { path = "rust/networking" }
system_custodian = { path = "rust/system_custodian" }
util = { path = "rust/util" }
# Proc-macro authoring tools

View File

@@ -1,60 +0,0 @@
{ lib
, config
, dream2nix
, ...
}:
let
# Read and parse the lock file
rawLockFile = builtins.fromJSON (builtins.readFile "${config.deps.dashboardSrc}/package-lock.json");
# For packages with bundleDependencies, filter out deps that are bundled
# (bundled deps are inside the tarball, not separate lockfile entries)
fixedPackages = lib.mapAttrs
(path: entry:
if entry ? bundleDependencies && entry.bundleDependencies != [ ]
then entry // {
dependencies = lib.filterAttrs
(name: _: !(lib.elem name entry.bundleDependencies))
(entry.dependencies or { });
}
else entry
)
(rawLockFile.packages or { });
fixedLockFile = rawLockFile // { packages = fixedPackages; };
in
{
imports = [
dream2nix.modules.dream2nix.nodejs-package-lock-v3
dream2nix.modules.dream2nix.nodejs-granular-v3
];
name = "exo-dashboard";
version = "1.0.0";
mkDerivation = {
src = config.deps.dashboardSrc;
buildPhase = ''
runHook preBuild
npm run build
runHook postBuild
'';
installPhase = ''
runHook preInstall
cp -r build $out/build
runHook postInstall
'';
};
deps = { nixpkgs, ... }: {
inherit (nixpkgs) stdenv;
dashboardSrc = null; # Injected by parts.nix
};
nodejs-package-lock-v3 = {
# Don't use packageLockFile - provide the fixed lock content directly
packageLock = fixedLockFile;
};
}

View File

@@ -863,7 +863,6 @@
"integrity": "sha512-oH8tXw7EZnie8FdOWYrF7Yn4IKrqTFHhXvl8YxXxbKwTMcD/5NNCryUSEXRk2ZR4ojnub0P8rNrsVGHXWqIDtA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@standard-schema/spec": "^1.0.0",
"@sveltejs/acorn-typescript": "^1.0.5",
@@ -903,7 +902,6 @@
"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
"debug": "^4.4.1",
@@ -1520,7 +1518,6 @@
"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~6.21.0"
}
@@ -1530,7 +1527,6 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -1943,7 +1939,6 @@
"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
"dev": true,
"license": "ISC",
"peer": true,
"engines": {
"node": ">=12"
}
@@ -2651,7 +2646,6 @@
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -2839,7 +2833,6 @@
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.45.3.tgz",
"integrity": "sha512-ngKXNhNvwPzF43QqEhDOue7TQTrG09em1sd4HBxVF0Wr2gopAmdEWan+rgbdgK4fhBtSOTJO8bYU4chUG7VXZQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jridgewell/remapping": "^2.3.4",
"@jridgewell/sourcemap-codec": "^1.5.0",
@@ -2984,7 +2977,6 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -3006,7 +2998,6 @@
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.4",

View File

@@ -1,44 +0,0 @@
{ inputs, ... }:
{
perSystem =
{ pkgs, lib, ... }:
let
# Filter source to only include dashboard directory
src = lib.cleanSourceWith {
src = inputs.self;
filter =
path: type:
let
baseName = builtins.baseNameOf path;
inDashboardDir =
(lib.hasInfix "/dashboard/" path)
|| (lib.hasSuffix "/dashboard" (builtins.dirOf path))
|| (baseName == "dashboard" && type == "directory");
in
inDashboardDir;
};
# Build the dashboard with dream2nix (includes node_modules in output)
dashboardFull = inputs.dream2nix.lib.evalModules {
packageSets.nixpkgs = pkgs;
modules = [
./dashboard.nix
{
paths.projectRoot = inputs.self;
paths.projectRootFile = "flake.nix";
paths.package = inputs.self + "/dashboard";
}
# Inject the filtered source
{
deps.dashboardSrc = lib.mkForce "${src}/dashboard";
}
];
};
in
{
# Extract just the static site from the full build
packages.dashboard = pkgs.runCommand "exo-dashboard" { } ''
cp -r ${dashboardFull}/build $out
'';
};
}

flake.lock generated
View File

@@ -1,42 +1,5 @@
{
"nodes": {
"crane": {
"locked": {
"lastModified": 1767744144,
"narHash": "sha256-9/9ntI0D+HbN4G0TrK3KmHbTvwgswz7p8IEJsWyef8Q=",
"owner": "ipetkov",
"repo": "crane",
"rev": "2fb033290bf6b23f226d4c8b32f7f7a16b043d7e",
"type": "github"
},
"original": {
"owner": "ipetkov",
"repo": "crane",
"type": "github"
}
},
"dream2nix": {
"inputs": {
"nixpkgs": [
"nixpkgs"
],
"purescript-overlay": "purescript-overlay",
"pyproject-nix": "pyproject-nix"
},
"locked": {
"lastModified": 1765953015,
"narHash": "sha256-5FBZbbWR1Csp3Y2icfRkxMJw/a/5FGg8hCXej2//bbI=",
"owner": "nix-community",
"repo": "dream2nix",
"rev": "69eb01fa0995e1e90add49d8ca5bcba213b0416f",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "dream2nix",
"type": "github"
}
},
"fenix": {
"inputs": {
"nixpkgs": [
@@ -45,11 +8,11 @@
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1768287139,
"narHash": "sha256-nsXFt0OzUi6K7dUzzJD5/v9e0Ic+fvclfIW936/43ZM=",
"lastModified": 1761893049,
"narHash": "sha256-1TtFDPhC+ZsrOOtBnry1EZC+WipTTvsOVjIEVugqji8=",
"owner": "nix-community",
"repo": "fenix",
"rev": "a4a3aa956931f90f35453cb519e4545e9ad7f773",
"rev": "c2ac9a5c0d6d16630c3b225b874bd14528d1abe6",
"type": "github"
},
"original": {
@@ -58,22 +21,6 @@
"type": "github"
}
},
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1696426674,
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
"owner": "edolstra",
"repo": "flake-compat",
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
"type": "github"
},
"original": {
"owner": "edolstra",
"repo": "flake-compat",
"type": "github"
}
},
"flake-parts": {
"inputs": {
"nixpkgs-lib": [
@@ -95,22 +42,6 @@
}
},
"nixpkgs": {
"locked": {
"lastModified": 1768127708,
"narHash": "sha256-1Sm77VfZh3mU0F5OqKABNLWxOuDeHIlcFjsXeeiPazs=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "ffbc9f8cbaacfb331b6017d5a5abb21a492c9a38",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs-swift": {
"locked": {
"lastModified": 1761672384,
"narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=",
@@ -121,74 +52,27 @@
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"type": "github"
}
},
"purescript-overlay": {
"inputs": {
"flake-compat": "flake-compat",
"nixpkgs": [
"dream2nix",
"nixpkgs"
],
"slimlock": "slimlock"
},
"locked": {
"lastModified": 1728546539,
"narHash": "sha256-Sws7w0tlnjD+Bjck1nv29NjC5DbL6nH5auL9Ex9Iz2A=",
"owner": "thomashoneyman",
"repo": "purescript-overlay",
"rev": "4ad4c15d07bd899d7346b331f377606631eb0ee4",
"type": "github"
},
"original": {
"owner": "thomashoneyman",
"repo": "purescript-overlay",
"type": "github"
}
},
"pyproject-nix": {
"inputs": {
"nixpkgs": [
"dream2nix",
"nixpkgs"
]
},
"locked": {
"lastModified": 1763017646,
"narHash": "sha256-Z+R2lveIp6Skn1VPH3taQIuMhABg1IizJd8oVdmdHsQ=",
"owner": "pyproject-nix",
"repo": "pyproject.nix",
"rev": "47bd6f296502842643078d66128f7b5e5370790c",
"type": "github"
},
"original": {
"owner": "pyproject-nix",
"repo": "pyproject.nix",
"type": "github"
}
},
"root": {
"inputs": {
"crane": "crane",
"dream2nix": "dream2nix",
"fenix": "fenix",
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs",
"nixpkgs-swift": "nixpkgs-swift",
"treefmt-nix": "treefmt-nix"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1768224240,
"narHash": "sha256-Pp1dDrXKPBUJReZnnDElFyHYn67XTd48zRhToheLjtk=",
"lastModified": 1761849405,
"narHash": "sha256-igXdvC+WCUN+3gnfk+ptT7rMmxQuY6WbIg1rXMUN1DM=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "725349602e525df37f377701e001fe8aab807878",
"rev": "f7de8ae045a5fe80f1203c5a1c3015b05f7c3550",
"type": "github"
},
"original": {
@@ -198,28 +82,6 @@
"type": "github"
}
},
"slimlock": {
"inputs": {
"nixpkgs": [
"dream2nix",
"purescript-overlay",
"nixpkgs"
]
},
"locked": {
"lastModified": 1688756706,
"narHash": "sha256-xzkkMv3neJJJ89zo3o2ojp7nFeaZc2G0fYwNXNJRFlo=",
"owner": "thomashoneyman",
"repo": "slimlock",
"rev": "cf72723f59e2340d24881fd7bf61cb113b4c407c",
"type": "github"
},
"original": {
"owner": "thomashoneyman",
"repo": "slimlock",
"type": "github"
}
},
"treefmt-nix": {
"inputs": {
"nixpkgs": [
@@ -227,11 +89,11 @@
]
},
"locked": {
"lastModified": 1768158989,
"narHash": "sha256-67vyT1+xClLldnumAzCTBvU0jLZ1YBcf4vANRWP3+Ak=",
"lastModified": 1762938485,
"narHash": "sha256-AlEObg0syDl+Spi4LsZIBrjw+snSVU4T8MOeuZJUJjM=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "e96d59dff5c0d7fddb9d113ba108f03c3ef99eca",
"rev": "5b4ee75aeefd1e2d5a1cc43cf6ba65eba75e83e4",
"type": "github"
},
"original": {

View File

@@ -9,8 +9,6 @@
inputs.nixpkgs-lib.follows = "nixpkgs";
};
crane.url = "github:ipetkov/crane";
fenix = {
url = "github:nix-community/fenix";
inputs.nixpkgs.follows = "nixpkgs";
@@ -20,14 +18,6 @@
url = "github:numtide/treefmt-nix";
inputs.nixpkgs.follows = "nixpkgs";
};
dream2nix = {
url = "github:nix-community/dream2nix";
inputs.nixpkgs.follows = "nixpkgs";
};
# Pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
nixpkgs-swift.url = "github:NixOS/nixpkgs/08dacfca559e1d7da38f3cf05f1f45ee9bfd213c";
};
nixConfig = {
@@ -46,16 +36,12 @@
imports = [
inputs.treefmt-nix.flakeModule
./dashboard/parts.nix
./rust/parts.nix
];
perSystem =
{ config, self', inputs', pkgs, lib, system, ... }:
{ config, inputs', pkgs, lib, ... }:
let
fenixToolchain = inputs'.fenix.packages.complete;
# Use pinned nixpkgs for swift-format (swift is broken on x86_64-linux in newer nixpkgs)
pkgsSwift = import inputs.nixpkgs-swift { inherit system; };
in
{
treefmt = {
@@ -68,16 +54,13 @@
};
rustfmt = {
enable = true;
package = config.rust.toolchain;
package = fenixToolchain.rustfmt;
};
prettier = {
enable = true;
includes = [ "*.ts" ];
};
swift-format = {
enable = true;
package = pkgsSwift.swiftPackages.swift-format;
};
swift-format.enable = true;
};
};
@@ -87,9 +70,12 @@
touch $out
'';
devShells.default = with pkgs; pkgs.mkShell {
inputsFrom = [ self'.checks.cargo-build ];
packages =
if pkgs.stdenv.isDarwin then {
metal = pkgs.callPackage ./nix/metalWrapper.nix { metalVersion = "230"; };
} else { };
devShells.default = with pkgs; mkShellNoCC {
packages =
[
# FORMATTING
@@ -102,8 +88,14 @@
basedpyright
# RUST
config.rust.toolchain
maturin
(fenixToolchain.withComponents [
"cargo"
"rustc"
"clippy"
"rustfmt"
"rust-src"
])
rustup # Just here to make RustRover happy
# NIX
nixpkgs-fmt
@@ -115,20 +107,31 @@
just
jq
]
++ lib.optionals stdenv.isLinux [
++ (lib.optionals stdenv.isLinux [
# IFCONFIG
unixtools.ifconfig
]
++ lib.optionals stdenv.isDarwin [
macmon
];
OPENSSL_NO_VENDOR = "1";
# Build dependencies for Linux
pkg-config
openssl
])
++ (lib.optionals stdenv.isDarwin [
# MACMON
macmon
]);
shellHook = ''
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${python313}/lib"
${lib.optionalString stdenv.isLinux ''
export LD_LIBRARY_PATH="${openssl.out}/lib:$LD_LIBRARY_PATH"
# PYTHON
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${pkgs.python313}/lib"
${lib.optionalString pkgs.stdenv.isLinux ''
# Build environment for Linux
export PKG_CONFIG_PATH="${pkgs.openssl.dev}/lib/pkgconfig:$PKG_CONFIG_PATH"
export LD_LIBRARY_PATH="${pkgs.openssl.out}/lib:$LD_LIBRARY_PATH"
''}
echo
echo "🍎🍎 Run 'just <recipe>' to get started"
just --list
'';
};
};
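
For reference, the Darwin-only `packages` conditional in the flake.nix hunk above could equivalently be spelled with `lib.optionalAttrs`; a sketch:

packages = lib.optionalAttrs pkgs.stdenv.isDarwin {
  metal = pkgs.callPackage ./nix/metalWrapper.nix { metalVersion = "230"; };
};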

View File

@@ -0,0 +1,79 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ed30932..d8528132 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -177,11 +177,7 @@ if(MLX_BUILD_METAL)
add_compile_definitions(MLX_METAL_DEBUG)
endif()
- # Throw an error if xcrun not found
- execute_process(
- COMMAND zsh "-c" "/usr/bin/xcrun -sdk macosx --show-sdk-version"
- OUTPUT_VARIABLE MACOS_SDK_VERSION
- OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ERROR_IS_FATAL ANY)
+ set(MACOS_SDK_VERSION @sdkVersion@)
if(${MACOS_SDK_VERSION} LESS 14.0)
message(
@@ -199,11 +195,8 @@ if(MLX_BUILD_METAL)
endif()
set(XCRUN_FLAGS "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
endif()
- execute_process(
- COMMAND
- zsh "-c"
- "echo \"__METAL_VERSION__\" | xcrun -sdk macosx metal ${XCRUN_FLAGS} -E -x metal -P - | tail -1 | tr -d '\n'"
- OUTPUT_VARIABLE MLX_METAL_VERSION COMMAND_ERROR_IS_FATAL ANY)
+ set(
+ MLX_METAL_VERSION @metalVersion@)
FetchContent_Declare(metal_cpp URL ${METAL_CPP_URL})
FetchContent_MakeAvailable(metal_cpp)
target_include_directories(
diff --git a/cmake/extension.cmake b/cmake/extension.cmake
index 13db804a..5b385132 100644
--- a/cmake/extension.cmake
+++ b/cmake/extension.cmake
@@ -36,7 +36,7 @@ macro(mlx_build_metallib)
add_custom_command(
OUTPUT ${MTLLIB_BUILD_TARGET}
COMMAND
- xcrun -sdk macosx metal
+ metal
"$<LIST:TRANSFORM,${MTLLIB_INCLUDE_DIRS},PREPEND,-I>"
${MTLLIB_COMPILE_OPTIONS} ${MTLLIB_SOURCES} -o ${MTLLIB_BUILD_TARGET}
DEPENDS ${MTLLIB_DEPS} ${MTLLIB_SOURCES}
diff --git a/mlx/backend/metal/kernels/CMakeLists.txt b/mlx/backend/metal/kernels/CMakeLists.txt
index 262b0495..5c7446ad 100644
--- a/mlx/backend/metal/kernels/CMakeLists.txt
+++ b/mlx/backend/metal/kernels/CMakeLists.txt
@@ -29,7 +29,7 @@ function(build_kernel_base TARGET SRCFILE DEPS)
"-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
endif()
add_custom_command(
- COMMAND xcrun -sdk macosx metal ${METAL_FLAGS} -c ${SRCFILE}
+ COMMAND metal ${METAL_FLAGS} -c ${SRCFILE}
-I${PROJECT_SOURCE_DIR} -o ${TARGET}.air
DEPENDS ${SRCFILE} ${DEPS} ${BASE_HEADERS}
OUTPUT ${TARGET}.air
@@ -170,7 +170,7 @@ endif()
add_custom_command(
OUTPUT ${MLX_METAL_PATH}/mlx.metallib
- COMMAND xcrun -sdk macosx metallib ${KERNEL_AIR} -o
+ COMMAND metallib ${KERNEL_AIR} -o
${MLX_METAL_PATH}/mlx.metallib
DEPENDS ${KERNEL_AIR}
COMMENT "Building mlx.metallib"
diff --git a/mlx/backend/metal/make_compiled_preamble.sh b/mlx/backend/metal/make_compiled_preamble.sh
index bb55ed3a..94ea7dd7 100644
--- a/mlx/backend/metal/make_compiled_preamble.sh
+++ b/mlx/backend/metal/make_compiled_preamble.sh
@@ -31,7 +31,7 @@ OUTPUT_FILE=${OUTPUT_DIR}/${SRC_NAME}.cpp
mkdir -p "$OUTPUT_DIR"
# Use the metal compiler to get a list of headers (with depth)
-CCC="xcrun -sdk macosx metal -x metal"
+CCC="metal -x metal"
HDRS=$( $CCC -I"$SRC_DIR" -I"$JIT_INCLUDES" -DMLX_METAL_JIT -E -P -CC -C -H "$INPUT_FILE" $CFLAGS -w 2>&1 1>/dev/null )
# Remove any included system frameworks (for MetalPerformancePrimitive headers)
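
This patch makes the MLX CMake build hermetic: the SDK and Metal versions that upstream queries at configure time via `xcrun` become `@sdkVersion@` and `@metalVersion@` placeholders, and the `xcrun -sdk macosx metal`/`metallib` invocations become bare `metal`/`metallib` so the wrapper derivation below can supply them on PATH. A minimal sketch of how the placeholders get filled (the same `replaceVars` call appears in nix/mlx.nix below):

patches = [
  (replaceVars ./darwin-build-fixes.patch {
    # substituted into @sdkVersion@ / @metalVersion@ at eval time
    sdkVersion = apple-sdk_26.version;
    metalVersion = metal.version;
  })
];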

nix/metalWrapper.nix Normal file
View File

@@ -0,0 +1,22 @@
{ stdenv
, metalVersion
, xcodeBaseDir ? "/Applications/Xcode.app"
}:
assert stdenv.isDarwin;
stdenv.mkDerivation {
pname = "metal-wrapper-impure";
version = metalVersion;
__noChroot = true;
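# impure build: reads the host's Xcode outside the sandbox, so it requires sandbox = relaxed in nix.conf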
buildCommand = ''
DEVELOPER_DIR=${xcodeBaseDir}/Contents/Developer
[[ -x "$DEVELOPER_DIR/usr/bin/xcodebuild" ]] || (echo "Missing xcodebuild at $DEVELOPER_DIR/usr/bin/xcodebuild" && exit 1)
SDKROOT=${xcodeBaseDir}/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
[[ -d "$SDKROOT" ]] || (echo "Missing SDKROOT at $SDKROOT" && exit 1)
export DEVELOPER_DIR SDKROOT
mkdir -p $out/bin && cd $out/bin
ln -s $(/usr/bin/xcrun --sdk macosx -f metal)
ln -s $(/usr/bin/xcrun --sdk macosx -f metallib)
[[ -f $out/bin/metal ]] && [[ -f $out/bin/metallib ]] || exit 1
'';
}
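
Because `__noChroot = true` lets the builder reach the host's Xcode install, this derivation only builds when nix.conf has `sandbox = relaxed` (or sandboxing disabled). flake.nix earlier in this diff consumes it on Darwin only:

metal = pkgs.callPackage ./nix/metalWrapper.nix { metalVersion = "230"; };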

nix/mlx.nix Normal file
View File

@@ -0,0 +1,154 @@
{ stdenv
, lib
, buildPythonPackage
, fetchFromGitHub
, replaceVars
, fetchzip
, setuptools
, cmake
, nanobind
, pybind11
, nlohmann_json
, apple-sdk_26
, metal
, numpy
, pytestCheckHook
, python
, runCommand
, fmt
}:
assert stdenv.isDarwin;
let
# static dependencies included directly during compilation
gguf-tools = fetchFromGitHub {
owner = "antirez";
repo = "gguf-tools";
rev = "8fa6eb65236618e28fd7710a0fba565f7faa1848";
hash = "sha256-15FvyPOFqTOr5vdWQoPnZz+mYH919++EtghjozDlnSA=";
};
metal_cpp = fetchzip {
url = "https://developer.apple.com/metal/cpp/files/metal-cpp_26.zip";
hash = "sha256-7n2eI2lw/S+Us6l7YPAATKwcIbRRpaQ8VmES7S8ZjY8=";
};
mlx = buildPythonPackage rec {
pname = "mlx";
version = "0.30.1";
pyproject = true;
src = fetchFromGitHub {
owner = "ml-explore";
repo = "mlx";
tag = "v${version}";
hash = "sha256-Vt0RH+70VBwUjXSfPTsNdRS3g0ookJHhzf2kvgEtgH8=";
};
patches = [
(replaceVars ./darwin-build-fixes.patch {
sdkVersion = apple-sdk_26.version;
metalVersion = metal.version;
})
];
postPatch = ''
substituteInPlace pyproject.toml \
--replace-fail "nanobind==2.10.2" "nanobind"
substituteInPlace mlx/backend/cpu/jit_compiler.cpp \
--replace-fail "g++" "$CXX"
'';
dontUseCmakeConfigure = true;
enableParallelBuilding = true;
# Allows multiple cores to be used in Python builds.
postUnpack = ''
export MAKEFLAGS+="''${enableParallelBuilding:+-j$NIX_BUILD_CORES}"
'';
# skip bulk updates: the updater would bump the wrong fetcher rev attribute
passthru.skipBulkUpdate = true;
env = {
DEV_RELEASE = 1;
# NOTE The `metal` command-line utility used to build the Metal kernels is not open-source.
# this is what the Xcode wrapper is for - it patches in the system Metal CLI
CMAKE_ARGS = toString [
(lib.cmakeBool "USE_SYSTEM_FMT" true)
(lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_GGUFLIB" "${gguf-tools}")
(lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_JSON" "${nlohmann_json.src}")
(lib.cmakeBool "FETCHCONTENT_FULLY_DISCONNECTED" true)
(lib.cmakeBool "MLX_BUILD_METAL" true)
(lib.cmakeOptionType "filepath" "METAL_LIB"
"${metal}/Metal.framework")
(lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_METAL_CPP" "${metal_cpp}")
(lib.cmakeOptionType "string" "CMAKE_OSX_DEPLOYMENT_TARGET" "${apple-sdk_26.version}")
(lib.cmakeOptionType "filepath" "CMAKE_OSX_SYSROOT" "${apple-sdk_26.passthru.sdkroot}")
];
SDKROOT = apple-sdk_26.passthru.sdkroot;
MACOSX_DEPLOYMENT_TARGET = apple-sdk_26.version;
};
build-system = [
setuptools
];
nativeBuildInputs = [
cmake
metal
];
buildInputs = [
fmt
gguf-tools
nanobind
pybind11
apple-sdk_26
];
pythonImportsCheck = [ "mlx" ];
# Run the mlx Python test suite.
nativeCheckInputs = [
numpy
pytestCheckHook
];
enabledTestPaths = [
"python/tests/"
];
# Additional testing by executing the example Python scripts supplied with mlx
# using the version of the library we've built.
passthru.tests = {
mlxTest =
runCommand "run-mlx-examples"
{
buildInputs = [ mlx ];
nativeBuildInputs = [ python ];
}
''
cp ${src}/examples/python/logistic_regression.py .
${python.interpreter} logistic_regression.py
rm logistic_regression.py
cp ${src}/examples/python/linear_regression.py .
${python.interpreter} linear_regression.py
rm linear_regression.py
touch $out
'';
};
meta = {
homepage = "https://github.com/ml-explore/mlx";
description = "Array framework for Apple silicon";
changelog = "https://github.com/ml-explore/mlx/releases/tag/${src.tag}";
license = lib.licenses.mit;
platforms = lib.platforms.darwin;
};
};
in
mlx
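
A sketch of how this file might be wired into a Python environment; the exact call site isn't shown in this diff, so the names below are assumptions:

# Hypothetical wiring (call site not shown in this diff):
mlx = pkgs.python313.pkgs.callPackage ./nix/mlx.nix {
  metal = pkgs.callPackage ./nix/metalWrapper.nix { metalVersion = "230"; };
};
pythonWithMlx = pkgs.python313.withPackages (_: [ mlx ]);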

View File

@@ -23,7 +23,6 @@ dependencies = [
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
"tomlkit>=0.14.0",
]
[project.scripts]

View File

@@ -1,15 +0,0 @@
short_id = "deepseek-v3.1-4bit"
model_id = "mlx-community/DeepSeek-V3.1-4bit"
name = "DeepSeek V3.1 (4-bit)"
description = "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset."
tags = []
[metadata]
model_id = "mlx-community/DeepSeek-V3.1-4bit"
pretty_name = "DeepSeek V3.1 (4-bit)"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[metadata.storage_size]
in_bytes = 405874409472

View File

@@ -1,15 +0,0 @@
short_id = "deepseek-v3.1-8bit"
model_id = "mlx-community/DeepSeek-V3.1-8bit"
name = "DeepSeek V3.1 (8-bit)"
description = "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset."
tags = []
[metadata]
model_id = "mlx-community/DeepSeek-V3.1-8bit"
pretty_name = "DeepSeek V3.1 (8-bit)"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[metadata.storage_size]
in_bytes = 765577920512

View File

@@ -1,15 +0,0 @@
short_id = "glm-4.5-air-8bit"
model_id = "mlx-community/GLM-4.5-Air-8bit"
name = "GLM 4.5 Air 8bit"
description = "GLM 4.5 Air 8bit"
tags = []
[metadata]
model_id = "mlx-community/GLM-4.5-Air-8bit"
pretty_name = "GLM 4.5 Air 8bit"
n_layers = 46
hidden_size = 4096
supports_tensor = false
[metadata.storage_size]
in_bytes = 122406567936

View File

@@ -1,15 +0,0 @@
short_id = "glm-4.5-air-bf16"
model_id = "mlx-community/GLM-4.5-Air-bf16"
name = "GLM 4.5 Air bf16"
description = "GLM 4.5 Air bf16"
tags = []
[metadata]
model_id = "mlx-community/GLM-4.5-Air-bf16"
pretty_name = "GLM 4.5 Air bf16"
n_layers = 46
hidden_size = 4096
supports_tensor = true
[metadata.storage_size]
in_bytes = 229780750336

View File

@@ -1,15 +0,0 @@
short_id = "glm-4.7-4bit"
model_id = "mlx-community/GLM-4.7-4bit"
name = "GLM 4.7 4bit"
description = "GLM 4.7 4bit"
tags = []
[metadata]
model_id = "mlx-community/GLM-4.7-4bit"
pretty_name = "GLM 4.7 4bit"
n_layers = 91
hidden_size = 5120
supports_tensor = true
[metadata.storage_size]
in_bytes = 198556925568

View File

@@ -1,15 +0,0 @@
short_id = "glm-4.7-6bit"
model_id = "mlx-community/GLM-4.7-6bit"
name = "GLM 4.7 6bit"
description = "GLM 4.7 6bit"
tags = []
[metadata]
model_id = "mlx-community/GLM-4.7-6bit"
pretty_name = "GLM 4.7 6bit"
n_layers = 91
hidden_size = 5120
supports_tensor = true
[metadata.storage_size]
in_bytes = 286737579648

View File

@@ -1,15 +0,0 @@
short_id = "glm-4.7-8bit-gs32"
model_id = "mlx-community/GLM-4.7-8bit-gs32"
name = "GLM 4.7 8bit (gs32)"
description = "GLM 4.7 8bit (gs32)"
tags = []
[metadata]
model_id = "mlx-community/GLM-4.7-8bit-gs32"
pretty_name = "GLM 4.7 8bit (gs32)"
n_layers = 91
hidden_size = 5120
supports_tensor = true
[metadata.storage_size]
in_bytes = 396963397248

View File

@@ -1,15 +0,0 @@
short_id = "gpt-oss-120b-MXFP4-Q8"
model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
name = "GPT-OSS 120B (MXFP4-Q8, MLX)"
description = "OpenAI's GPT-OSS 120B is a 117B-parameter Mixture-of-Experts model designed for high-reasoning and general-purpose use; this variant is a 4-bit MLX conversion for Apple Silicon."
tags = []
[metadata]
model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
pretty_name = "GPT-OSS 120B (MXFP4-Q8, MLX)"
n_layers = 36
hidden_size = 2880
supports_tensor = true
[metadata.storage_size]
in_bytes = 70652212224

View File

@@ -1,15 +0,0 @@
short_id = "gpt-oss-20b-4bit"
model_id = "mlx-community/gpt-oss-20b-MXFP4-Q4"
name = "GPT-OSS 20B (MXFP4-Q4, MLX)"
description = "OpenAI's GPT-OSS 20B is a medium-sized MoE model for lower-latency and local or specialized use cases; this MLX variant uses MXFP4 4-bit quantization."
tags = []
[metadata]
model_id = "mlx-community/gpt-oss-20b-MXFP4-Q4"
pretty_name = "GPT-OSS 20B (MXFP4-Q4, MLX)"
n_layers = 24
hidden_size = 2880
supports_tensor = true
[metadata.storage_size]
in_bytes = 12025908224

View File

@@ -1,15 +0,0 @@
short_id = "kimi-k2-instruct-4bit"
model_id = "mlx-community/Kimi-K2-Instruct-4bit"
name = "Kimi K2 Instruct (4-bit)"
description = "Kimi K2 is a large language model trained on the Kimi K2 dataset."
tags = []
[metadata]
model_id = "mlx-community/Kimi-K2-Instruct-4bit"
pretty_name = "Kimi K2 Instruct (4-bit)"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[metadata.storage_size]
in_bytes = 620622774272

View File

@@ -1,15 +0,0 @@
short_id = "kimi-k2-thinking"
model_id = "mlx-community/Kimi-K2-Thinking"
name = "Kimi K2 Thinking (4-bit)"
description = "Kimi K2 Thinking is the latest, most capable version of open-source thinking model."
tags = []
[metadata]
model_id = "mlx-community/Kimi-K2-Thinking"
pretty_name = "Kimi K2 Thinking (4-bit)"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[metadata.storage_size]
in_bytes = 706522120192

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.1-70b"
model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
name = "Llama 3.1 70B (4-bit)"
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
tags = []
[metadata]
model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
pretty_name = "Llama 3.1 70B (4-bit)"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[metadata.storage_size]
in_bytes = 40652242944

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.1-8b-8bit"
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
name = "Llama 3.1 8B (8-bit)"
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
tags = []
[metadata]
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
pretty_name = "Llama 3.1 8B (8-bit)"
n_layers = 32
hidden_size = 4096
supports_tensor = true
[metadata.storage_size]
in_bytes = 8954839040

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.1-8b-bf16"
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
name = "Llama 3.1 8B (BF16)"
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
tags = []
[metadata]
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
pretty_name = "Llama 3.1 8B (BF16)"
n_layers = 32
hidden_size = 4096
supports_tensor = true
[metadata.storage_size]
in_bytes = 16882073600

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.1-8b"
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
name = "Llama 3.1 8B (4-bit)"
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
tags = []
[metadata]
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
pretty_name = "Llama 3.1 8B (4-bit)"
n_layers = 32
hidden_size = 4096
supports_tensor = true
[metadata.storage_size]
in_bytes = 4637851648

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.2-1b"
model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
name = "Llama 3.2 1B (4-bit)"
description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
tags = []
[metadata]
model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
pretty_name = "Llama 3.2 1B (4-bit)"
n_layers = 16
hidden_size = 2048
supports_tensor = true
[metadata.storage_size]
in_bytes = 729808896

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.2-3b-8bit"
model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
name = "Llama 3.2 3B (8-bit)"
description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
tags = []
[metadata]
model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
pretty_name = "Llama 3.2 3B (8-bit)"
n_layers = 28
hidden_size = 3072
supports_tensor = true
[metadata.storage_size]
in_bytes = 3501195264

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.2-3b"
model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
name = "Llama 3.2 3B (4-bit)"
description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
tags = []
[metadata]
model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
pretty_name = "Llama 3.2 3B (4-bit)"
n_layers = 28
hidden_size = 3072
supports_tensor = true
[metadata.storage_size]
in_bytes = 1863319552

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.3-70b-8bit"
model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
name = "Llama 3.3 70B (8-bit)"
description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
tags = []
[metadata]
model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
pretty_name = "Llama 3.3 70B (8-bit)"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[metadata.storage_size]
in_bytes = 76799803392

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.3-70b-fp16"
model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
name = "Llama 3.3 70B (FP16)"
description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
tags = []
[metadata]
model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
pretty_name = "Llama 3.3 70B (FP16)"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[metadata.storage_size]
in_bytes = 144383672320

View File

@@ -1,15 +0,0 @@
short_id = "llama-3.3-70b"
model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
name = "Llama 3.3 70B (4-bit)"
description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
tags = []
[metadata]
model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
pretty_name = "Llama 3.3 70B"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[metadata.storage_size]
in_bytes = 40652242944

View File

@@ -1,15 +0,0 @@
short_id = "minimax-m2.1-3bit"
model_id = "mlx-community/MiniMax-M2.1-3bit"
name = "MiniMax M2.1 3bit"
description = "MiniMax M2.1 3bit"
tags = []
[metadata]
model_id = "mlx-community/MiniMax-M2.1-3bit"
pretty_name = "MiniMax M2.1 3bit"
n_layers = 61
hidden_size = 3072
supports_tensor = true
[metadata.storage_size]
in_bytes = 100086644736

View File

@@ -1,15 +0,0 @@
short_id = "minimax-m2.1-8bit"
model_id = "mlx-community/MiniMax-M2.1-8bit"
name = "MiniMax M2.1 8bit"
description = "MiniMax M2.1 8bit"
tags = []
[metadata]
model_id = "mlx-community/MiniMax-M2.1-8bit"
pretty_name = "MiniMax M2.1 8bit"
n_layers = 61
hidden_size = 3072
supports_tensor = true
[metadata.storage_size]
in_bytes = 242986745856

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-0.6b-8bit"
model_id = "mlx-community/Qwen3-0.6B-8bit"
name = "Qwen3 0.6B (8-bit)"
description = "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-0.6B-8bit"
pretty_name = "Qwen3 0.6B (8-bit)"
n_layers = 28
hidden_size = 1024
supports_tensor = false
[metadata.storage_size]
in_bytes = 698351616

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-0.6b"
model_id = "mlx-community/Qwen3-0.6B-4bit"
name = "Qwen3 0.6B (4-bit)"
description = "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-0.6B-4bit"
pretty_name = "Qwen3 0.6B (4-bit)"
n_layers = 28
hidden_size = 1024
supports_tensor = false
[metadata.storage_size]
in_bytes = 342884352

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-235b-a22b-4bit"
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
name = "Qwen3 235B A22B (4-bit)"
description = "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
pretty_name = "Qwen3 235B A22B (4-bit)"
n_layers = 94
hidden_size = 4096
supports_tensor = true
[metadata.storage_size]
in_bytes = 141733920768

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-235b-a22b-8bit"
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
name = "Qwen3 235B A22B (8-bit)"
description = "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
pretty_name = "Qwen3 235B A22B (8-bit)"
n_layers = 94
hidden_size = 4096
supports_tensor = true
[metadata.storage_size]
in_bytes = 268435456000

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-30b-8bit"
model_id = "mlx-community/Qwen3-30B-A3B-8bit"
name = "Qwen3 30B A3B (8-bit)"
description = "Qwen3 30B is a large language model trained on the Qwen3 30B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-30B-A3B-8bit"
pretty_name = "Qwen3 30B A3B (8-bit)"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[metadata.storage_size]
in_bytes = 33279705088

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-30b"
model_id = "mlx-community/Qwen3-30B-A3B-4bit"
name = "Qwen3 30B A3B (4-bit)"
description = "Qwen3 30B is a large language model trained on the Qwen3 30B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-30B-A3B-4bit"
pretty_name = "Qwen3 30B A3B (4-bit)"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[metadata.storage_size]
in_bytes = 17612931072

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-80b-a3B-4bit"
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
name = "Qwen3 80B A3B (4-bit)"
description = "Qwen3 80B"
tags = []
[metadata]
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
pretty_name = "Qwen3 80B A3B (4-bit)"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[metadata.storage_size]
in_bytes = 46976204800

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-80b-a3B-8bit"
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
name = "Qwen3 80B A3B (8-bit)"
description = "Qwen3 80B"
tags = []
[metadata]
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
pretty_name = "Qwen3 80B A3B (8-bit)"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[metadata.storage_size]
in_bytes = 88814387200

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-80b-a3B-thinking-4bit"
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
name = "Qwen3 80B A3B Thinking (4-bit)"
description = "Qwen3 80B Reasoning model"
tags = []
[metadata]
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
pretty_name = "Qwen3 80B A3B (4-bit)"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[metadata.storage_size]
in_bytes = 88814387200

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-80b-a3B-thinking-8bit"
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
name = "Qwen3 80B A3B Thinking (8-bit)"
description = "Qwen3 80B Reasoning model"
tags = []
[metadata]
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
pretty_name = "Qwen3 80B A3B (8-bit)"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[metadata.storage_size]
in_bytes = 88814387200

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-coder-480b-a35b-4bit"
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
name = "Qwen3 Coder 480B A35B (4-bit)"
description = "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
pretty_name = "Qwen3 Coder 480B A35B (4-bit)"
n_layers = 62
hidden_size = 6144
supports_tensor = true
[metadata.storage_size]
in_bytes = 289910292480

View File

@@ -1,15 +0,0 @@
short_id = "qwen3-coder-480b-a35b-8bit"
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
name = "Qwen3 Coder 480B A35B (8-bit)"
description = "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset."
tags = []
[metadata]
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
pretty_name = "Qwen3 Coder 480B A35B (8-bit)"
n_layers = 62
hidden_size = 6144
supports_tensor = true
[metadata.storage_size]
in_bytes = 579820584960

View File

@@ -1,145 +0,0 @@
{ inputs, ... }:
{
perSystem =
{ config, self', inputs', pkgs, lib, ... }:
let
# Fenix nightly toolchain with all components
fenixPkgs = inputs'.fenix.packages;
rustToolchain = fenixPkgs.complete.withComponents [
"cargo"
"rustc"
"clippy"
"rustfmt"
"rust-src"
"rust-analyzer"
];
# Crane with fenix toolchain
craneLib = (inputs.crane.mkLib pkgs).overrideToolchain rustToolchain;
# Source filtering - only include rust/ directory and root Cargo files
# This ensures changes to Python/docs/etc don't trigger Rust rebuilds
src = lib.cleanSourceWith {
src = inputs.self;
filter =
path: type:
let
baseName = builtins.baseNameOf path;
parentDir = builtins.dirOf path;
inRustDir =
(lib.hasInfix "/rust/" path)
|| (lib.hasSuffix "/rust" parentDir)
|| (baseName == "rust" && type == "directory");
isRootCargoFile =
(baseName == "Cargo.toml" || baseName == "Cargo.lock")
&& (builtins.dirOf path == toString inputs.self);
in
isRootCargoFile
|| (inRustDir && (craneLib.filterCargoSources path type || lib.hasSuffix ".toml" path || lib.hasSuffix ".md" path));
};
# Common arguments for all Rust builds
commonArgs = {
inherit src;
pname = "exo-rust";
version = "0.0.1";
strictDeps = true;
nativeBuildInputs = [
pkgs.pkg-config
pkgs.python313 # Required for pyo3-build-config
];
buildInputs = [
pkgs.openssl
pkgs.python313 # Required for pyo3 tests
];
OPENSSL_NO_VENDOR = "1";
# Required for pyo3 tests to find libpython
LD_LIBRARY_PATH = lib.makeLibraryPath [ pkgs.python313 ];
};
# Build dependencies once for caching
cargoArtifacts = craneLib.buildDepsOnly (
commonArgs
// {
cargoExtraArgs = "--workspace";
}
);
in
{
# Export toolchain for use in treefmt and devShell
options.rust = {
toolchain = lib.mkOption {
type = lib.types.package;
default = rustToolchain;
description = "The Rust toolchain to use";
};
};
config = {
packages = {
# Python bindings wheel via maturin
exo_pyo3_bindings = craneLib.buildPackage (
commonArgs
// {
inherit cargoArtifacts;
pname = "exo_pyo3_bindings";
nativeBuildInputs = commonArgs.nativeBuildInputs ++ [
pkgs.maturin
];
buildPhaseCargoCommand = ''
maturin build \
--release \
--manylinux off \
--manifest-path rust/exo_pyo3_bindings/Cargo.toml \
--features "pyo3/extension-module,pyo3/experimental-async" \
--interpreter ${pkgs.python313}/bin/python \
--out dist
'';
# Don't use crane's default install behavior
doNotPostBuildInstallCargoBinaries = true;
installPhaseCommand = ''
mkdir -p $out
cp dist/*.whl $out/
'';
}
);
};
checks = {
# Full workspace build (all crates)
cargo-build = craneLib.buildPackage (
commonArgs
// {
inherit cargoArtifacts;
cargoExtraArgs = "--workspace";
}
);
# Run tests with nextest
cargo-nextest = craneLib.cargoNextest (
commonArgs
// {
inherit cargoArtifacts;
cargoExtraArgs = "--workspace";
}
);
# Build documentation
cargo-doc = craneLib.cargoDoc (
commonArgs
// {
inherit cargoArtifacts;
cargoExtraArgs = "--workspace";
}
);
};
};
};
}

View File

@@ -0,0 +1,47 @@
[package]
name = "system_custodian"
version = { workspace = true }
edition = { workspace = true }
publish = false
[lib]
doctest = false
name = "system_custodian"
path = "src/lib.rs"
[[bin]]
path = "src/bin/main.rs"
name = "system_custodian"
doc = false
[lints]
workspace = true
[dependencies]
# datastructures
either = { workspace = true }
# macro dependencies
extend = { workspace = true }
delegate = { workspace = true }
impl-trait-for-tuples = { workspace = true }
derive_more = { workspace = true }
# async
tokio = { workspace = true, features = ["full"] }
futures = { workspace = true }
futures-timer = { workspace = true }
# utility dependencies
util = { workspace = true }
thiserror = { workspace = true }
#internment = { workspace = true }
#recursion = { workspace = true }
#generativity = { workspace = true }
#itertools = { workspace = true }
tracing-subscriber = { version = "0.3.19", features = ["default", "env-filter"] }
keccak-const = { workspace = true }
# tracing/logging
log = { workspace = true }

View File

@@ -0,0 +1,4 @@
//! TODO: documentation
//!
fn main() {}

View File

@@ -0,0 +1,69 @@
//! This crate defines the logic of, and ways to interact with, Exo's **_System Custodian_** daemon.
//!
//! The **_System Custodian_** daemon is a long-lived process that precedes the
//! launch of the Exo application, and is responsible for ensuring the system (configuration,
//! settings, etc.) is in an appropriate state to facilitate running the Exo application.
//! The **_System Custodian_** daemon shall expose a [D-Bus](https://www.freedesktop.org/wiki/Software/dbus/)
//! service which the Exo application uses to _control & query_ it.
//!
//! # Lifecycle
//! When the Exo application starts, it will _wake_ the **_System Custodian_** daemon for the
//! duration of its lifetime, and after it has terminated the daemon will go back to sleep. When
//! the daemon wakes up, it will configure the system into a state suitable for the Exo Application;
//! When the daemon goes to sleep, it will revert those changes as much as it can in case they were
//! destructive to the user's pre-existing configurations.
//!
//! # Responsibilities
//! TODO: these are currently macOS-only; broaden them in the future.
//! The **_System Custodian_** daemon is responsible for using the System Configuration framework to
//! 1. duplicate the current network set
//! 2. modify existing services to turn on IPv6 if not there
//! 3. remove any bridge services & add any missing services that AREN'T bridge
//! TODO: In the future:
//! 1. run a dummy AWDL service to [allow for macOS peer-to-peer wireless networking](https://yggdrasil-network.github.io/2019/08/19/awdl.html)
//! 2. toggle some GPU/memory configurations to speed up GPU (ask Alex what those configurations are)
//! 3. if we ever decide to provide our **own network interfaces** that abstract over some userland
//! logic, this would be the place to spin that up.
//!
//! Then it will watch the SCDynamicStore for:
//! 1. all __actual__ network interfaces -> collect information on them e.g. their BSD name, MAC
//! address, MTU, IPv6 addresses, etc. -> and set up watchers/notifiers to inform the DBus
//! interface of any changes
//! 2. watch for any __undesirable__ changes to configuration and revert it
//!
//! It should somehow (probably through system sockets and/or BSD interface) trigger IPv6 NDP on
//! each of the interfaces & also listen to/query for any changes on the OS routing cache??
//! Basically emulate the `ping6 ff02::1%enX` and `ndp -an` commands BUT BETTER!!!
//! 1. all that info should coalesce back to the overall state collected -> should be queryable
//! over D-Bus
//! TODO:
//! 1. we might potentially add to this step a handshake of some kind...? To ensure that we can
//! ACTUALLY communicate with that machine over that link over e.g. TCP, UDP, etc. Will the
//! handshake require to know Node ID? Will the handshake require heartbeats? Who knows...
//! 2. if we ever decide to write proprietary L2/L3 protocols for quicker communication,
//! e.g. [AF_NDRV](https://www.zerotier.com/blog/how-zerotier-eliminated-kernel-extensions-on-macos/)
//! for raw ethernet frame communication, or even a [custom thunderbolt PCIe driver](https://developer.apple.com/documentation/pcidriverkit/creating-custom-pcie-drivers-for-thunderbolt-devices),
//! then this would be the place to carry out discovery and proper handshakes with devices
//! on the other end of the link.
//!
// enable Rust-unstable features for convenience
#![feature(trait_alias)]
#![feature(stmt_expr_attributes)]
#![feature(type_alias_impl_trait)]
#![feature(specialization)]
#![feature(unboxed_closures)]
#![feature(const_trait_impl)]
#![feature(fn_traits)]
pub(crate) mod private {
// sealed traits support
pub trait Sealed {}
impl<T: ?Sized> Sealed for T {}
}
/// Namespace for all the type/trait aliases used by this crate.
pub(crate) mod alias {}
/// Namespace for crate-wide extension traits/methods
pub(crate) mod ext {}

View File

@@ -1,6 +1,3 @@
from anyio import Path, open_file
import tomlkit
from exo.shared.types.memory import Memory
from exo.shared.types.models import ModelId, ModelMetadata
from exo.utils.pydantic_ext import CamelCaseModel
@@ -14,24 +11,35 @@ class ModelCard(CamelCaseModel):
tags: list[str]
metadata: ModelMetadata
@staticmethod
async def load(path: Path) -> "ModelCard":
async with await open_file(path) as f:
data = await f.read()
py = tomlkit.loads(data)
return ModelCard.model_validate(py)
async def save(self, path: Path):
async with await open_file(path, "w") as f:
py = self.model_dump()
data = tomlkit.dumps(py) # pyright: ignore[reportUnknownMemberType]
await f.write(data)
MODEL_CARDS: dict[str, ModelCard] = {
# deepseek v3
# "deepseek-v3-0324:4bit": ModelCard(
# short_id="deepseek-v3-0324:4bit",
# model_id="mlx-community/DeepSeek-V3-0324-4bit",
# name="DeepSeek V3 0324 (4-bit)",
# description="""DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-V3-0324-4bit"),
# pretty_name="DeepSeek V3 0324 (4-bit)",
# storage_size=Memory.from_kb(409706307),
# n_layers=61,
# ),
# ),
# "deepseek-v3-0324": ModelCard(
# short_id="deepseek-v3-0324",
# model_id="mlx-community/DeepSeek-v3-0324-8bit",
# name="DeepSeek V3 0324 (8-bit)",
# description="""DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-v3-0324-8bit"),
# pretty_name="DeepSeek V3 0324 (8-bit)",
# storage_size=Memory.from_kb(754706307),
# n_layers=61,
# ),
# ),
"deepseek-v3.1-4bit": ModelCard(
short_id="deepseek-v3.1-4bit",
model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"),
@@ -62,6 +70,63 @@ MODEL_CARDS: dict[str, ModelCard] = {
supports_tensor=True,
),
),
# "deepseek-v3.2": ModelCard(
# short_id="deepseek-v3.2",
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
# name="DeepSeek V3.2 (8-bit)",
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
# pretty_name="DeepSeek V3.2 (8-bit)",
# storage_size=Memory.from_kb(754706307),
# n_layers=61,
# hidden_size=7168,
# ),
# ),
# "deepseek-v3.2-4bit": ModelCard(
# short_id="deepseek-v3.2-4bit",
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
# name="DeepSeek V3.2 (4-bit)",
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
# pretty_name="DeepSeek V3.2 (4-bit)",
# storage_size=Memory.from_kb(754706307 // 2), # TODO !!!!!
# n_layers=61,
# hidden_size=7168,
# ),
# ),
# deepseek r1
# "deepseek-r1-0528-4bit": ModelCard(
# short_id="deepseek-r1-0528-4bit",
# model_id="mlx-community/DeepSeek-R1-0528-4bit",
# name="DeepSeek-R1-0528 (4-bit)",
# description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-R1-0528-4bit"),
# pretty_name="DeepSeek R1 671B (4-bit)",
# storage_size=Memory.from_kb(409706307),
# n_layers=61,
# hidden_size=7168,
# ),
# ),
# "deepseek-r1-0528": ModelCard(
# short_id="deepseek-r1-0528",
# model_id="mlx-community/DeepSeek-R1-0528-8bit",
# name="DeepSeek-R1-0528 (8-bit)",
# description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-R1-0528-8bit"),
# pretty_name="DeepSeek R1 671B (8-bit)",
# storage_size=Memory.from_bytes(754998771712),
# n_layers=61,
# hidden_size=7168,
# ),
# ),
# kimi k2
"kimi-k2-instruct-4bit": ModelCard(
short_id="kimi-k2-instruct-4bit",
@@ -458,9 +523,8 @@ MODEL_CARDS: dict[str, ModelCard] = {
supports_tensor=True,
),
),
# glm 4.5
# Needs to be quantized g32 or g16.
"glm-4.5-air-8bit": ModelCard(
# Needs to be quantized g32 or g16 to work with tensor parallel
short_id="glm-4.5-air-8bit",
model_id=ModelId("mlx-community/GLM-4.5-Air-8bit"),
name="GLM 4.5 Air 8bit",
@@ -490,81 +554,19 @@ MODEL_CARDS: dict[str, ModelCard] = {
supports_tensor=True,
),
),
# glm 4.7
"glm-4.7-4bit": ModelCard(
short_id="glm-4.7-4bit",
model_id=ModelId("mlx-community/GLM-4.7-4bit"),
name="GLM 4.7 4bit",
description="GLM 4.7 4bit",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/GLM-4.7-4bit"),
pretty_name="GLM 4.7 4bit",
storage_size=Memory.from_bytes(198556925568),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
),
"glm-4.7-6bit": ModelCard(
short_id="glm-4.7-6bit",
model_id=ModelId("mlx-community/GLM-4.7-6bit"),
name="GLM 4.7 6bit",
description="GLM 4.7 6bit",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/GLM-4.7-6bit"),
pretty_name="GLM 4.7 6bit",
storage_size=Memory.from_bytes(286737579648),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
),
"glm-4.7-8bit-gs32": ModelCard(
short_id="glm-4.7-8bit-gs32",
model_id=ModelId("mlx-community/GLM-4.7-8bit-gs32"),
name="GLM 4.7 8bit (gs32)",
description="GLM 4.7 8bit (gs32)",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/GLM-4.7-8bit-gs32"),
pretty_name="GLM 4.7 8bit (gs32)",
storage_size=Memory.from_bytes(396963397248),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
),
# minimax-m2
"minimax-m2.1-8bit": ModelCard(
short_id="minimax-m2.1-8bit",
model_id=ModelId("mlx-community/MiniMax-M2.1-8bit"),
name="MiniMax M2.1 8bit",
description="MiniMax M2.1 8bit",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/MiniMax-M2.1-8bit"),
pretty_name="MiniMax M2.1 8bit",
storage_size=Memory.from_bytes(242986745856),
n_layers=61,
hidden_size=3072,
supports_tensor=True,
),
),
"minimax-m2.1-3bit": ModelCard(
short_id="minimax-m2.1-3bit",
model_id=ModelId("mlx-community/MiniMax-M2.1-3bit"),
name="MiniMax M2.1 3bit",
description="MiniMax M2.1 3bit",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/MiniMax-M2.1-3bit"),
pretty_name="MiniMax M2.1 3bit",
storage_size=Memory.from_bytes(100086644736),
n_layers=61,
hidden_size=3072,
supports_tensor=True,
),
),
# "devstral-2-123b-instruct-2512-8bit": ModelCard(
# short_id="devstral-2-123b-instruct-2512-8bit",
# model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
# name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
# description="""Mistral AI's Devstral 2 123B Instruct (2512) is an agentic coding model.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
# pretty_name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
# storage_size=Memory.from_kb(133_000_000),
# n_layers=88,
# hidden_size=12288,
# supports_tensor=True,
# ),
# ),
}

View File

@@ -10,24 +10,18 @@ from mlx.nn.layers.distributed import (
shard_linear,
sum_gradients,
)
from mlx_lm.models.cache import (
_BaseCache, # pyright: ignore[reportPrivateUsage]
)
from mlx_lm.models.deepseek_v3 import DeepseekV3MLP
from mlx_lm.models.deepseek_v3 import Model as DeepseekV3Model
from mlx_lm.models.deepseek_v32 import DeepseekV32MLP
from mlx_lm.models.deepseek_v32 import Model as DeepseekV32Model
from mlx_lm.models.glm4_moe import Model as Glm4MoeModel
from mlx_lm.models.glm4_moe import MoE
from mlx_lm.models.gpt_oss import GptOssMoeModel
from mlx_lm.models.gpt_oss import Model as GptOssModel
from mlx_lm.models.llama import Model as LlamaModel
from mlx_lm.models.minimax import Model as MiniMaxModel
from mlx_lm.models.ministral3 import Model as Ministral3Model
from mlx_lm.models.qwen3_moe import Model as Qwen3MoeModel
from mlx_lm.models.qwen3_moe import Qwen3MoeSparseMoeBlock
from mlx_lm.models.qwen3_next import Model as Qwen3NextModel
from mlx_lm.models.qwen3_next import Qwen3NextSparseMoeBlock
from exo.shared.logging import logger
from exo.shared.types.worker.shards import PipelineShardMetadata
from exo.shared.types.worker.shards import (
PipelineShardMetadata,
)
class _LayerCallable(Protocol):
@@ -97,6 +91,8 @@ class PipelineLastLayer(CustomMlxLayer):
x, *args, **kwargs
).arguments.get("cache", None)
assert cache is None or issubclass(type(cache), _BaseCache) # type: ignore
output: mx.array = self.original_layer(x, *args, **kwargs)
if self.r != self.s - 1:
@@ -104,6 +100,7 @@ class PipelineLastLayer(CustomMlxLayer):
output, (self.r + 1) % self.s, group=self.group
)
if cache is not None:
# This change happened upstream - check out mlx github somewhere??
cache.keys = mx.depends(cache.keys, output) # type: ignore[reportUnknownMemberType]
output = mx.distributed.all_gather(output, group=self.group)[-output.shape[0] :]
@@ -135,6 +132,24 @@ def _get_layers(inner_model_instance: nn.Module) -> list[_LayerCallable]:
return layers
def _set_layers(model: nn.Module, layers: list[_LayerCallable]) -> None:
inner_model_instance = _inner_model(model)
if hasattr(inner_model_instance, "layers"):
inner_model_instance.layers = layers
# Update DeepSeek V3 specific parameters when layers are shrunk
if isinstance(model, DeepseekV3Model) and hasattr(
inner_model_instance, "num_layers"
):
inner_model_instance.start_idx = 0
inner_model_instance.end_idx = len(layers)
inner_model_instance.num_layers = len(layers)
elif hasattr(inner_model_instance, "h"):
inner_model_instance.h = layers
else:
raise ValueError("Model must have either a 'layers' or 'h' attribute")
def pipeline_auto_parallel(
model: nn.Module,
group: mx.distributed.Group,
@@ -150,7 +165,8 @@ def pipeline_auto_parallel(
"""
inner_model_instance: nn.Module = _inner_model(model)
layers = _get_layers(inner_model_instance)
# Handle both model.layers and model.h cases
layers: list[_LayerCallable] = _get_layers(inner_model_instance)
start_layer, end_layer = model_shard_meta.start_layer, model_shard_meta.end_layer
device_rank, world_size = model_shard_meta.device_rank, model_shard_meta.world_size
@@ -164,17 +180,6 @@ def pipeline_auto_parallel(
group=group,
)
if isinstance(inner_model_instance, GptOssMoeModel):
inner_model_instance.layer_types = inner_model_instance.layer_types[ # type: ignore
start_layer:end_layer
]
inner_model_instance.swa_idx = inner_model_instance.layer_types.index( # type: ignore
"sliding_attention"
)
inner_model_instance.ga_idx = inner_model_instance.layer_types.index( # type: ignore
"full_attention"
)
_set_layers(model, layers)
assert isinstance(layers, list), (
@@ -199,44 +204,18 @@ def tensor_auto_parallel(
group=group,
)
segments: int = 1
def _all_to_sharded(path: str, weight: mx.array):
if path.endswith("bias"):
logger.info(f"Sharding bias for {path} - all to sharded")
return weight.ndim - 1, segments
return max(weight.ndim - 2, 0), segments
all_to_sharded_linear_in_place = partial(
shard_inplace,
sharding=_all_to_sharded, # type: ignore
sharding="all-to-sharded",
group=group,
)
n = group.size()
def _sharded_to_all(path: str, weight: mx.array):
if path.endswith("bias"):
logger.info(f"Sharding bias for {path} - sharded to all")
weight /= n
return None
return -1, segments
sharded_to_all_linear_in_place = partial(
shard_inplace,
sharding=_sharded_to_all, # type: ignore
sharding="sharded-to-all",
group=group,
)
if hasattr(model, "shard"):
try:
model.shard(group) # type: ignore
return model
except (AttributeError, TypeError, NameError):
pass
if isinstance(model, (LlamaModel, Ministral3Model)):
logger.warning("shouldn't be hit - upstream sharding exists")
if isinstance(model, LlamaModel):
tensor_parallel_sharding_strategy = LlamaShardingStrategy(
group,
all_to_sharded_linear,
@@ -244,8 +223,7 @@ def tensor_auto_parallel(
all_to_sharded_linear_in_place,
sharded_to_all_linear_in_place,
)
elif isinstance(model, (DeepseekV3Model, DeepseekV32Model)):
logger.warning("shouldn't be hit - upstream sharding exists")
elif isinstance(model, DeepseekV3Model):
tensor_parallel_sharding_strategy = DeepSeekShardingStrategy(
group,
all_to_sharded_linear,
@@ -253,15 +231,7 @@ def tensor_auto_parallel(
all_to_sharded_linear_in_place,
sharded_to_all_linear_in_place,
)
elif isinstance(model, MiniMaxModel):
tensor_parallel_sharding_strategy = MiniMaxShardingStrategy(
group,
all_to_sharded_linear,
sharded_to_all_linear,
all_to_sharded_linear_in_place,
sharded_to_all_linear_in_place,
)
elif isinstance(model, (Qwen3MoeModel, Glm4MoeModel, Qwen3NextModel)):
elif isinstance(model, Qwen3MoeModel):
tensor_parallel_sharding_strategy = QwenShardingStrategy(
group,
all_to_sharded_linear,
@@ -269,15 +239,6 @@ def tensor_auto_parallel(
all_to_sharded_linear_in_place,
sharded_to_all_linear_in_place,
)
elif isinstance(model, GptOssModel):
tensor_parallel_sharding_strategy = GptOssShardingStrategy(
group,
all_to_sharded_linear,
sharded_to_all_linear,
all_to_sharded_linear_in_place,
sharded_to_all_linear_in_place,
)
else:
raise ValueError(f"Unsupported model type: {type(model)}")
@@ -323,32 +284,6 @@ class LlamaShardingStrategy(TensorParallelShardingStrategy):
return model
def _set_layers(model: nn.Module, layers: list[_LayerCallable]) -> None:
inner_model_instance = _inner_model(model)
if hasattr(inner_model_instance, "layers"):
inner_model_instance.layers = layers
# Update DeepSeek V3 specific parameters when layers are shrunk
if isinstance(
model, (DeepseekV3Model, DeepseekV32Model, Glm4MoeModel)
) and hasattr(inner_model_instance, "num_layers"):
logger.info(
f"Setting num_layers to {len(layers)} for model {model.model.__class__.__name__}"
)
inner_model_instance.start_idx = 0
inner_model_instance.end_idx = len(layers)
inner_model_instance.num_layers = len(layers)
elif isinstance(model, Qwen3MoeModel):
logger.info(
f"Setting num_hidden_layers to {len(layers)} for model {model.model.__class__.__name__}"
)
inner_model_instance.num_hidden_layers = len(layers)
elif hasattr(inner_model_instance, "h"):
inner_model_instance.h = layers
else:
raise ValueError("Model must have either a 'layers' or 'h' attribute")
class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
def shard_model(self, model: nn.Module) -> nn.Module:
model = cast(DeepseekV3Model, model)
@@ -369,7 +304,7 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
layer.self_attn.num_heads //= self.N
# Shard the MLP
if isinstance(layer.mlp, (DeepseekV3MLP, DeepseekV32MLP)):
if isinstance(layer.mlp, DeepseekV3MLP):
layer.mlp.gate_proj = self.all_to_sharded_linear(layer.mlp.gate_proj)
layer.mlp.down_proj = self.sharded_to_all_linear(layer.mlp.down_proj)
layer.mlp.up_proj = self.all_to_sharded_linear(layer.mlp.up_proj)
@@ -403,35 +338,6 @@ class ShardedDeepseekV3MoE(CustomMlxLayer):
return y
class MiniMaxShardingStrategy(TensorParallelShardingStrategy):
def shard_model(self, model: nn.Module) -> nn.Module:
model = cast(MiniMaxModel, model)
for layer in model.layers:
# Shard the self attention
layer.self_attn.q_proj = self.all_to_sharded_linear(layer.self_attn.q_proj)
layer.self_attn.k_proj = self.all_to_sharded_linear(layer.self_attn.k_proj)
layer.self_attn.v_proj = self.all_to_sharded_linear(layer.self_attn.v_proj)
layer.self_attn.o_proj = self.sharded_to_all_linear(layer.self_attn.o_proj)
layer.self_attn.num_attention_heads //= self.N
layer.self_attn.num_key_value_heads //= self.N
# Shard the MoE. Shard in place since the MoE should be responsible
# for aggregating the results.
self.all_to_sharded_linear_in_place(
layer.block_sparse_moe.switch_mlp.gate_proj
)
self.sharded_to_all_linear_in_place(
layer.block_sparse_moe.switch_mlp.down_proj
)
self.all_to_sharded_linear_in_place(
layer.block_sparse_moe.switch_mlp.up_proj
)
layer.block_sparse_moe = ShardedQwenMoE(layer.block_sparse_moe) # pyright: ignore[reportAttributeAccessIssue, reportArgumentType]
layer.block_sparse_moe.sharding_group = self.group
return model
class QwenShardingStrategy(TensorParallelShardingStrategy):
def shard_model(self, model: nn.Module) -> nn.Module:
model = cast(Qwen3MoeModel, model)
@@ -446,13 +352,11 @@ class QwenShardingStrategy(TensorParallelShardingStrategy):
# Shard the MoE. Shard in place since the MoE should be responsible
# for aggregating the results.
if isinstance(
layer.mlp, (Qwen3MoeSparseMoeBlock, MoE, Qwen3NextSparseMoeBlock)
):
if isinstance(layer.mlp, Qwen3MoeSparseMoeBlock):
self.all_to_sharded_linear_in_place(layer.mlp.switch_mlp.gate_proj)
self.sharded_to_all_linear_in_place(layer.mlp.switch_mlp.down_proj)
self.all_to_sharded_linear_in_place(layer.mlp.switch_mlp.up_proj)
layer.mlp = ShardedQwenMoE(layer.mlp) # pyright: ignore[reportAttributeAccessIssue, reportArgumentType]
layer.mlp = ShardedQwenMoE(layer.mlp) # type: ignore
layer.mlp.sharding_group = self.group
# Shard the MLP
@@ -476,50 +380,3 @@ class ShardedQwenMoE(CustomMlxLayer):
if self.sharding_group is not None:
y = mx.distributed.all_sum(y, group=self.sharding_group)
return y
class GptOssShardingStrategy(TensorParallelShardingStrategy):
def shard_model(self, model: nn.Module) -> nn.Module:
model = cast(GptOssMoeModel, model)
for layer in model.layers:
layer.self_attn.q_proj = self.all_to_sharded_linear(layer.self_attn.q_proj)
layer.self_attn.k_proj = self.all_to_sharded_linear(layer.self_attn.k_proj)
layer.self_attn.v_proj = self.all_to_sharded_linear(layer.self_attn.v_proj)
layer.self_attn.o_proj = self.sharded_to_all_linear(layer.self_attn.o_proj)
layer.self_attn.num_attention_heads //= self.N
layer.self_attn.num_key_value_heads //= self.N
layer.self_attn.num_key_value_groups = (
layer.self_attn.num_attention_heads
// layer.self_attn.num_key_value_heads
)
layer.self_attn.sinks = layer.self_attn.sinks[
layer.self_attn.num_attention_heads
* self.group.rank() : layer.self_attn.num_attention_heads
* (self.group.rank() + 1)
]
self.all_to_sharded_linear_in_place(layer.mlp.experts.gate_proj)
self.sharded_to_all_linear_in_place(layer.mlp.experts.down_proj)
self.all_to_sharded_linear_in_place(layer.mlp.experts.up_proj)
layer.mlp = ShardedGptOssMoE(layer.mlp) # type: ignore
layer.mlp.sharding_group = self.group
return model
class ShardedGptOssMoE(CustomMlxLayer):
def __init__(self, layer: nn.Module):
super().__init__(layer)
self.sharding_group: mx.distributed.Group | None = None
def __call__(self, x: mx.array) -> mx.array:
if self.sharding_group is not None:
x = sum_gradients(self.sharding_group)(x)
y = self.original_layer(x)
if self.sharding_group is not None:
y = mx.distributed.all_sum(y, group=self.sharding_group)
return y

View File

@@ -89,12 +89,6 @@ async def assert_downloads():
await sd.ensure_shard(
await build_full_shard(MODEL_CARDS["gpt-oss-20b-4bit"].model_id)
)
await sd.ensure_shard(
await build_full_shard(MODEL_CARDS["glm-4.7-8bit-gs32"].model_id)
)
await sd.ensure_shard(
await build_full_shard(MODEL_CARDS["minimax-m2.1-8bit"].model_id)
)
async def ring_backend(test: Tests):

uv.lock generated
View File

File diff suppressed because it is too large.