use custom mlx sources for linux (#2087)

Switch to hosting the mlx sources on GitHub and Cachix instead of depending on a broken version of mlx. Closes #2043.
2026-05-19 20:25:06 -04:00 · 2026-05-13 10:45:11 +01:00
parent ed2d10bdc6
commit 4466cd5323
4 changed files with 814 additions and 501 deletions
--- a/flake.nix
+++ b/flake.nix
@@ -146,7 +146,7 @@
                config.treefmt.build.wrapper

                # PYTHON
-                self'.packages.editableVenv
+                self'.packages.exo.passthru.evenv
                uv

                # RUST
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,20 +15,16 @@ dependencies = [
  "huggingface-hub>=1.8.0",
  "psutil>=7.0.0",
  "loguru>=0.7.3",
-  "exo-pyo3-bindings",                         # rust bindings
+  "exo-pyo3-bindings",               # rust bindings
  "anyio==4.11.0",
-  "mlx==0.31.2; sys_platform == 'darwin'",
-  "mlx-lm; sys_platform=='darwin'",
-  "tiktoken>=0.12.0",                          # required for kimi k2 tokenizer
+  "tiktoken>=0.12.0",                # required for kimi k2 tokenizer
  "hypercorn>=0.18.0",
  "openai-harmony>=0.0.8",
  "httpx>=0.28.1",
  "tomlkit>=0.14.0",
-  "mflux==0.17.2; sys_platform == 'darwin'",
  "python-multipart>=0.0.21",
  "msgspec>=0.19.0",
  "zstandard>=0.23.0",
-  "mlx-vlm>=0.3.11; sys_platform == 'darwin'",
  "transformers>=5.6.2",
 ]

@@ -49,26 +45,29 @@ dev = [

 [project.optional-dependencies]
 build = ["nanobind"]
-cpu = [
-  "mlx==0.31.1; sys_platform == 'linux'",
-  "mlx-cpu==0.31.1; sys_platform == 'linux'",
-  "mlx-lm; sys_platform == 'linux'",
-  "mlx-vlm>=0.3.11; sys_platform== 'linux'",
-  "torch>=2.10.0; sys_platform == 'linux'",
+mlx-none = ["anyio"] # anyio is already a core dependency; presumably a placeholder extra for installs without MLX — confirm
+mlx = [
+  "mlx==0.32.0",
+  "mlx-lm",
+  "mlx-vlm>=0.3.11",
+  "mflux==0.17.5",
+  "torch==2.10.0; sys_platform == 'darwin'",
+  "torch==2.10.0; sys_platform == 'linux'",
+  "torchaudio==2.10.0; sys_platform == 'darwin'",
+  "torchaudio==2.10.0; sys_platform == 'linux'",
+  "torchvision==0.25.0; sys_platform == 'darwin'",
+  "torchvision==0.25.0; sys_platform == 'linux'",
 ]
-cuda12 = [
-  "mlx==0.31.1; sys_platform == 'linux'",
-  "mlx-cuda-12==0.31.1; sys_platform == 'linux'",
-  "mlx-lm; sys_platform == 'linux'",
-  "mlx-vlm>=0.3.11; sys_platform== 'linux'",
-  "torch>=2.10.0; sys_platform == 'linux'",
+mlx-cpu = ["exo[mlx]", "mlx-cpu==0.31.2; sys_platform == 'linux'"] # NOTE(review): pinned to 0.31.2 while mlx and mlx-cuda-* are 0.32.0 — confirm this is intentional
+mlx-cuda12 = [
+  "exo[mlx]",
+  "mlx-cuda-12==0.32.0; sys_platform == 'linux'",
+  "nvidia-ml-py>=13.595.45",
 ]
-cuda13 = [
-  "mlx==0.31.1; sys_platform == 'linux'",
-  "mlx-cuda-13==0.31.1; sys_platform == 'linux'",
-  "mlx-lm; sys_platform == 'linux'",
-  "mlx-vlm>=0.3.11; sys_platform== 'linux'",
-  "torch>=2.10.0; sys_platform == 'linux'",
+mlx-cuda13 = [
+  "exo[mlx]",
+  "mlx-cuda-13==0.32.0; sys_platform == 'linux'",
+  "nvidia-ml-py>=13.595.45",
 ]

 ###
@@ -80,14 +79,37 @@ members = ["rust/exo_pyo3_bindings", "bench", "tools"]

 [tool.uv.sources]
 exo-pyo3-bindings = { workspace = true }
-mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" }
-mlx-lm = { git = "https://github.com/rltakashige/mlx-lm", branch = "leo/deepseek-v4" }
-torch = [
-  { index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'cuda13' and extra != 'cpu' and extra != 'cuda12'" },
-  { index = "pytorch-cu120", marker = "sys_platform == 'linux' and extra == 'cuda12' and extra != 'cpu' and extra != 'cuda13'" },
-  { index = "pytorch-cpu", marker = "(extra != 'cuda12' and extra != 'cuda13' and sys_platform == 'linux') or sys_platform == 'darwin'" },
+mlx = [
+  { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
+
+]
+mlx-lm = { git = "https://github.com/rltakashige/mlx-lm", branch = "leo/deepseek-v4" }
+mflux = { git = "https://github.com/evanev7/mflux", branch = "exo2" }
+torch = [
+  { index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13' " },
+  { index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
+]
+mlx-cuda-12 = [
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
+]
+mlx-cuda-13 = [
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
+]
+torchvision = [
+  { index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'" },
+  { index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
+]
+torchaudio = [
+  { index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'" },
+  { index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
 ]
-vllm = { git = "https://github.com/hmellor/vllm.git", branch = "transformers-v5" }

 [[tool.uv.index]]
 name = "pytorch-cu130"
@@ -95,8 +117,8 @@ url = "https://download.pytorch.org/whl/cu130"
 explicit = true

 [[tool.uv.index]]
-name = "pytorch-cu120"
-url = "https://download.pytorch.org/whl/cu120"
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
 explicit = true

 [[tool.uv.index]]
@@ -164,11 +186,14 @@ root = "tools/src"
 required-version = ">=0.8.6"
 prerelease = "allow"
 environments = ["sys_platform == 'darwin'", "sys_platform == 'linux'"]
-conflicts = [[{ extra = "cuda12" }, { extra = "cuda13" }, { extra = "cpu" }]]
-constraint-dependencies = ["transformers>=5.6.2"]
-override-dependencies = [
-  "mlx==0.31.1; sys_platform=='linux'",
-  "mlx; sys_platform=='darwin'",
+override-dependencies = ["opencv-python; python_version < '0'"] # marker can never be true — presumably used to drop opencv-python from the resolution; confirm
+conflicts = [
+  [
+    { extra = "mlx-cuda13" },
+    { extra = "mlx-cuda12" },
+    { extra = "mlx-cpu" },
+    { extra = "mlx-none" },
+  ],
 ]

 [tool.uv.extra-build-dependencies]
@@ -183,6 +208,7 @@ mlx = [
  "ninja",
 ]
 mlx-lm = ["setuptools"]
+mflux = ["uv_build"]
 xgrammar = [
  "nanobind",
  "setuptools",
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -10,10 +10,18 @@ let
      inherit (pkgs.stdenv.hostPlatform) isLinux isDarwin isx86_64;
      inherit (pkgs.config) cudaSupport;
      inherit (pkgs) cudaPackages;
-      cuda13Support = cudaSupport && cudaPackages.cudaMajorVersion == "13";
-      libmlx_source = if cuda13Support then "mlx-cuda-13" else if cudaSupport then "mlx-cuda-12" else "mlx-cpu";
+      libmlx_source =
+        if (builtins.elem "mlx-cuda13" members.exo or [ ]) then "mlx-cuda-13"
+        else if (builtins.elem "mlx-cuda12" members.exo or [ ]) then "mlx-cuda-12"
+        else "mlx-cpu";
      python = pkgs.python313;
+
+      cuda_cccl_compat = pkgs.runCommand "cuda-cccl-compat" { } ''
+        mkdir -p $out/include
+        ln -s ${cudaPackages.cuda_cccl}/include $out/include/cccl
+      '';
      cudaLibs = with cudaPackages; [
+        cuda_crt
        cuda_cudart
        cuda_cccl
        cuda_cupti
@@ -31,6 +39,10 @@ let
        libnvshmem
        nccl
      ];
+      cudaRoot = pkgs.symlinkJoin {
+        name = "cuda-merged-exo";
+        paths = builtins.concatMap (p: [ (lib.getBin p) (lib.getLib p) (lib.getDev p) ]) (cudaLibs ++ [ cudaPackages.cuda_nvcc cuda_cccl_compat ]);
+      };
      exoOverlay = final: prev: {
        # Replace workspace exo_pyo3_bindings with Nix-built wheel.
        # Preserve passthru so mkVirtualEnv can resolve dependency groups.
@@ -113,37 +125,60 @@ let
              });
          } // lib.optionalAttrs isLinux {
          mlx = prev.mlx.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ lib.optionals cudaSupport [ pkgs.autoAddDriverRunpath ];
            buildInputs = old.buildInputs ++ lib.optionals cudaSupport cudaLibs;
-            autoPatchelfIgnoreMissingDeps = lib.optionals cudaSupport [ "libcuda.so.1" ];
            postInstall = ''
              cp -r "${final.${libmlx_source}}/${final.python.sitePackages}/mlx" "$out/${final.python.sitePackages}/mlx/"
            '';
+            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
          });
        } // lib.optionalAttrs cudaSupport {
          "${libmlx_source}" = prev."${libmlx_source}".overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
            buildInputs = old.buildInputs ++ cudaLibs;
            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
          });
          nvidia-cufile = prev.nvidia-cufile.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
            buildInputs = old.buildInputs ++ [ pkgs.rdma-core ];
-            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
          });
          nvidia-cusolver = prev.nvidia-cusolver.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
            buildInputs = old.buildInputs ++ cudaLibs;
-            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
          });
          nvidia-nvshmem-cu13 = prev.nvidia-nvshmem-cu13.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
            buildInputs = old.buildInputs ++ [ pkgs.rdma-core pkgs.pmix pkgs.libfabric pkgs.ucx pkgs.openmpi ];
-            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
          });
          nvidia-cusparse = prev.nvidia-cusparse.overrideAttrs (old: {
-            buildInputs = old.buildInputs ++ [ cudaLibs ];
-            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ cudaLibs;
          });
          torch = prev.torch.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
            buildInputs = old.buildInputs ++ cudaLibs;
            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
          });
+          torchaudio = prev.torchaudio.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ [ cudaPackages.cuda_cudart ];
+            preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
+          });
+          torchvision = prev.torchvision.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
+          });
+
+          torch-c-dlpack-ext = prev.torch-c-dlpack-ext.overrideAttrs (old: {
+            buildInputs = old.buildInputs ++ cudaLibs;
+            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
+            preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
+          });
+
+        } // lib.optionalAttrs (cudaSupport && isx86_64) {
+          numba = prev.numba.overrideAttrs (old: {
+            buildInputs = (old.buildInputs or [ ]) ++ [ pkgs.tbb ];
+          });
        };
      pyprojectOverlay = workspace.mkPyprojectOverlay {
        sourcePreference = "wheel";
@@ -164,26 +199,43 @@ let
          buildSystemsOverlay
        ]
      );
-      venv = name: (pythonSet.mkVirtualEnv "${name}-env" members).overrideAttrs (_: { venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" ]; });
-      mkApp = cmd: name: pkgs.writeShellApplication {
-        inherit name;
-        runtimeEnv = {
-          EXO_DASHBOARD_DIR = self'.packages.dashboard;
-          EXO_RESOURCES_DIR = inputs.self + /resources;
+      # mlx and mlx-cuda ship clashing cmake files - we dont need them at runtime anyway
+      venv = name: (pythonSet.mkVirtualEnv "${name}-venv" members).overrideAttrs (_: { venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" "lib/python${python.pythonVersion}/site-packages/build_backend.py" ]; });
+      mkApp =
+        let
+          libPath = lib.makeLibraryPath (
+            [ pkgs.stdenv.cc.cc.lib ] ++ lib.optionals cudaSupport [ cudaRoot ]
+          );
+        in
+        text: name: pkgs.writeShellApplication {
+          inherit name;
+          text = ''
+            LD_LIBRARY_PATH="${libPath}''${LD_LIBRARY_PATH:+:}''${LD_LIBRARY_PATH:-}" exec \
+              ${lib.optionalString cudaSupport "nixglhost "} ${text}
+          '';
+          runtimeEnv = {
+            EXO_DASHBOARD_DIR = self'.packages.dashboard;
+            EXO_RESOURCES_DIR = inputs.self + /resources;
+          };
+          runtimeInputs = [
+            (venv name)
+          ] ++ lib.optionals cudaSupport [ pkgs.nix-gl-host ]
+          ++ lib.optionals isDarwin [ pkgs.macmon ];
+          passthru = {
+            venv = venv name;
+            evenv = ((pythonSet.overrideScope editableOverlay).mkVirtualEnv "${name}-evenv" (members // { exo = (members.exo or [ ]) ++ [ "dev" ]; })).overrideAttrs (_: {
+              venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" "lib/python${python.pythonVersion}/site-packages/build_backend.py" ];
+            });
+          } // lib.optionalAttrs cudaSupport {
+            inherit cudaRoot;
+          };
        };
-        runtimeInputs = [
-          # mlx and mlx-cuda ship clashing cmake files - we dont need them at runtime anyway
-          (venv name)
-        ]
-        ++ lib.optionals isDarwin [ pkgs.macmon ];
-        text = "exec " + lib.optionalString cudaSupport "${lib.getExe pkgs.nix-gl-host} " + cmd;
-      };
+
    in
    {
      inherit venv;
-      editablePythonSet = pythonSet.overrideScope editableOverlay;
      mkPythonScript = path: mkApp ''python ${path} "$@"'';
-      mkExo = mkApp ''exo "$@"'';
+      exo = mkApp ''exo "$@"'' "exo";
    };
 in
 {
@@ -191,18 +243,18 @@ in
    { self', pkgs, unfreePkgs, lib, ... }:
    let
      inherit (pkgs.stdenv.hostPlatform) isLinux;
-      inherit (mkPythonSet { inherit self' pkgs lib; members = { exo = [ "cpu" ]; }; }) editablePythonSet mkExo;
+      inherit (mkPythonSet { inherit self' pkgs lib; members = { exo = [ "mlx-cpu" ]; }; }) exo;

      # Virtual environment with dev dependencies for testing
      testVenv = (mkPythonSet {
        inherit self' pkgs lib; members = {
-        exo = [ "dev" "cpu" ]; # Include pytest, pytest-asyncio, pytest-env
+        exo = [ "dev" "mlx-cpu" ]; # Include pytest, pytest-asyncio, pytest-env
      };
      }).venv "exo-test";

      mkBenchScript = (mkPythonSet {
        inherit self' pkgs lib; members = {
-        exo = [ "cpu" ];
+        exo = [ "mlx-cpu" ];
        exo-bench = [ ]; # Include pytest, pytest-asyncio, pytest-env
      };
      }).mkPythonScript;
@@ -212,12 +264,14 @@ in
        runtimeInputs = [ pkgs.python313 ];
        text = ''exec python ${path} "$@"'';
      };
-
+      # if someone is particularly interested in cuda12 support in nix, please open an issue.
+      # until then, it's more hassle than it's worth
+      #cuda12Set = mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_12) pkgs; members = { exo = [ "mlx-cuda12" ]; }; };
+      cuda13Set = mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_13) pkgs; members = { exo = [ "mlx-cuda13" ]; }; };
    in
    {
      packages = {
-        exo = mkExo "exo";
-        editableVenv = editablePythonSet.mkVirtualEnv "exo-dev-env" { exo = [ "dev" ]; };
+        inherit exo;
        # for running tests in ci
        exo-test-env = testVenv;
        exo-bench = mkBenchScript "exo-bench" (inputs.self + /bench/exo_bench.py);
@@ -226,8 +280,8 @@ in
        # used by ./tests/run_exo_on.sh
        exo-get-all-models-on-cluster = mkSimplePythonScript "exo-get-all-models-on-cluster" (inputs.self + /tests/get_all_models_on_cluster.py);
      } // lib.optionalAttrs isLinux {
-        exo-cuda-12 = (mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_12) pkgs; members = { exo = [ "cuda12" ]; }; }).mkExo "exo-cuda-12";
-        exo-cuda-13 = (mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_13) pkgs; members = { exo = [ "cuda13" ]; }; }).mkExo "exo-cuda-13";
+        #exo-cuda-12 = cuda12Set.exo;
+        exo-cuda-13 = cuda13Set.exo;
      };

      checks = {
--- a/uv.lock
+++ b/uv.lock