use custom mlx sources for linux (#2087)

Switch to hosting mlx sources on GitHub & Cachix instead of using a
broken version of mlx. Closes #2043.
This commit is contained in:
Evan Quiney
2026-05-13 10:45:11 +01:00
committed by GitHub
parent ed2d10bdc6
commit 4466cd5323
4 changed files with 814 additions and 501 deletions

View File

@@ -146,7 +146,7 @@
config.treefmt.build.wrapper
# PYTHON
self'.packages.editableVenv
self'.packages.exo.passthru.evenv
uv
# RUST

View File

@@ -15,20 +15,16 @@ dependencies = [
"huggingface-hub>=1.8.0",
"psutil>=7.0.0",
"loguru>=0.7.3",
"exo-pyo3-bindings", # rust bindings
"exo-pyo3-bindings", # rust bindings
"anyio==4.11.0",
"mlx==0.31.2; sys_platform == 'darwin'",
"mlx-lm; sys_platform=='darwin'",
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
"httpx>=0.28.1",
"tomlkit>=0.14.0",
"mflux==0.17.2; sys_platform == 'darwin'",
"python-multipart>=0.0.21",
"msgspec>=0.19.0",
"zstandard>=0.23.0",
"mlx-vlm>=0.3.11; sys_platform == 'darwin'",
"transformers>=5.6.2",
]
@@ -49,26 +45,29 @@ dev = [
[project.optional-dependencies]
build = ["nanobind"]
cpu = [
"mlx==0.31.1; sys_platform == 'linux'",
"mlx-cpu==0.31.1; sys_platform == 'linux'",
"mlx-lm; sys_platform == 'linux'",
"mlx-vlm>=0.3.11; sys_platform== 'linux'",
"torch>=2.10.0; sys_platform == 'linux'",
mlx-none = ["anyio"]
mlx = [
"mlx==0.32.0",
"mlx-lm",
"mlx-vlm>=0.3.11",
"mflux==0.17.5",
"torch==2.10.0; sys_platform == 'darwin'",
"torch==2.10.0; sys_platform == 'linux'",
"torchaudio==2.10.0; sys_platform == 'darwin'",
"torchaudio==2.10.0; sys_platform == 'linux'",
"torchvision==0.25.0; sys_platform == 'darwin'",
"torchvision==0.25.0; sys_platform == 'linux'",
]
cuda12 = [
"mlx==0.31.1; sys_platform == 'linux'",
"mlx-cuda-12==0.31.1; sys_platform == 'linux'",
"mlx-lm; sys_platform == 'linux'",
"mlx-vlm>=0.3.11; sys_platform== 'linux'",
"torch>=2.10.0; sys_platform == 'linux'",
mlx-cpu = ["exo[mlx]", "mlx-cpu==0.31.2; sys_platform == 'linux'"]
mlx-cuda12 = [
"exo[mlx]",
"mlx-cuda-12==0.32.0; sys_platform == 'linux'",
"nvidia-ml-py>=13.595.45",
]
cuda13 = [
"mlx==0.31.1; sys_platform == 'linux'",
"mlx-cuda-13==0.31.1; sys_platform == 'linux'",
"mlx-lm; sys_platform == 'linux'",
"mlx-vlm>=0.3.11; sys_platform== 'linux'",
"torch>=2.10.0; sys_platform == 'linux'",
mlx-cuda13 = [
"exo[mlx]",
"mlx-cuda-13==0.32.0; sys_platform == 'linux'",
"nvidia-ml-py>=13.595.45",
]
###
@@ -80,14 +79,37 @@ members = ["rust/exo_pyo3_bindings", "bench", "tools"]
[tool.uv.sources]
exo-pyo3-bindings = { workspace = true }
mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" }
mlx-lm = { git = "https://github.com/rltakashige/mlx-lm", branch = "leo/deepseek-v4" }
torch = [
{ index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'cuda13' and extra != 'cpu' and extra != 'cuda12'" },
{ index = "pytorch-cu120", marker = "sys_platform == 'linux' and extra == 'cuda12' and extra != 'cpu' and extra != 'cuda13'" },
{ index = "pytorch-cpu", marker = "(extra != 'cuda12' and extra != 'cuda13' and sys_platform == 'linux') or sys_platform == 'darwin'" },
mlx = [
{ git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" },
{ url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
{ url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
]
mlx-lm = { git = "https://github.com/rltakashige/mlx-lm", branch = "leo/deepseek-v4" }
mflux = { git = "https://github.com/evanev7/mflux", branch = "exo2" }
torch = [
{ index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
{ index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13' " },
{ index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
]
mlx-cuda-12 = [
{ url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
{ url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
]
mlx-cuda-13 = [
{ url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
{ url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
]
torchvision = [
{ index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
{ index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'" },
{ index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
]
torchaudio = [
{ index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
{ index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'" },
{ index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
]
vllm = { git = "https://github.com/hmellor/vllm.git", branch = "transformers-v5" }
[[tool.uv.index]]
name = "pytorch-cu130"
@@ -95,8 +117,8 @@ url = "https://download.pytorch.org/whl/cu130"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu120"
url = "https://download.pytorch.org/whl/cu120"
name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true
[[tool.uv.index]]
@@ -164,11 +186,14 @@ root = "tools/src"
required-version = ">=0.8.6"
prerelease = "allow"
environments = ["sys_platform == 'darwin'", "sys_platform == 'linux'"]
conflicts = [[{ extra = "cuda12" }, { extra = "cuda13" }, { extra = "cpu" }]]
constraint-dependencies = ["transformers>=5.6.2"]
override-dependencies = [
"mlx==0.31.1; sys_platform=='linux'",
"mlx; sys_platform=='darwin'",
override-dependencies = ["opencv-python; python_version < '0'"]
conflicts = [
[
{ extra = "mlx-cuda13" },
{ extra = "mlx-cuda12" },
{ extra = "mlx-cpu" },
{ extra = "mlx-none" },
],
]
[tool.uv.extra-build-dependencies]
@@ -183,6 +208,7 @@ mlx = [
"ninja",
]
mlx-lm = ["setuptools"]
mflux = ["uv_build"]
xgrammar = [
"nanobind",
"setuptools",

View File

@@ -10,10 +10,18 @@ let
inherit (pkgs.stdenv.hostPlatform) isLinux isDarwin isx86_64;
inherit (pkgs.config) cudaSupport;
inherit (pkgs) cudaPackages;
cuda13Support = cudaSupport && cudaPackages.cudaMajorVersion == "13";
libmlx_source = if cuda13Support then "mlx-cuda-13" else if cudaSupport then "mlx-cuda-12" else "mlx-cpu";
libmlx_source =
if (builtins.elem "mlx-cuda13" members.exo or [ ]) then "mlx-cuda-13"
else if (builtins.elem "mlx-cuda12" members.exo or [ ]) then "mlx-cuda-12"
else "mlx-cpu";
python = pkgs.python313;
cuda_cccl_compat = pkgs.runCommand "cuda-cccl-compat" { } ''
mkdir -p $out/include
ln -s ${cudaPackages.cuda_cccl}/include $out/include/cccl
'';
cudaLibs = with cudaPackages; [
cuda_crt
cuda_cudart
cuda_cccl
cuda_cupti
@@ -31,6 +39,10 @@ let
libnvshmem
nccl
];
cudaRoot = pkgs.symlinkJoin {
name = "cuda-merged-exo";
paths = builtins.concatMap (p: [ (lib.getBin p) (lib.getLib p) (lib.getDev p) ]) (cudaLibs ++ [ cudaPackages.cuda_nvcc cuda_cccl_compat ]);
};
exoOverlay = final: prev: {
# Replace workspace exo_pyo3_bindings with Nix-built wheel.
# Preserve passthru so mkVirtualEnv can resolve dependency groups.
@@ -113,37 +125,60 @@ let
});
} // lib.optionalAttrs isLinux {
mlx = prev.mlx.overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ lib.optionals cudaSupport [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ lib.optionals cudaSupport cudaLibs;
autoPatchelfIgnoreMissingDeps = lib.optionals cudaSupport [ "libcuda.so.1" ];
postInstall = ''
cp -r "${final.${libmlx_source}}/${final.python.sitePackages}/mlx" "$out/${final.python.sitePackages}/mlx/"
'';
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
});
} // lib.optionalAttrs cudaSupport {
"${libmlx_source}" = prev."${libmlx_source}".overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ cudaLibs;
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
});
nvidia-cufile = prev.nvidia-cufile.overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ [ pkgs.rdma-core ];
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
});
nvidia-cusolver = prev.nvidia-cusolver.overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ cudaLibs;
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
});
nvidia-nvshmem-cu13 = prev.nvidia-nvshmem-cu13.overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ [ pkgs.rdma-core pkgs.pmix pkgs.libfabric pkgs.ucx pkgs.openmpi ];
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
});
nvidia-cusparse = prev.nvidia-cusparse.overrideAttrs (old: {
buildInputs = old.buildInputs ++ [ cudaLibs ];
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ cudaLibs;
});
torch = prev.torch.overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ cudaLibs;
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
});
torchaudio = prev.torchaudio.overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
buildInputs = old.buildInputs ++ [ cudaPackages.cuda_cudart ];
preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
});
torchvision = prev.torchvision.overrideAttrs (old: {
nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
});
torch-c-dlpack-ext = prev.torch-c-dlpack-ext.overrideAttrs (old: {
buildInputs = old.buildInputs ++ cudaLibs;
autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
});
} // lib.optionalAttrs (cudaSupport && isx86_64) {
numba = prev.numba.overrideAttrs (old: {
buildInputs = (old.buildInputs or [ ]) ++ [ pkgs.tbb ];
});
};
pyprojectOverlay = workspace.mkPyprojectOverlay {
sourcePreference = "wheel";
@@ -164,26 +199,43 @@ let
buildSystemsOverlay
]
);
venv = name: (pythonSet.mkVirtualEnv "${name}-env" members).overrideAttrs (_: { venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" ]; });
mkApp = cmd: name: pkgs.writeShellApplication {
inherit name;
runtimeEnv = {
EXO_DASHBOARD_DIR = self'.packages.dashboard;
EXO_RESOURCES_DIR = inputs.self + /resources;
# mlx and mlx-cuda ship clashing cmake files - we don't need them at runtime anyway
venv = name: (pythonSet.mkVirtualEnv "${name}-venv" members).overrideAttrs (_: { venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" "lib/python${python.pythonVersion}/site-packages/build_backend.py" ]; });
mkApp =
let
libPath = lib.makeLibraryPath (
[ pkgs.stdenv.cc.cc.lib ] ++ lib.optionals cudaSupport [ cudaRoot ]
);
in
text: name: pkgs.writeShellApplication {
inherit name;
text = ''
LD_LIBRARY_PATH="${libPath}''${LD_LIBRARY_PATH:+:}''${LD_LIBRARY_PATH:-}" exec \
${lib.optionalString cudaSupport "nixglhost "} ${text}
'';
runtimeEnv = {
EXO_DASHBOARD_DIR = self'.packages.dashboard;
EXO_RESOURCES_DIR = inputs.self + /resources;
};
runtimeInputs = [
(venv name)
] ++ lib.optionals cudaSupport [ pkgs.nix-gl-host ]
++ lib.optionals isDarwin [ pkgs.macmon ];
passthru = {
venv = venv name;
evenv = ((pythonSet.overrideScope editableOverlay).mkVirtualEnv "${name}-evenv" (members // { exo = (members.exo or [ ]) ++ [ "dev" ]; })).overrideAttrs (_: {
venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" "lib/python${python.pythonVersion}/site-packages/build_backend.py" ];
});
} // lib.optionalAttrs cudaSupport {
inherit cudaRoot;
};
};
runtimeInputs = [
# mlx and mlx-cuda ship clashing cmake files - we don't need them at runtime anyway
(venv name)
]
++ lib.optionals isDarwin [ pkgs.macmon ];
text = "exec " + lib.optionalString cudaSupport "${lib.getExe pkgs.nix-gl-host} " + cmd;
};
in
{
inherit venv;
editablePythonSet = pythonSet.overrideScope editableOverlay;
mkPythonScript = path: mkApp ''python ${path} "$@"'';
mkExo = mkApp ''exo "$@"'';
exo = mkApp ''exo "$@"'' "exo";
};
in
{
@@ -191,18 +243,18 @@ in
{ self', pkgs, unfreePkgs, lib, ... }:
let
inherit (pkgs.stdenv.hostPlatform) isLinux;
inherit (mkPythonSet { inherit self' pkgs lib; members = { exo = [ "cpu" ]; }; }) editablePythonSet mkExo;
inherit (mkPythonSet { inherit self' pkgs lib; members = { exo = [ "mlx-cpu" ]; }; }) exo;
# Virtual environment with dev dependencies for testing
testVenv = (mkPythonSet {
inherit self' pkgs lib; members = {
exo = [ "dev" "cpu" ]; # Include pytest, pytest-asyncio, pytest-env
exo = [ "dev" "mlx-cpu" ]; # Include pytest, pytest-asyncio, pytest-env
};
}).venv "exo-test";
mkBenchScript = (mkPythonSet {
inherit self' pkgs lib; members = {
exo = [ "cpu" ];
exo = [ "mlx-cpu" ];
exo-bench = [ ]; # Include pytest, pytest-asyncio, pytest-env
};
}).mkPythonScript;
@@ -212,12 +264,14 @@ in
runtimeInputs = [ pkgs.python313 ];
text = ''exec python ${path} "$@"'';
};
# If someone is particularly interested in cuda12 support in nix, please open an issue.
# Until then, it's more hassle than it's worth.
#cuda12Set = mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_12) pkgs; members = { exo = [ "mlx-cuda12" ]; }; };
cuda13Set = mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_13) pkgs; members = { exo = [ "mlx-cuda13" ]; }; };
in
{
packages = {
exo = mkExo "exo";
editableVenv = editablePythonSet.mkVirtualEnv "exo-dev-env" { exo = [ "dev" ]; };
inherit exo;
# for running tests in ci
exo-test-env = testVenv;
exo-bench = mkBenchScript "exo-bench" (inputs.self + /bench/exo_bench.py);
@@ -226,8 +280,8 @@ in
# used by ./tests/run_exo_on.sh
exo-get-all-models-on-cluster = mkSimplePythonScript "exo-get-all-models-on-cluster" (inputs.self + /tests/get_all_models_on_cluster.py);
} // lib.optionalAttrs isLinux {
exo-cuda-12 = (mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_12) pkgs; members = { exo = [ "cuda12" ]; }; }).mkExo "exo-cuda-12";
exo-cuda-13 = (mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_13) pkgs; members = { exo = [ "cuda13" ]; }; }).mkExo "exo-cuda-13";
#exo-cuda-12 = cuda12Set.exo;
exo-cuda-13 = cuda13Set.exo;
};
checks = {

1095
uv.lock generated
View File

File diff suppressed because it is too large Load Diff