From a84d01c027ce8d556cf5a8f9d2be3483f0ff051b Mon Sep 17 00:00:00 2001 From: Andrei Cravtov Date: Tue, 19 May 2026 19:07:47 +0100 Subject: [PATCH] it builds --- .github/workflows/build-app.yml | 118 +- .github/workflows/pipeline.yml | 3 - .gitignore | 3 +- .idea/misc.xml | 3 + {.mlx_typings => .typings}/.gitkeep | 0 {.mlx_typings => .typings}/mflux/__init__.pyi | 0 .../mflux/callbacks/__init__.pyi | 0 .../mflux/callbacks/callback.pyi | 0 .../mflux/callbacks/callback_registry.pyi | 0 .../mflux/callbacks/generation_context.pyi | 0 .../mflux/cli/__init__.pyi | 0 .../mflux/cli/defaults/defaults.pyi | 0 .../mflux/models/__init__.pyi | 0 .../mflux/models/common/__init__.pyi | 0 .../mflux/models/common/cli/__init__.pyi | 0 .../mflux/models/common/config/__init__.pyi | 0 .../mflux/models/common/config/config.pyi | 0 .../models/common/config/model_config.pyi | 0 .../models/common/latent_creator/__init__.pyi | 0 .../common/latent_creator/latent_creator.pyi | 0 .../mflux/models/common/lora/__init__.pyi | 0 .../lora/layer/fused_linear_lora_layer.pyi | 0 .../common/lora/layer/linear_lora_layer.pyi | 0 .../common/lora/mapping/lora_loader.pyi | 0 .../common/lora/mapping/lora_mapping.pyi | 0 .../models/common/lora/mapping/lora_saver.pyi | 0 .../common/lora/mapping/lora_transforms.pyi | 0 .../models/common/resolution/__init__.pyi | 0 .../models/common/resolution/actions.pyi | 0 .../common/resolution/config_resolution.pyi | 0 .../common/resolution/lora_resolution.pyi | 0 .../common/resolution/path_resolution.pyi | 0 .../resolution/quantization_resolution.pyi | 0 .../models/common/schedulers/__init__.pyi | 0 .../common/schedulers/base_scheduler.pyi | 0 .../flow_match_euler_discrete_scheduler.pyi | 0 .../common/schedulers/linear_scheduler.pyi | 0 .../schedulers/seedvr2_euler_scheduler.pyi | 0 .../models/common/tokenizer/__init__.pyi | 0 .../models/common/tokenizer/tokenizer.pyi | 0 .../common/tokenizer/tokenizer_loader.pyi | 0 .../common/tokenizer/tokenizer_output.pyi | 0 .../mflux/models/common/vae/__init__.pyi | 0 .../mflux/models/common/vae/tiling_config.pyi | 0 .../mflux/models/common/vae/vae_tiler.pyi | 0 .../mflux/models/common/vae/vae_util.pyi | 0 .../mflux/models/common/weights/__init__.pyi | 0 .../common/weights/loading/loaded_weights.pyi | 0 .../common/weights/loading/weight_applier.pyi | 0 .../weights/loading/weight_definition.pyi | 0 .../common/weights/loading/weight_loader.pyi | 0 .../common/weights/mapping/weight_mapper.pyi | 0 .../common/weights/mapping/weight_mapping.pyi | 0 .../weights/mapping/weight_transforms.pyi | 0 .../common/weights/saving/model_saver.pyi | 0 .../depth_pro/depth_pro_initializer.pyi | 0 .../model/decoder/feature_fusion_block_2d.pyi | 0 .../model/decoder/multires_conv_decoder.pyi | 0 .../model/decoder/residual_block.pyi | 0 .../models/depth_pro/model/depth_pro.pyi | 0 .../depth_pro/model/depth_pro_model.pyi | 0 .../models/depth_pro/model/depth_pro_util.pyi | 0 .../depth_pro/model/dino_v2/attention.pyi | 0 .../model/dino_v2/dino_vision_transformer.pyi | 0 .../depth_pro/model/dino_v2/layer_scale.pyi | 0 .../models/depth_pro/model/dino_v2/mlp.pyi | 0 .../depth_pro/model/dino_v2/patch_embed.pyi | 0 .../model/dino_v2/transformer_block.pyi | 0 .../model/encoder/depth_pro_encoder.pyi | 0 .../model/encoder/upsample_block.pyi | 0 .../models/depth_pro/model/head/fov_head.pyi | 0 .../weights/depth_pro_weight_definition.pyi | 0 .../weights/depth_pro_weight_mapping.pyi | 0 .../latent_creator/fibo_latent_creator.pyi | 0 .../fibo/weights/fibo_weight_definition.pyi | 0 .../fibo/weights/fibo_weight_mapping.pyi | 0 .../tokenizer/qwen2vl_image_processor.pyi | 0 .../fibo_vlm/tokenizer/qwen2vl_processor.pyi | 0 .../weights/fibo_vlm_weight_definition.pyi | 0 .../weights/fibo_vlm_weight_mapping.pyi | 0 .../mflux/models/flux/__init__.pyi | 0 .../mflux/models/flux/cli/__init__.pyi | 0 .../mflux/models/flux/flux_initializer.pyi | 0 .../models/flux/latent_creator/__init__.pyi | 0 .../latent_creator/flux_latent_creator.pyi | 0 .../mflux/models/flux/model/__init__.pyi | 0 .../clip_encoder/clip_embeddings.pyi | 0 .../clip_encoder/clip_encoder.pyi | 0 .../clip_encoder/clip_encoder_layer.pyi | 0 .../clip_encoder/clip_mlp.pyi | 0 .../clip_encoder/clip_sdpa_attention.pyi | 0 .../clip_encoder/clip_text_model.pyi | 0 .../clip_encoder/encoder_clip.pyi | 0 .../flux_text_encoder/prompt_encoder.pyi | 0 .../t5_encoder/t5_attention.pyi | 0 .../flux_text_encoder/t5_encoder/t5_block.pyi | 0 .../t5_encoder/t5_dense_relu_dense.pyi | 0 .../t5_encoder/t5_encoder.pyi | 0 .../t5_encoder/t5_feed_forward.pyi | 0 .../t5_encoder/t5_layer_norm.pyi | 0 .../t5_encoder/t5_self_attention.pyi | 0 .../ada_layer_norm_continuous.pyi | 0 .../flux_transformer/ada_layer_norm_zero.pyi | 0 .../ada_layer_norm_zero_single.pyi | 0 .../common/attention_utils.pyi | 0 .../flux/model/flux_transformer/embed_nd.pyi | 0 .../model/flux_transformer/feed_forward.pyi | 0 .../flux_transformer/guidance_embedder.pyi | 0 .../flux_transformer/joint_attention.pyi | 0 .../joint_transformer_block.pyi | 0 .../single_block_attention.pyi | 0 .../single_transformer_block.pyi | 0 .../model/flux_transformer/text_embedder.pyi | 0 .../flux_transformer/time_text_embed.pyi | 0 .../flux_transformer/timestep_embedder.pyi | 0 .../model/flux_transformer/transformer.pyi | 0 .../flux/model/flux_vae/common/attention.pyi | 0 .../model/flux_vae/common/resnet_block_2d.pyi | 0 .../model/flux_vae/common/unet_mid_block.pyi | 0 .../flux/model/flux_vae/decoder/conv_in.pyi | 0 .../model/flux_vae/decoder/conv_norm_out.pyi | 0 .../flux/model/flux_vae/decoder/conv_out.pyi | 0 .../flux/model/flux_vae/decoder/decoder.pyi | 0 .../flux_vae/decoder/up_block_1_or_2.pyi | 0 .../model/flux_vae/decoder/up_block_3.pyi | 0 .../model/flux_vae/decoder/up_block_4.pyi | 0 .../model/flux_vae/decoder/up_sampler.pyi | 0 .../flux/model/flux_vae/encoder/conv_in.pyi | 0 .../model/flux_vae/encoder/conv_norm_out.pyi | 0 .../flux/model/flux_vae/encoder/conv_out.pyi | 0 .../model/flux_vae/encoder/down_block_1.pyi | 0 .../model/flux_vae/encoder/down_block_2.pyi | 0 .../model/flux_vae/encoder/down_block_3.pyi | 0 .../model/flux_vae/encoder/down_block_4.pyi | 0 .../model/flux_vae/encoder/down_sampler.pyi | 0 .../flux/model/flux_vae/encoder/encoder.pyi | 0 .../mflux/models/flux/model/flux_vae/vae.pyi | 0 .../model/redux_encoder/redux_encoder.pyi | 0 .../siglip_encoder.pyi | 0 .../siglip_encoder_layer.pyi | 0 .../siglip_vision_transformer/siglip_mlp.pyi | 0 ...glip_multi_head_attention_pooling_head.pyi | 0 .../siglip_sdpa_attention.pyi | 0 .../siglip_vision_embeddings.pyi | 0 .../siglip_vision_transformer.pyi | 0 .../mflux/models/flux/variants/__init__.pyi | 0 .../concept_attention/attention_data.pyi | 0 .../joint_attention_concept.pyi | 0 .../joint_transformer_block_concept.pyi | 0 .../concept_attention/transformer_concept.pyi | 0 .../controlnet/transformer_controlnet.pyi | 0 .../models/flux/variants/kontext/__init__.pyi | 0 .../flux/variants/kontext/flux_kontext.pyi | 0 .../flux/variants/kontext/kontext_util.pyi | 0 .../models/flux/variants/txt2img/flux.pyi | 0 .../mflux/models/flux/weights/__init__.pyi | 0 .../models/flux/weights/flux_lora_mapping.pyi | 0 .../flux/weights/flux_weight_definition.pyi | 0 .../flux/weights/flux_weight_mapping.pyi | 0 .../mflux/models/qwen/__init__.pyi | 0 .../mflux/models/qwen/cli/__init__.pyi | 0 .../models/qwen/latent_creator/__init__.pyi | 0 .../latent_creator/qwen_latent_creator.pyi | 0 .../mflux/models/qwen/model/__init__.pyi | 0 .../qwen_text_encoder/qwen_attention.pyi | 0 .../model/qwen_text_encoder/qwen_encoder.pyi | 0 .../qwen_text_encoder/qwen_encoder_layer.pyi | 0 .../qwen/model/qwen_text_encoder/qwen_mlp.pyi | 0 .../qwen_text_encoder/qwen_patch_merger.pyi | 0 .../qwen_text_encoder/qwen_prompt_encoder.pyi | 0 .../model/qwen_text_encoder/qwen_rms_norm.pyi | 0 .../model/qwen_text_encoder/qwen_rope.pyi | 0 .../qwen_text_encoder/qwen_text_encoder.pyi | 0 .../qwen_vision_attention.pyi | 0 .../qwen_text_encoder/qwen_vision_block.pyi | 0 .../qwen_vision_language_encoder.pyi | 0 .../qwen_text_encoder/qwen_vision_mlp.pyi | 0 .../qwen_vision_patch_embed.pyi | 0 .../qwen_vision_rotary_embedding.pyi | 0 .../qwen_vision_transformer.pyi | 0 .../model/qwen_transformer/qwen_attention.pyi | 0 .../qwen_transformer/qwen_feed_forward.pyi | 0 .../qwen/model/qwen_transformer/qwen_rope.pyi | 0 .../qwen_transformer/qwen_time_text_embed.pyi | 0 .../qwen_timestep_embedding.pyi | 0 .../model/qwen_transformer/qwen_timesteps.pyi | 0 .../qwen_transformer/qwen_transformer.pyi | 0 .../qwen_transformer_block.pyi | 0 .../qwen_transformer_rms_norm.pyi | 0 .../qwen_image_attention_block_3d.pyi | 0 .../qwen_vae/qwen_image_causal_conv_3d.pyi | 0 .../model/qwen_vae/qwen_image_decoder_3d.pyi | 0 .../qwen_vae/qwen_image_down_block_3d.pyi | 0 .../model/qwen_vae/qwen_image_encoder_3d.pyi | 0 .../qwen_vae/qwen_image_mid_block_3d.pyi | 0 .../qwen_vae/qwen_image_res_block_3d.pyi | 0 .../model/qwen_vae/qwen_image_resample_3d.pyi | 0 .../model/qwen_vae/qwen_image_rms_norm.pyi | 0 .../model/qwen_vae/qwen_image_up_block_3d.pyi | 0 .../models/qwen/model/qwen_vae/qwen_vae.pyi | 0 .../mflux/models/qwen/qwen_initializer.pyi | 0 .../mflux/models/qwen/tokenizer/__init__.pyi | 0 .../qwen/tokenizer/qwen_image_processor.pyi | 0 .../qwen_vision_language_processor.pyi | 0 .../qwen_vision_language_tokenizer.pyi | 0 .../mflux/models/qwen/variants/__init__.pyi | 0 .../qwen/variants/edit/qwen_edit_util.pyi | 0 .../qwen/variants/edit/qwen_image_edit.pyi | 0 .../qwen/variants/txt2img/qwen_image.pyi | 0 .../mflux/models/qwen/weights/__init__.pyi | 0 .../models/qwen/weights/qwen_lora_mapping.pyi | 0 .../qwen/weights/qwen_weight_definition.pyi | 0 .../qwen/weights/qwen_weight_mapping.pyi | 0 .../weights/seedvr2_weight_definition.pyi | 0 .../weights/seedvr2_weight_mapping.pyi | 0 .../latent_creator/z_image_latent_creator.pyi | 0 .../weights/z_image_weight_definition.pyi | 0 .../weights/z_image_weight_mapping.pyi | 0 .../mflux/release/__init__.pyi | 0 .../mflux/utils/__init__.pyi | 0 .../mflux/utils/box_values.pyi | 0 .../mflux/utils/exceptions.pyi | 0 .../mflux/utils/generated_image.pyi | 0 .../mflux/utils/image_util.pyi | 0 .../mflux/utils/metadata_builder.pyi | 0 .../mflux/utils/version_util.pyi | 0 .../mlx/core/__init__.pyi | 11 +- .../mlx/core/cuda/__init__.pyi | 0 .../mlx/core/distributed/__init__.pyi | 0 .../mlx/core/metal/__init__.pyi | 0 .../mlx/core/random/__init__.pyi | 0 .../mlx/nn/__init__.pyi | 0 {.mlx_typings => .typings}/mlx/nn/init.pyi | 0 .../mlx/nn/layers/__init__.pyi | 0 .../mlx/nn/layers/activations.pyi | 0 .../mlx/nn/layers/base.pyi | 4 + .../mlx/nn/layers/containers.pyi | 0 .../mlx/nn/layers/convolution.pyi | 0 .../mlx/nn/layers/convolution_transpose.pyi | 0 .../mlx/nn/layers/distributed.pyi | 0 .../mlx/nn/layers/dropout.pyi | 0 .../mlx/nn/layers/embedding.pyi | 0 .../mlx/nn/layers/linear.pyi | 0 .../mlx/nn/layers/normalization.pyi | 0 .../mlx/nn/layers/pooling.pyi | 0 .../mlx/nn/layers/positional_encoding.pyi | 0 .../mlx/nn/layers/quantized.pyi | 0 .../mlx/nn/layers/recurrent.pyi | 0 .../mlx/nn/layers/transformer.pyi | 0 .../mlx/nn/layers/upsample.pyi | 0 {.mlx_typings => .typings}/mlx/nn/losses.pyi | 0 {.mlx_typings => .typings}/mlx/nn/utils.pyi | 0 {.mlx_typings => .typings}/mlx/utils.pyi | 0 .../mlx_lm/__init__.pyi | 0 .../mlx_lm/_version.pyi | 0 {.mlx_typings => .typings}/mlx_lm/convert.pyi | 0 .../mlx_lm/generate.pyi | 9 +- .../mlx_lm/models/__init__.pyi | 0 .../mlx_lm/models/activations.pyi | 0 .../mlx_lm/models/base.pyi | 10 +- .../mlx_lm/models/bitlinear_layers.pyi | 0 .../mlx_lm/models/cache.pyi | 0 .../mlx_lm/models/deepseek_v3.pyi | 0 .typings/mlx_lm/models/deepseek_v4.pyi | 280 ++ .../mlx_lm/models/gated_delta.pyi | 0 .../mlx_lm/models/gemma4.pyi | 0 .../mlx_lm/models/gemma4_text.pyi | 0 .../mlx_lm/models/glm4_moe.pyi | 0 .../mlx_lm/models/glm_moe_dsa.pyi | 0 .typings/mlx_lm/models/gpt_oss.pyi | 103 + .typings/mlx_lm/models/minimax.pyi | 94 + .../mlx_lm/models/nemotron_h.pyi | 14 + .../mlx_lm/models/qwen3_5.pyi | 0 .../mlx_lm/models/qwen3_5_moe.pyi | 0 .../mlx_lm/models/qwen3_next.pyi | 1 + .../mlx_lm/models/rope_utils.pyi | 0 .../mlx_lm/models/step3p5.pyi | 0 .../mlx_lm/models/switch_layers.pyi | 0 .../mlx_lm/sample_utils.pyi | 4 + .../mlx_lm/tokenizer_utils.pyi | 0 .../mlx_lm/tuner/dora.pyi | 0 .../mlx_lm/tuner/lora.pyi | 0 .../mlx_lm/tuner/utils.pyi | 0 {.mlx_typings => .typings}/mlx_lm/utils.pyi | 0 .../mlx_vlm/__init__.pyi | 0 .../mlx_vlm/prompt_utils.pyi | 0 {.mlx_typings => .typings}/mlx_vlm/utils.pyi | 0 .typings/pynvml/__init__.pyi | 3226 ++++++++++++++++ .../safetensors/__init__.pyi | 0 .vscode/extensions.json | 3 +- .vscode/settings.json | 3 + Cargo.lock | 1326 +++---- Cargo.toml | 9 +- app/EXO/EXO/ContentView.swift | 226 +- app/EXO/EXO/EXOApp.swift | 3 + app/EXO/EXO/ExoProcessController.swift | 46 +- app/EXO/EXO/Info.plist | 2 +- .../EXO/Services/ClusterStateService.swift | 19 +- .../EXO/Views/BugReportWindowController.swift | 242 ++ app/EXO/EXO/Views/SettingsView.swift | 167 +- .../EXO/Views/SettingsWindowController.swift | 2 +- app/EXO/uninstall-exo.sh | 70 +- bench/METHODOLOGY.md | 18 +- bench/eval_configs/models.toml | 99 +- bench/eval_tool_calls.py | 124 +- bench/exo_bench.py | 364 +- bench/exo_eval.py | 488 ++- bench/prefill-decode.toml | 36 + bench/prefill_decode_bench.py | 784 ++++ bench/test_mlx_bandwidth.py | 377 -- dashboard/src/lib/components/HeaderNav.svelte | 26 + .../src/lib/components/IntegrationCard.svelte | 20 +- .../PrefillDecodeDisaggregation.svelte | 565 +++ dashboard/src/lib/stores/app.svelte.ts | 138 +- dashboard/src/lib/utils/clipboard.ts | 55 + dashboard/src/lib/utils/model_family.ts | 44 + dashboard/src/routes/+page.svelte | 3 + dashboard/src/routes/advanced/+page.svelte | 81 + .../src/routes/integrations/+page.svelte | 113 +- docs/architecture.md | 2 +- flake.lock | 69 +- flake.nix | 49 +- justfile | 4 +- nix/babeld.nix | 3 +- nix/mlx.nix | 158 - packaging/pyinstaller/exo.spec | 21 +- pyproject.toml | 174 +- python/parts.nix | 391 +- .../exolabs--FLUX.1-Kontext-dev-4bit.toml | 1 + .../exolabs--FLUX.1-Kontext-dev-8bit.toml | 1 + .../exolabs--FLUX.1-Kontext-dev.toml | 1 + .../exolabs--FLUX.1-Krea-dev-4bit.toml | 1 + .../exolabs--FLUX.1-Krea-dev-8bit.toml | 1 + .../exolabs--FLUX.1-Krea-dev.toml | 1 + .../exolabs--FLUX.1-dev-4bit.toml | 1 + .../exolabs--FLUX.1-dev-8bit.toml | 1 + .../exolabs--FLUX.1-dev.toml | 1 + .../exolabs--FLUX.1-schnell-4bit.toml | 1 + .../exolabs--FLUX.1-schnell-8bit.toml | 1 + .../exolabs--FLUX.1-schnell.toml | 1 + .../exolabs--Qwen-Image-4bit.toml | 1 + .../exolabs--Qwen-Image-8bit.toml | 1 + .../exolabs--Qwen-Image-Edit-2509-4bit.toml | 1 + .../exolabs--Qwen-Image-Edit-2509-8bit.toml | 1 + .../exolabs--Qwen-Image-Edit-2509.toml | 1 + .../exolabs--Qwen-Image.toml | 1 + .../mlx-community--DeepSeek-V3.1-4bit.toml | 9 +- .../mlx-community--DeepSeek-V3.1-8bit.toml | 9 +- .../mlx-community--DeepSeek-V3.2-4bit.toml | 9 +- .../mlx-community--DeepSeek-V3.2-8bit.toml | 9 +- .../mlx-community--DeepSeek-V4-Flash.toml | 21 + .../mlx-community--DeepSeek-V4-Pro.toml | 21 + .../mlx-community--GLM-4.5-Air-8bit.toml | 9 +- .../mlx-community--GLM-4.5-Air-bf16.toml | 9 +- .../mlx-community--GLM-4.7-4bit.toml | 11 +- .../mlx-community--GLM-4.7-6bit.toml | 11 +- .../mlx-community--GLM-4.7-8bit-gs32.toml | 11 +- .../mlx-community--GLM-4.7-Flash-4bit.toml | 11 +- .../mlx-community--GLM-4.7-Flash-5bit.toml | 11 +- .../mlx-community--GLM-4.7-Flash-6bit.toml | 11 +- .../mlx-community--GLM-4.7-Flash-8bit.toml | 11 +- .../mlx-community--GLM-5-8bit.toml | 10 +- .../mlx-community--GLM-5-MXFP4-Q8.toml | 10 +- .../mlx-community--GLM-5-bf16.toml | 10 +- .../mlx-community--GLM-5.1-DQ4plus-q8.toml | 21 + .../mlx-community--GLM-5.1-MXFP4-Q8.toml | 21 + .../mlx-community--GLM-5.1.toml | 21 + .../mlx-community--Kimi-K2-Instruct-4bit.toml | 7 +- .../mlx-community--Kimi-K2-Thinking.toml | 9 +- .../mlx-community--Kimi-K2.5.toml | 18 +- ...x-community--Kimi-K2.6-mlx-DQ3_K_M-q8.toml | 33 + ...ama-3.1-Nemotron-70B-Instruct-HF-4bit.toml | 8 +- ...ama-3.1-Nemotron-70B-Instruct-HF-8bit.toml | 8 +- ...ama-3.1-Nemotron-70B-Instruct-HF-bf16.toml | 8 +- ...-Llama-3.1-Nemotron-Nano-4B-v1.1-4bit.toml | 11 +- ...-Llama-3.1-Nemotron-Nano-4B-v1.1-8bit.toml | 11 +- ...-Llama-3.1-Nemotron-Nano-4B-v1.1-bf16.toml | 11 +- ...community--Llama-3.2-1B-Instruct-4bit.toml | 8 +- ...community--Llama-3.2-3B-Instruct-4bit.toml | 8 +- ...community--Llama-3.2-3B-Instruct-8bit.toml | 8 +- ...ommunity--Llama-3.3-70B-Instruct-4bit.toml | 8 +- ...ommunity--Llama-3.3-70B-Instruct-8bit.toml | 8 +- ...ity--Meta-Llama-3.1-70B-Instruct-4bit.toml | 8 +- ...nity--Meta-Llama-3.1-8B-Instruct-4bit.toml | 8 +- ...nity--Meta-Llama-3.1-8B-Instruct-8bit.toml | 8 +- ...nity--Meta-Llama-3.1-8B-Instruct-bf16.toml | 8 +- .../mlx-community--MiniMax-M2.1-3bit.toml | 11 +- .../mlx-community--MiniMax-M2.1-8bit.toml | 11 +- .../mlx-community--MiniMax-M2.5-4bit.toml | 11 +- .../mlx-community--MiniMax-M2.5-6bit.toml | 11 +- .../mlx-community--MiniMax-M2.5-8bit.toml | 11 +- ...lx-community--MiniMax-M2.7-4bit-mxfp4.toml | 12 +- .../mlx-community--MiniMax-M2.7-4bit.toml | 12 +- .../mlx-community--MiniMax-M2.7-5bit.toml | 12 +- .../mlx-community--MiniMax-M2.7-6bit.toml | 12 +- .../mlx-community--MiniMax-M2.7-8bit.toml | 12 +- .../mlx-community--MiniMax-M2.7.toml | 12 +- ...IDIA-Nemotron-3-Nano-30B-A3B-MLX-4Bit.toml | 8 +- ...IDIA-Nemotron-3-Nano-30B-A3B-MLX-5Bit.toml | 8 +- ...IDIA-Nemotron-3-Nano-30B-A3B-MLX-6Bit.toml | 8 +- ...IDIA-Nemotron-3-Nano-30B-A3B-MLX-8Bit.toml | 8 +- ...IDIA-Nemotron-3-Nano-30B-A3B-MLX-BF16.toml | 8 +- ...DIA-Nemotron-3-Nano-30B-A3B-MLX-MXFP4.toml | 8 +- ...-NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4.toml | 8 +- ...ity--NVIDIA-Nemotron-Nano-9B-v2-4bits.toml | 13 +- ...nity--NVIDIA-Nemotron-Nano-9B-v2-6bit.toml | 13 +- .../mlx-community--Qwen3-0.6B-4bit.toml | 16 +- .../mlx-community--Qwen3-0.6B-8bit.toml | 16 +- ...y--Qwen3-235B-A22B-Instruct-2507-4bit.toml | 9 +- ...y--Qwen3-235B-A22B-Instruct-2507-8bit.toml | 9 +- .../mlx-community--Qwen3-30B-A3B-4bit.toml | 16 +- .../mlx-community--Qwen3-30B-A3B-8bit.toml | 16 +- ...--Qwen3-Coder-480B-A35B-Instruct-4bit.toml | 10 +- ...--Qwen3-Coder-480B-A35B-Instruct-8bit.toml | 10 +- .../mlx-community--Qwen3-Coder-Next-4bit.toml | 9 +- .../mlx-community--Qwen3-Coder-Next-5bit.toml | 9 +- .../mlx-community--Qwen3-Coder-Next-6bit.toml | 9 +- .../mlx-community--Qwen3-Coder-Next-8bit.toml | 9 +- .../mlx-community--Qwen3-Coder-Next-bf16.toml | 9 +- ...ity--Qwen3-Next-80B-A3B-Instruct-4bit.toml | 9 +- ...ity--Qwen3-Next-80B-A3B-Instruct-8bit.toml | 9 +- ...ity--Qwen3-Next-80B-A3B-Thinking-4bit.toml | 11 +- ...ity--Qwen3-Next-80B-A3B-Thinking-8bit.toml | 11 +- ...-community--Qwen3-VL-4B-Instruct-4bit.toml | 11 +- ...mlx-community--Qwen3.5-122B-A10B-4bit.toml | 24 +- ...mlx-community--Qwen3.5-122B-A10B-6bit.toml | 24 +- ...mlx-community--Qwen3.5-122B-A10B-8bit.toml | 24 +- ...mlx-community--Qwen3.5-122B-A10B-bf16.toml | 24 +- .../mlx-community--Qwen3.5-27B-4bit.toml | 24 +- .../mlx-community--Qwen3.5-27B-8bit.toml | 24 +- .../mlx-community--Qwen3.5-2B-MLX-8bit.toml | 24 +- .../mlx-community--Qwen3.5-35B-A3B-4bit.toml | 24 +- .../mlx-community--Qwen3.5-35B-A3B-8bit.toml | 24 +- ...mlx-community--Qwen3.5-397B-A17B-4bit.toml | 24 +- ...mlx-community--Qwen3.5-397B-A17B-6bit.toml | 24 +- ...mlx-community--Qwen3.5-397B-A17B-8bit.toml | 24 +- .../mlx-community--Qwen3.5-9B-4bit.toml | 24 +- .../mlx-community--Qwen3.5-9B-8bit.toml | 24 +- .../mlx-community--Qwen3.6-27B-4bit.toml | 35 + .../mlx-community--Qwen3.6-27B-8bit.toml | 35 + .../mlx-community--Qwen3.6-27B-bf16.toml | 35 + .../mlx-community--Qwen3.6-35B-A3B-4bit.toml | 35 + .../mlx-community--Qwen3.6-35B-A3B-5bit.toml | 35 + .../mlx-community--Qwen3.6-35B-A3B-8bit.toml | 35 + .../mlx-community--Qwen3.6-35B-A3B-bf16.toml | 35 + .../mlx-community--Step-3.5-Flash-4bit.toml | 14 +- .../mlx-community--Step-3.5-Flash-6bit.toml | 14 +- .../mlx-community--Step-3.5-Flash-8Bit.toml | 14 +- ...lx-community--gemma-4-26b-a4b-it-4bit.toml | 11 +- ...lx-community--gemma-4-26b-a4b-it-6bit.toml | 11 +- ...lx-community--gemma-4-26b-a4b-it-8bit.toml | 11 +- ...lx-community--gemma-4-26b-a4b-it-bf16.toml | 9 +- .../mlx-community--gemma-4-31b-it-4bit.toml | 9 +- .../mlx-community--gemma-4-31b-it-6bit.toml | 9 +- .../mlx-community--gemma-4-31b-it-8bit.toml | 9 +- .../mlx-community--gemma-4-31b-it-bf16.toml | 9 +- .../mlx-community--gemma-4-e2b-it-4bit.toml | 9 +- .../mlx-community--gemma-4-e2b-it-6bit.toml | 9 +- .../mlx-community--gemma-4-e2b-it-8bit.toml | 9 +- .../mlx-community--gemma-4-e2b-it-bf16.toml | 9 +- .../mlx-community--gemma-4-e4b-it-4bit.toml | 9 +- .../mlx-community--gemma-4-e4b-it-6bit.toml | 9 +- .../mlx-community--gemma-4-e4b-it-8bit.toml | 9 +- .../mlx-community--gemma-4-e4b-it-bf16.toml | 9 +- .../mlx-community--gpt-oss-120b-MXFP4-Q8.toml | 11 +- .../mlx-community--gpt-oss-20b-MXFP4-Q8.toml | 11 +- ...ommunity--llama-3.3-70b-instruct-fp16.toml | 8 +- .../moonshotai--Kimi-K2.6.toml | 33 + rust/exo_pyo3_bindings/Cargo.toml | 3 + rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi | 44 + rust/exo_pyo3_bindings/pyproject.toml | 2 +- rust/exo_pyo3_bindings/src/lib.rs | 3 + rust/exo_pyo3_bindings/src/pidfile.rs | 87 + rust/exo_pyo3_bindings/tests/test_python.py | 13 + src/exo/api/adapters/chat_completions.py | 20 +- src/exo/api/adapters/responses.py | 169 +- src/exo/api/main.py | 213 +- .../api/tests/test_chat_completions_stream.py | 195 + .../api/tests/test_openai_responses_api.py | 225 +- src/exo/api/types/__init__.py | 2 + src/exo/api/types/api.py | 46 +- src/exo/api/types/ollama_api.py | 11 + src/exo/api/types/openai_responses.py | 14 + src/exo/download/coordinator.py | 50 +- src/exo/download/download_utils.py | 186 +- src/exo/download/impl_shard_downloader.py | 149 +- src/exo/download/shard_downloader.py | 2 + .../download/tests/test_cancel_download.py | 2 + .../tests/test_download_status_not_lost.py | 283 ++ src/exo/download/tests/test_offline_mode.py | 63 + .../tests/test_rate_limit_handling.py | 355 ++ src/exo/download/tests/test_re_download.py | 2 + .../download/tests/test_safetensors_index.py | 35 + src/exo/main.py | 33 +- src/exo/master/main.py | 102 +- src/exo/master/placement.py | 75 +- src/exo/master/placement_utils.py | 6 +- src/exo/master/tests/test_master.py | 38 +- src/exo/master/tests/test_placement.py | 330 +- src/exo/master/tests/test_placement_utils.py | 7 + src/exo/routing/connection_message.py | 4 +- src/exo/routing/event_router.py | 1 + src/exo/routing/router.py | 16 +- src/exo/routing/topics.py | 4 +- src/exo/shared/apply.py | 126 +- src/exo/shared/constants.py | 7 + src/exo/shared/election.py | 6 +- src/exo/shared/models/model_cards.py | 162 +- src/exo/shared/tests/conftest.py | 2 + .../test_apply_custom_model_cards.py | 46 + .../test_apply/test_apply_instance_link.py | 72 + .../test_apply/test_apply_rdma_gating.py | 231 ++ src/exo/shared/topology.py | 16 + src/exo/shared/types/backends.py | 8 + src/exo/shared/types/chunks.py | 11 +- src/exo/shared/types/commands.py | 19 +- src/exo/shared/types/common.py | 6 +- src/exo/shared/types/events.py | 23 +- src/exo/shared/types/instance_link.py | 13 + src/exo/shared/types/profiling.py | 20 +- src/exo/shared/types/state.py | 16 +- src/exo/shared/types/tasks.py | 3 + src/exo/shared/types/text_generation.py | 61 +- src/exo/shared/types/thunderbolt.py | 6 +- .../exo/shared/types/worker}/__init__.py | 0 src/exo/shared/types/worker/downloads.py | 6 +- src/exo/shared/types/worker/instances.py | 4 +- .../shared/types/worker/runner_response.py | 13 +- src/exo/shared/types/worker/runners.py | 8 +- src/exo/utils/async_process.py | 290 ++ src/exo/utils/daemon.py | 28 + src/exo/utils/info_gatherer/info_gatherer.py | 33 + src/exo/utils/pidfile.py | 28 + src/exo/utils/ports.py | 6 + src/exo/utils/power_sampler.py | 53 +- src/exo/utils/pydantic_ext.py | 27 +- src/exo/utils/tests/conftest.py | 8 + src/exo/utils/tests/test_async_process.py | 417 ++ src/exo/utils/tests/test_daemon.py | 168 + src/exo/utils/tests/test_pidfile.py | 84 + src/exo/utils/tests/test_power_sampler.py | 30 + src/exo/worker/disaggregated/__init__.py | 0 src/exo/worker/disaggregated/protocol.py | 152 + src/exo/worker/disaggregated/server.py | 105 + src/exo/worker/engines/base.py | 60 + src/exo/worker/engines/image/__init__.py | 8 +- src/exo/worker/engines/image/builder.py | 224 ++ .../worker/engines/image/distributed_model.py | 43 +- src/exo/worker/engines/image/generate.py | 76 +- src/exo/worker/engines/mlx/auto_parallel.py | 474 ++- src/exo/worker/engines/mlx/builder.py | 113 + src/exo/worker/engines/mlx/cache.py | 228 +- .../engines/mlx/disaggregated/__init__.py | 0 .../engines/mlx/disaggregated/adapter.py | 233 ++ .../engines/mlx/disaggregated/client.py | 147 + .../worker/engines/mlx/disaggregated/serve.py | 86 + .../mlx/disaggregated/tests/__init__.py | 0 .../disaggregated/tests/test_end_to_end.py | 167 + .../disaggregated/tests/test_mlx_adapter.py | 270 ++ .../tests/test_protocol_roundtrip.py | 154 + .../disaggregated/tests/test_server_drain.py | 120 + .../engines/mlx/generator/batch_generate.py | 95 +- .../worker/engines/mlx/generator/generate.py | 147 +- .../engines/mlx/generator/remote_prefill.py | 72 + .../engines/mlx/patches/opt_batch_gen.py | 21 +- .../engines/mlx/tests/test_batch_generate.py | 2 +- .../mlx.py => worker/engines/mlx/types.py} | 8 +- src/exo/worker/engines/mlx/utils_mlx.py | 174 +- src/exo/worker/engines/mlx/vendor/__init__.py | 0 .../mlx/vendor/deepseek_v4_encoding.py | 836 ++++ .../engines/mlx/{ => vendor}/dsml_encoding.py | 21 +- src/exo/worker/engines/mlx/vision.py | 155 +- src/exo/worker/main.py | 89 +- src/exo/worker/plan.py | 27 +- src/exo/worker/runner/bootstrap.py | 70 +- src/exo/worker/runner/diagnostics.py | 144 + src/exo/worker/runner/image_models/runner.py | 403 -- .../runner/llm_inference/batch_generator.py | 202 +- .../llm_inference/model_output_parsers.py | 227 +- src/exo/worker/runner/llm_inference/runner.py | 442 --- src/exo/worker/runner/runner.py | 394 ++ .../{runner_supervisor.py => supervisor.py} | 226 +- src/exo/worker/tests/unittests/conftest.py | 2 + .../tests/unittests/test_mlx/conftest.py | 23 +- .../test_mlx/test_kv_prefix_cache.py | 24 +- .../test_pipeline_prefill_callbacks.py | 11 +- .../test_prefix_cache_architectures.py | 56 +- .../unittests/test_mlx/test_tokenizers.py | 8 +- .../unittests/test_mlx/test_tp_bit_exact.py | 431 +++ .../test_plan/test_download_and_loading.py | 5 + .../test_plan/test_runner_lifecycle.py | 7 +- .../test_plan/test_task_forwarding.py | 5 + .../tests/unittests/test_plan/test_warmup.py | 8 + .../unittests/test_runner/test_dsml_e2e.py | 166 +- .../test_runner/test_event_ordering.py | 58 +- .../test_runner/test_finish_reason_sse.py | 271 +- .../test_runner/test_parse_tool_calls.py | 11 +- .../test_runner/test_runner_supervisor.py | 34 +- .../test_runner/test_serve_prefill.py | 227 ++ tests/__init__.py | 0 tests/conftest.py | 181 + tests/framework.py | 199 + tests/test_1node.py | 75 + tests/test_2node.py | 49 + tests/test_4node.py | 32 + tests/test_dashboard.py | 102 + tests/test_resilience.py | 56 + {tests => tmp/old_tests}/auto_bench.sh | 0 {tests => tmp/old_tests}/eval_tool_calls.sh | 0 .../old_tests}/get_all_models_on_cluster.py | 0 {tests => tmp/old_tests}/headless_runner.py | 0 {tests => tmp/old_tests}/run_exo_on.sh | 0 .../old_tests}/start_distributed_test.py | 0 {tests => tmp/old_tests}/test_vision_cache.py | 0 tools/pyproject.toml | 10 + tools/src/exo_tools/__init__.py | 0 tools/src/exo_tools/client.py | 117 + tools/src/exo_tools/cluster.py | 243 ++ {bench => tools/src/exo_tools}/harness.py | 265 +- uv.lock | 3351 ++++++++--------- 620 files changed, 23090 insertions(+), 6129 deletions(-) rename {.mlx_typings => .typings}/.gitkeep (100%) rename {.mlx_typings => .typings}/mflux/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/callbacks/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/callbacks/callback.pyi (100%) rename {.mlx_typings => .typings}/mflux/callbacks/callback_registry.pyi (100%) rename {.mlx_typings => .typings}/mflux/callbacks/generation_context.pyi (100%) rename {.mlx_typings => .typings}/mflux/cli/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/cli/defaults/defaults.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/cli/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/config/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/config/config.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/config/model_config.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/latent_creator/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/latent_creator/latent_creator.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/lora/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/lora/layer/fused_linear_lora_layer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/lora/layer/linear_lora_layer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/lora/mapping/lora_loader.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/lora/mapping/lora_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/lora/mapping/lora_saver.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/lora/mapping/lora_transforms.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/resolution/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/resolution/actions.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/resolution/config_resolution.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/resolution/lora_resolution.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/resolution/path_resolution.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/resolution/quantization_resolution.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/schedulers/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/schedulers/base_scheduler.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/schedulers/flow_match_euler_discrete_scheduler.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/schedulers/linear_scheduler.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/schedulers/seedvr2_euler_scheduler.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/tokenizer/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/tokenizer/tokenizer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/tokenizer/tokenizer_loader.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/tokenizer/tokenizer_output.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/vae/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/vae/tiling_config.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/vae/vae_tiler.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/vae/vae_util.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/loading/loaded_weights.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/loading/weight_applier.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/loading/weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/loading/weight_loader.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/mapping/weight_mapper.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/mapping/weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/mapping/weight_transforms.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/common/weights/saving/model_saver.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/depth_pro_initializer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/decoder/feature_fusion_block_2d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/decoder/multires_conv_decoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/decoder/residual_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/depth_pro.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/depth_pro_model.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/depth_pro_util.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/dino_v2/attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/dino_v2/dino_vision_transformer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/dino_v2/layer_scale.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/dino_v2/mlp.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/dino_v2/patch_embed.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/dino_v2/transformer_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/encoder/depth_pro_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/encoder/upsample_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/model/head/fov_head.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/weights/depth_pro_weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/depth_pro/weights/depth_pro_weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/fibo/latent_creator/fibo_latent_creator.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/fibo/weights/fibo_weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/fibo/weights/fibo_weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/fibo_vlm/tokenizer/qwen2vl_image_processor.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/fibo_vlm/tokenizer/qwen2vl_processor.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/fibo_vlm/weights/fibo_vlm_weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/fibo_vlm/weights/fibo_vlm_weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/cli/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/flux_initializer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/latent_creator/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/latent_creator/flux_latent_creator.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_embeddings.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder_layer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_mlp.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_sdpa_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_text_model.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/clip_encoder/encoder_clip.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/prompt_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_dense_relu_dense.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_feed_forward.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_layer_norm.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_self_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/ada_layer_norm_continuous.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero_single.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/common/attention_utils.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/embed_nd.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/feed_forward.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/guidance_embedder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/joint_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/joint_transformer_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/single_block_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/single_transformer_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/text_embedder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/time_text_embed.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/timestep_embedder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_transformer/transformer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/common/attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/common/resnet_block_2d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/common/unet_mid_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/conv_in.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/conv_norm_out.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/conv_out.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/decoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/up_block_1_or_2.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/up_block_3.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/up_block_4.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/decoder/up_sampler.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/conv_in.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/conv_norm_out.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/conv_out.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/down_block_1.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/down_block_2.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/down_block_3.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/down_block_4.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/down_sampler.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/encoder/encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/flux_vae/vae.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/redux_encoder/redux_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder_layer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/siglip_vision_transformer/siglip_mlp.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/siglip_vision_transformer/siglip_multi_head_attention_pooling_head.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/siglip_vision_transformer/siglip_sdpa_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_embeddings.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_transformer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/concept_attention/attention_data.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/concept_attention/joint_attention_concept.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/concept_attention/joint_transformer_block_concept.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/concept_attention/transformer_concept.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/controlnet/transformer_controlnet.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/kontext/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/kontext/flux_kontext.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/kontext/kontext_util.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/variants/txt2img/flux.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/weights/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/weights/flux_lora_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/weights/flux_weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/flux/weights/flux_weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/cli/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/latent_creator/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/latent_creator/qwen_latent_creator.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder_layer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_mlp.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_patch_merger.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_prompt_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_rms_norm.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_rope.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_text_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_language_encoder.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_mlp.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_patch_embed.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_rotary_embedding.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_transformer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_attention.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_feed_forward.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_rope.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_time_text_embed.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_timestep_embedding.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_timesteps.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_transformer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_transformer_block.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_transformer/qwen_transformer_rms_norm.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_attention_block_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_causal_conv_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_decoder_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_down_block_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_encoder_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_mid_block_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_res_block_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_resample_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_rms_norm.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_image_up_block_3d.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/model/qwen_vae/qwen_vae.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/qwen_initializer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/tokenizer/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/tokenizer/qwen_image_processor.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/tokenizer/qwen_vision_language_processor.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/tokenizer/qwen_vision_language_tokenizer.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/variants/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/variants/edit/qwen_edit_util.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/variants/edit/qwen_image_edit.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/variants/txt2img/qwen_image.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/weights/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/weights/qwen_lora_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/weights/qwen_weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/qwen/weights/qwen_weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/seedvr2/weights/seedvr2_weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/seedvr2/weights/seedvr2_weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/z_image/latent_creator/z_image_latent_creator.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/z_image/weights/z_image_weight_definition.pyi (100%) rename {.mlx_typings => .typings}/mflux/models/z_image/weights/z_image_weight_mapping.pyi (100%) rename {.mlx_typings => .typings}/mflux/release/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/utils/__init__.pyi (100%) rename {.mlx_typings => .typings}/mflux/utils/box_values.pyi (100%) rename {.mlx_typings => .typings}/mflux/utils/exceptions.pyi (100%) rename {.mlx_typings => .typings}/mflux/utils/generated_image.pyi (100%) rename {.mlx_typings => .typings}/mflux/utils/image_util.pyi (100%) rename {.mlx_typings => .typings}/mflux/utils/metadata_builder.pyi (100%) rename {.mlx_typings => .typings}/mflux/utils/version_util.pyi (100%) rename {.mlx_typings => .typings}/mlx/core/__init__.pyi (99%) rename {.mlx_typings => .typings}/mlx/core/cuda/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx/core/distributed/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx/core/metal/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx/core/random/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/init.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/activations.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/base.pyi (98%) rename {.mlx_typings => .typings}/mlx/nn/layers/containers.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/convolution.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/convolution_transpose.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/distributed.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/dropout.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/embedding.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/linear.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/normalization.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/pooling.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/positional_encoding.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/quantized.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/recurrent.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/transformer.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/layers/upsample.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/losses.pyi (100%) rename {.mlx_typings => .typings}/mlx/nn/utils.pyi (100%) rename {.mlx_typings => .typings}/mlx/utils.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/_version.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/convert.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/generate.pyi (99%) rename {.mlx_typings => .typings}/mlx_lm/models/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/activations.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/base.pyi (89%) rename {.mlx_typings => .typings}/mlx_lm/models/bitlinear_layers.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/cache.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/deepseek_v3.pyi (100%) create mode 100644 .typings/mlx_lm/models/deepseek_v4.pyi rename {.mlx_typings => .typings}/mlx_lm/models/gated_delta.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/gemma4.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/gemma4_text.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/glm4_moe.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/glm_moe_dsa.pyi (100%) create mode 100644 .typings/mlx_lm/models/gpt_oss.pyi create mode 100644 .typings/mlx_lm/models/minimax.pyi rename {.mlx_typings => .typings}/mlx_lm/models/nemotron_h.pyi (91%) rename {.mlx_typings => .typings}/mlx_lm/models/qwen3_5.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/qwen3_5_moe.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/qwen3_next.pyi (99%) rename {.mlx_typings => .typings}/mlx_lm/models/rope_utils.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/step3p5.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/models/switch_layers.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/sample_utils.pyi (96%) rename {.mlx_typings => .typings}/mlx_lm/tokenizer_utils.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/tuner/dora.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/tuner/lora.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/tuner/utils.pyi (100%) rename {.mlx_typings => .typings}/mlx_lm/utils.pyi (100%) rename {.mlx_typings => .typings}/mlx_vlm/__init__.pyi (100%) rename {.mlx_typings => .typings}/mlx_vlm/prompt_utils.pyi (100%) rename {.mlx_typings => .typings}/mlx_vlm/utils.pyi (100%) create mode 100644 .typings/pynvml/__init__.pyi rename {.mlx_typings => .typings}/safetensors/__init__.pyi (100%) create mode 100644 app/EXO/EXO/Views/BugReportWindowController.swift create mode 100644 bench/prefill-decode.toml create mode 100644 bench/prefill_decode_bench.py delete mode 100644 bench/test_mlx_bandwidth.py create mode 100644 dashboard/src/lib/components/PrefillDecodeDisaggregation.svelte create mode 100644 dashboard/src/lib/utils/clipboard.ts create mode 100644 dashboard/src/lib/utils/model_family.ts create mode 100644 dashboard/src/routes/advanced/+page.svelte delete mode 100644 nix/mlx.nix create mode 100644 resources/inference_model_cards/mlx-community--DeepSeek-V4-Flash.toml create mode 100644 resources/inference_model_cards/mlx-community--DeepSeek-V4-Pro.toml create mode 100644 resources/inference_model_cards/mlx-community--GLM-5.1-DQ4plus-q8.toml create mode 100644 resources/inference_model_cards/mlx-community--GLM-5.1-MXFP4-Q8.toml create mode 100644 resources/inference_model_cards/mlx-community--GLM-5.1.toml create mode 100644 resources/inference_model_cards/mlx-community--Kimi-K2.6-mlx-DQ3_K_M-q8.toml create mode 100644 resources/inference_model_cards/mlx-community--Qwen3.6-27B-4bit.toml create mode 100644 resources/inference_model_cards/mlx-community--Qwen3.6-27B-8bit.toml create mode 100644 resources/inference_model_cards/mlx-community--Qwen3.6-27B-bf16.toml create mode 100644 resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-4bit.toml create mode 100644 resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-5bit.toml create mode 100644 resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-8bit.toml create mode 100644 resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-bf16.toml create mode 100644 resources/inference_model_cards/moonshotai--Kimi-K2.6.toml create mode 100644 rust/exo_pyo3_bindings/src/pidfile.rs create mode 100644 src/exo/api/tests/test_chat_completions_stream.py create mode 100644 src/exo/download/tests/test_download_status_not_lost.py create mode 100644 src/exo/download/tests/test_rate_limit_handling.py create mode 100644 src/exo/download/tests/test_safetensors_index.py create mode 100644 src/exo/shared/tests/test_apply/test_apply_custom_model_cards.py create mode 100644 src/exo/shared/tests/test_apply/test_apply_instance_link.py create mode 100644 src/exo/shared/tests/test_apply/test_apply_rdma_gating.py create mode 100644 src/exo/shared/types/backends.py create mode 100644 src/exo/shared/types/instance_link.py rename {bench/src/exo_bench => src/exo/shared/types/worker}/__init__.py (100%) create mode 100644 src/exo/utils/async_process.py create mode 100644 src/exo/utils/daemon.py create mode 100644 src/exo/utils/pidfile.py create mode 100644 src/exo/utils/ports.py create mode 100644 src/exo/utils/tests/conftest.py create mode 100644 src/exo/utils/tests/test_async_process.py create mode 100644 src/exo/utils/tests/test_daemon.py create mode 100644 src/exo/utils/tests/test_pidfile.py create mode 100644 src/exo/worker/disaggregated/__init__.py create mode 100644 src/exo/worker/disaggregated/protocol.py create mode 100644 src/exo/worker/disaggregated/server.py create mode 100644 src/exo/worker/engines/base.py create mode 100644 src/exo/worker/engines/image/builder.py create mode 100644 src/exo/worker/engines/mlx/builder.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/__init__.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/adapter.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/client.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/serve.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/tests/__init__.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/tests/test_end_to_end.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/tests/test_mlx_adapter.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/tests/test_protocol_roundtrip.py create mode 100644 src/exo/worker/engines/mlx/disaggregated/tests/test_server_drain.py create mode 100644 src/exo/worker/engines/mlx/generator/remote_prefill.py rename src/exo/{shared/types/mlx.py => worker/engines/mlx/types.py} (82%) create mode 100644 src/exo/worker/engines/mlx/vendor/__init__.py create mode 100644 src/exo/worker/engines/mlx/vendor/deepseek_v4_encoding.py rename src/exo/worker/engines/mlx/{ => vendor}/dsml_encoding.py (76%) create mode 100644 src/exo/worker/runner/diagnostics.py delete mode 100644 src/exo/worker/runner/image_models/runner.py delete mode 100644 src/exo/worker/runner/llm_inference/runner.py create mode 100644 src/exo/worker/runner/runner.py rename src/exo/worker/runner/{runner_supervisor.py => supervisor.py} (53%) create mode 100644 src/exo/worker/tests/unittests/test_mlx/test_tp_bit_exact.py create mode 100644 src/exo/worker/tests/unittests/test_runner/test_serve_prefill.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/framework.py create mode 100644 tests/test_1node.py create mode 100644 tests/test_2node.py create mode 100644 tests/test_4node.py create mode 100644 tests/test_dashboard.py create mode 100644 tests/test_resilience.py rename {tests => tmp/old_tests}/auto_bench.sh (100%) rename {tests => tmp/old_tests}/eval_tool_calls.sh (100%) rename {tests => tmp/old_tests}/get_all_models_on_cluster.py (100%) rename {tests => tmp/old_tests}/headless_runner.py (100%) rename {tests => tmp/old_tests}/run_exo_on.sh (100%) rename {tests => tmp/old_tests}/start_distributed_test.py (100%) rename {tests => tmp/old_tests}/test_vision_cache.py (100%) create mode 100644 tools/pyproject.toml create mode 100644 tools/src/exo_tools/__init__.py create mode 100644 tools/src/exo_tools/client.py create mode 100644 tools/src/exo_tools/cluster.py rename {bench => tools/src/exo_tools}/harness.py (74%) diff --git a/.github/workflows/build-app.yml b/.github/workflows/build-app.yml index c0cb0e9b9..e88744224 100644 --- a/.github/workflows/build-app.yml +++ b/.github/workflows/build-app.yml @@ -32,7 +32,6 @@ jobs: SPARKLE_ED25519_PRIVATE: ${{ secrets.SPARKLE_ED25519_PRIVATE }} SPARKLE_S3_BUCKET: ${{ secrets.SPARKLE_S3_BUCKET }} SPARKLE_S3_PREFIX: ${{ secrets.SPARKLE_S3_PREFIX }} - EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT: ${{ secrets.EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT }} AWS_REGION: ${{ secrets.AWS_REGION }} EXO_BUILD_NUMBER: ${{ github.run_number }} EXO_LIBP2P_NAMESPACE: ${{ github.ref_name }} @@ -239,6 +238,80 @@ jobs: # Export keychain path for other steps echo "BUILD_KEYCHAIN_PATH=$KEYCHAIN_PATH" >> $GITHUB_ENV + # ============================================================ + # Pre-flight credential / profile validation + # Runs BEFORE the ~16 min build so auth/expiry failures surface in <1 min. + # ============================================================ + + - name: Validate Apple notarization credentials + env: + APPLE_NOTARIZATION_USERNAME: ${{ secrets.APPLE_NOTARIZATION_USERNAME }} + APPLE_NOTARIZATION_PASSWORD: ${{ secrets.APPLE_NOTARIZATION_PASSWORD }} + APPLE_NOTARIZATION_TEAM: ${{ secrets.APPLE_NOTARIZATION_TEAM }} + run: | + # All-or-nothing: either all three creds are set, or none are. + CRED_COUNT=0 + for v in "$APPLE_NOTARIZATION_USERNAME" "$APPLE_NOTARIZATION_PASSWORD" "$APPLE_NOTARIZATION_TEAM"; do + [[ -n "$v" ]] && CRED_COUNT=$((CRED_COUNT + 1)) + done + if [[ "$CRED_COUNT" -eq 0 ]]; then + echo "No notarization credentials configured — skipping notarization for this build." + exit 0 + fi + if [[ "$CRED_COUNT" -ne 3 ]]; then + echo "ERROR: partial notarization credentials set ($CRED_COUNT/3). Aborting before build." + exit 1 + fi + # Cheap, ~5s, auth-only call. Fails instantly with a clear message if + # the app-specific password is stale, wrong team-id, etc. + echo "Verifying Apple notarization credentials via notarytool history..." + if ! xcrun notarytool history \ + --apple-id "$APPLE_NOTARIZATION_USERNAME" \ + --password "$APPLE_NOTARIZATION_PASSWORD" \ + --team-id "$APPLE_NOTARIZATION_TEAM" >/dev/null; then + echo "ERROR: notarytool rejected the provided credentials. Fix before rerunning." + echo "Common causes: app-specific password expired/revoked, wrong team-id," + echo "Apple ID not on the team, or 2FA not configured for this Apple ID." + exit 1 + fi + echo "Apple notarization credentials OK." + + - name: Validate provisioning profile expiry + run: | + PROFILE="$HOME/Library/Developer/Xcode/UserData/Provisioning Profiles/EXO.provisionprofile" + if [[ ! -f "$PROFILE" ]]; then + echo "ERROR: provisioning profile not found at $PROFILE" + exit 1 + fi + EXPIRY=$(security cms -D -i "$PROFILE" | plutil -extract ExpirationDate raw -o - - 2>/dev/null || true) + if [[ -z "$EXPIRY" ]]; then + echo "WARNING: could not read ExpirationDate from provisioning profile; skipping expiry check." + exit 0 + fi + # Try a couple of known plutil date formats. If none parse, skip the check rather + # than risk a false-positive "expired" block on a format we didn't anticipate. + EXPIRY_EPOCH="" + for fmt in "%Y-%m-%dT%H:%M:%SZ" "%Y-%m-%d %H:%M:%S %z" "%Y-%m-%d %H:%M:%S +0000"; do + if parsed=$(date -j -f "$fmt" "$EXPIRY" +%s 2>/dev/null); then + EXPIRY_EPOCH="$parsed" + break + fi + done + if [[ -z "$EXPIRY_EPOCH" ]]; then + echo "WARNING: could not parse ExpirationDate '$EXPIRY'; skipping expiry check." + exit 0 + fi + NOW_EPOCH=$(date +%s) + if [[ "$EXPIRY_EPOCH" -le "$NOW_EPOCH" ]]; then + echo "ERROR: provisioning profile expired on $EXPIRY. Regenerate it before rerunning." + exit 1 + fi + DAYS_LEFT=$(( (EXPIRY_EPOCH - NOW_EPOCH) / 86400 )) + echo "Provisioning profile valid until $EXPIRY ($DAYS_LEFT days remaining)." + if [[ "$DAYS_LEFT" -lt 14 ]]; then + echo "WARNING: profile expires in under 14 days — regenerate soon." + fi + # ============================================================ # Build the bundle # ============================================================ @@ -273,7 +346,6 @@ jobs: EXO_BUILD_COMMIT="$GITHUB_SHA" \ SPARKLE_FEED_URL="$SPARKLE_FEED_URL" \ SPARKLE_ED25519_PUBLIC="$SPARKLE_ED25519_PUBLIC" \ - EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT="$EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT" \ CODE_SIGNING_IDENTITY="$SIGNING_IDENTITY" \ CODE_SIGN_INJECT_BASE_ENTITLEMENTS=YES mkdir -p ../../output @@ -306,11 +378,41 @@ jobs: APPLE_NOTARIZATION_PASSWORD: ${{ secrets.APPLE_NOTARIZATION_PASSWORD }} APPLE_NOTARIZATION_TEAM: ${{ secrets.APPLE_NOTARIZATION_TEAM }} run: | + set -o pipefail cd output security unlock-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$BUILD_KEYCHAIN_PATH" SIGNING_IDENTITY=$(security find-identity -v -p codesigning "$BUILD_KEYCHAIN_PATH" | awk -F '"' '{print $2}') + + # Fail fast if notarization creds are partial. All-or-nothing. + CRED_COUNT=0 + for v in "$APPLE_NOTARIZATION_USERNAME" "$APPLE_NOTARIZATION_PASSWORD" "$APPLE_NOTARIZATION_TEAM"; do + [[ -n "$v" ]] && CRED_COUNT=$((CRED_COUNT + 1)) + done + if [[ "$CRED_COUNT" -ne 0 && "$CRED_COUNT" -ne 3 ]]; then + echo "ERROR: partial Apple notarization credentials set ($CRED_COUNT/3). Aborting." + exit 1 + fi + /usr/bin/codesign --deep --force --timestamp --options runtime \ --sign "$SIGNING_IDENTITY" EXO.app + + # Pre-flight: verify the signed app BEFORE building DMG and submitting to Apple. + # If this fails, notarization will fail too — cheap way to fail in seconds, not 15 minutes. + echo "===== codesign --verify EXO.app =====" + if ! /usr/bin/codesign --verify --deep --strict --verbose=2 EXO.app; then + echo "ERROR: EXO.app failed codesign verification. Dumping signing status of every executable:" + find EXO.app -type f \( -perm -111 -o -name "*.dylib" -o -name "*.so" -o -name "*.framework" \) -print0 | + while IFS= read -r -d '' f; do + printf -- '--- %s\n' "$f" + /usr/bin/codesign -dv --verbose=2 "$f" 2>&1 | sed 's/^/ /' || true + done + exit 1 + fi + + # Gatekeeper assessment. A failure here strongly predicts notarization rejection. + echo "===== spctl assessment (predicts notarization outcome) =====" + /usr/bin/spctl -a -vvv -t install EXO.app || echo "WARNING: spctl assessment failed — notarization is likely to fail too." + mkdir -p dmg-root cp -R EXO.app dmg-root/ ln -s /Applications dmg-root/Applications @@ -318,12 +420,22 @@ jobs: hdiutil create -volname "EXO" -srcfolder dmg-root -ov -format UDZO "$DMG_NAME" /usr/bin/codesign --force --timestamp --options runtime \ --sign "$SIGNING_IDENTITY" "$DMG_NAME" + + echo "===== codesign --verify DMG =====" + if ! /usr/bin/codesign --verify --verbose=2 "$DMG_NAME"; then + echo "ERROR: DMG failed codesign verification." + exit 1 + fi + if [[ -n "$APPLE_NOTARIZATION_USERNAME" ]]; then + echo "===== notarytool submit =====" + # `|| true` so set -e doesn't abort before we can echo output / fetch the log. + # We rely on the parsed STATUS below to decide pass/fail. SUBMISSION_OUTPUT=$(xcrun notarytool submit "$DMG_NAME" \ --apple-id "$APPLE_NOTARIZATION_USERNAME" \ --password "$APPLE_NOTARIZATION_PASSWORD" \ --team-id "$APPLE_NOTARIZATION_TEAM" \ - --wait --timeout 15m 2>&1) + --wait --timeout 15m 2>&1) || true echo "$SUBMISSION_OUTPUT" SUBMISSION_ID=$(echo "$SUBMISSION_OUTPUT" | awk 'tolower($1)=="id:" && $2 ~ /^[0-9a-fA-F-]+$/ {print $2; exit}') diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index 8483b1309..c1a4674ff 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -91,9 +91,6 @@ jobs: nix build .#metal-toolchain fi - # Build mlx (depends on metal-toolchain) - nix build .#mlx - - name: Build all Nix outputs run: | nix flake show --json | jq -r ' diff --git a/.gitignore b/.gitignore index b162de342..fa09fb01d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,6 @@ digest.txt app/EXO/build/ dist/ - # rust target/ **/*.rs.bk @@ -40,3 +39,5 @@ bench/**/*.json tmp/models /build/exo /.claude/skills +/.claude +/.codex diff --git a/.idea/misc.xml b/.idea/misc.xml index 124c79a18..86011b9b1 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -4,4 +4,7 @@