Compare commits

394 Commits

Author SHA1 Message Date
Evan
879d178057 add kimi tool parsing 2026-01-21 16:53:38 +00:00
Evan
daa31b4472 implement mlx-lm tool calling 2026-01-21 16:53:38 +00:00
ciaranbor
6a9251b920 Add mflux type stubs (#1234)
## Motivation

Simplify image generation review
2026-01-21 15:07:42 +00:00
rltakashige
758464703d Fix GPT OSS tensor sharding with upstream MLX LM (#1223)
## Motivation
MLX LM has given GPT OSS a shard method, but MLX does not have an update
to match.

2026-01-20 18:24:54 +00:00
rltakashige
9e2179c848 Register original layer in CustomMlxLayer (#1229)
## Motivation
Kimi K2 Thinking Pipeline RDMA was broken before.

## Why It Works
No clue tbh

## Test Plan

### Manual Testing
Kimi K2 Thinking and GPT OSS work at the same time on Pipeline RDMA.
Needs exo bench to check more thoroughly

### Automated Testing
Layer composition tests still pass.
2026-01-20 18:20:01 +00:00
Evan Quiney
22b5d836ef swap all instances of model_id: str for model_id: ModelId (#1221)
This change uses the stronger typed ModelId, and introduces some
convenience methods. It also cleans up some code left over from #1204.

## Changes

`model_id: str -> model_id: ModelId`
`repo_id: str -> model_id: ModelId`

Introduces methods on ModelId, in particular ModelId.normalize() to
replace `/` with `--`.
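As an illustration, a normalize() like the one described might look as follows (the class body here is a hypothetical sketch, not exo's actual ModelId):

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class ModelId:
    """Hypothetical sketch of a strongly typed model identifier."""

    value: str

    def normalize(self) -> str:
        # HuggingFace repo ids contain "/", which is unsafe in
        # filesystem paths; replace it with "--".
        return self.value.replace("/", "--")
```

For example, `ModelId("mlx-community/Llama-3.2-3B-Instruct-4bit").normalize()` would give `"mlx-community--Llama-3.2-3B-Instruct-4bit"`.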

This PR introduced some circular imports, so some code has been moved
around to limit them.

## Test Plan

Tests still pass, types still check. As this is about metadata, I
haven't tested inference.
2026-01-20 17:38:06 +00:00
Alex Cheema
ea9c6d6bdf Remove dead local paths code from download_shard (#1227)
## Motivation

The `download_progress_for_local_path` function and the "Handle local
paths" code block in `download_shard` are dead code that cannot be
reached in normal usage. The code checks if `model_id` (e.g.,
"mlx-community/Llama-3.2-3B-Instruct-4bit") exists as a filesystem path,
but model IDs are constrained to HuggingFace repo format and there's no
API pathway to pass local paths.

## Changes

- Removed `download_progress_for_local_path()` function (45 lines)
- Removed the "Handle local paths" block in `download_shard()` (7 lines)

## Why It Works

This code was added in PR #669 as part of a "feature-local-models"
branch, but the feature was never fully integrated. The check
`aios.path.exists(str(shard.model_card.model_id))` would only return
true if a directory literally named
"mlx-community/Llama-3.2-3B-Instruct-4bit" existed in the cwd, which
doesn't happen in practice. Offline caching is already handled by
`fetch_file_list_with_cache`.

## Test Plan

### Manual Testing
- Run exo normally and verify downloads still work

### Automated Testing
- Existing tests pass (this code had no test coverage)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 17:07:27 +00:00
Alex Cheema
4ea66d427b Reduce download log spam (#1225)
## Motivation

When `skip_download=True`, exo was logging a lot of unnecessary messages during periodic download status checks. This resulted in spammy logs that made it hard to see important messages.

## Changes

- Only log "Downloading ... with allow_patterns=..." when actually downloading (not when skip_download is true)
- Changed periodic download progress check logs from INFO to DEBUG level

## Why It Works

The `skip_download=True` parameter is used when checking download status without actually downloading. By guarding the log behind `if not skip_download:`, we avoid logging on every status check. Changing the periodic emitting logs to DEBUG level reduces noise while still keeping them available for debugging.

## Test Plan

### Manual Testing
- Run exo and observe that logs are less spammy during normal operation
- Use -v or -vv flags to see DEBUG logs when needed

### Automated Testing
- Existing tests cover this code path
2026-01-20 16:57:05 +00:00
rltakashige
8b709e68b2 Mark slow tests as slow (#1220)
2026-01-20 15:03:46 +00:00
Evan Quiney
4da6eeb11f fix a test broken by #1204 (#1219)
bad merge broke a test - fix it
2026-01-20 14:56:20 +00:00
Evan
3d2eee4884 quiet localhost log
this log is just noise - remove it
2026-01-20 14:51:26 +00:00
Evan
116558839e don't clear mdns discovered connections
the pinger currently removes mDNS-discovered connections - these systems
should be independent
2026-01-20 14:46:20 +00:00
Evan Quiney
d4f551c602 Simplify model cards (#1204)
## Motivation

We have a lot of unneeded data in the model card - let's just keep the
necessary stuff and add back more data when we need it.

## Test Plan

EXO still runs! (pipeline on 2)

Co-authored-by: rltakashige <rl.takashige@gmail.com>
2026-01-20 11:01:19 +00:00
Alex Cheema
176ab5ba40 Add GLM-4.7-Flash model cards (4bit, 5bit, 6bit, 8bit) (#1214)
## Motivation

Add support for GLM-4.7-Flash, a lighter variant of GLM-4.7 with the
`glm4_moe_lite` architecture. These models are smaller and faster while
maintaining good performance.

## Changes

1. **Added 4 new model cards** for GLM-4.7-Flash variants:
   - `glm-4.7-flash-4bit` (~18 GB)
   - `glm-4.7-flash-5bit` (~21 GB)
   - `glm-4.7-flash-6bit` (~25 GB)
   - `glm-4.7-flash-8bit` (~32 GB)

   All variants have:
   - `n_layers`: 47 (vs 91 in GLM-4.7)
   - `hidden_size`: 2048 (vs 5120 in GLM-4.7)
   - `supports_tensor`: True (native `shard()` method)

2. **Bumped mlx from 0.30.1 to 0.30.3** - required by mlx-lm 0.30.4

3. **Updated mlx-lm from 0.30.2 to 0.30.4** - adds `glm4_moe_lite`
architecture support

4. **Added type ignores** in `auto_parallel.py` for stricter type
annotations in new mlx-lm

5. **Fixed EOS token IDs** for GLM-4.7-Flash - uses different tokenizer
with IDs `[154820, 154827, 154829]` vs other GLM models' `[151336,
151329, 151338]`

6. **Renamed `MLX_IBV_DEVICES` to `MLX_JACCL_DEVICES`** - env var name
changed in new mlx

## Why It Works

The model cards follow the same pattern as existing GLM-4.7 models.
Tensor parallel support is enabled because GLM-4.7-Flash implements the
native `shard()` method in mlx-lm 0.30.4, which is automatically
detected in `auto_parallel.py`.

GLM-4.7-Flash uses a new tokenizer with different special token IDs.
Without the correct EOS tokens, generation wouldn't stop properly.

## Test Plan

### Manual Testing
Tested generation with GLM-4.7-Flash-4bit - now correctly stops at EOS
tokens.

### Automated Testing
- `basedpyright`: 0 errors
- `ruff check`: All checks passed
- `pytest`: 162/162 tests pass (excluding pre-existing
`test_distributed_fix.py` timeout failures)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 03:58:09 +00:00
rltakashige
f5e6aa82d2 Load layers individually (#1211)
## Motivation

Certain models hang at model loading in tensor parallel. 

Hopefully closes #1205 

## Changes

- Load layer by layer for tensor parallel sharding
- Move eval_with_timeout to auto_parallel.py to resolve circular import.

## Why It Works

The naive fix is to load the model with lazy=False and then shard for
tensor parallel. However, that requires the entire model to be loaded
into memory at once.

Instead, we can load layer by layer and shard after loading. This adds a
small memory overhead, but it is negligible.

I tried loading layer by layer after the sharding, and this allowed
model loading but got stuck at warming up.

## Test Plan

### Manual Testing
GPT OSS loads with TP and FAST SYNCH. Kimi does too.

### Automated Testing
We need to run a suite of exo_bench before merging this!
2026-01-20 03:26:51 +00:00
Alex Cheema
39f0ed6018 Prepend <think> tag to stream for thinking models like GLM-4.7 (#1186)
## Motivation

For thinking models like GLM-4.7, the `<think>` tag is inserted by the
tokenizer's `apply_chat_template()` into the **prompt** (input). The
model generates tokens starting *after* this tag, so `<think>` never
appears in the streamed output. The frontend expects
`<think>...</think>` tags to extract and display thinking content.

**Log evidence:**
```
[gMASK]<sop><|system|>...<|user|>...<|assistant|><think>
```
The prompt ends with `<think>`, so the model generates content after it,
never returning the opening tag.

## Changes

- Added `detect_thinking_prompt_suffix()` helper function in
`utils_mlx.py` to detect if a prompt ends with `<think>` tag
- Added `parse_thinking_models()` generator wrapper in `runner.py` that
prepends the thinking tag to the output stream
- Modified the main generation loop to use the thinking wrapper for
non-GptOssModel models when a thinking prefix is detected
- Updated test mocks to handle the new `apply_chat_template` call

## Why It Works

The solution follows the same pattern as `parse_gpt_oss()` - a generator
wrapper that transforms the output stream. When the chat template ends
with `<think>`, we prepend this tag to the first generated token so the
frontend receives the complete `<think>...</think>` structure it
expects.
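The wrapper pattern can be sketched roughly like this (names and signature are illustrative, not the exact helpers from runner.py):

```python
from typing import Iterator


def prepend_think_tag(tokens: Iterator[str], prompt: str) -> Iterator[str]:
    # If the chat template already closed the prompt with <think>,
    # the model starts generating mid-thought; re-emit the tag so the
    # stream carries a complete <think>...</think> block.
    if prompt.endswith("<think>"):
        first = next(tokens, None)
        if first is not None:
            yield "<think>" + first
    yield from tokens
```

When the prompt does not end with `<think>`, the wrapper passes tokens through unchanged.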

## Test Plan

### Manual Testing
- Run exo: `uv run exo`
- Send a chat request to GLM-4.7:
  ```bash
curl http://localhost:52415/v1/chat/completions -H "Content-Type:
application/json" -d '{
    "model": "mlx-community/GLM-4.7-8bit-gs32",
    "messages": [{"role": "user", "content": "What is 2+2?"}],
    "stream": true
  }'
  ```
- Verify the streamed response starts with `<think>` tag
- Verify the frontend dashboard correctly shows the thinking section
collapsed

### Automated Testing
- All 72 worker tests pass: `uv run pytest src/exo/worker/`
- Type checker passes: `uv run basedpyright`
- Linter passes: `uv run ruff check`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Ryuichi Leo Takashige <leo@exolabs.net>
2026-01-19 19:44:51 +00:00
Alex Cheema
ee43b598fe Split NodePerformanceProfile into granular state mappings (#1209)
## Motivation

The current `NodePerformanceProfile` is a monolithic object where every
update (even 1-second memory updates) replaces the entire profile,
touching unrelated data. Different fields update at vastly different
frequencies:

| Data | Update Frequency |
|------|------------------|
| Memory, System | 1 second |
| Thunderbolt | 5 seconds |
| Network interfaces | 10 seconds |
| Friendly name | 60 seconds |
| Model/Chip ID | Once at startup |

## Changes

Split into separate state mappings so each data type updates
independently:

- `node_identities`: Static and slow-changing data (model_id, chip_id,
friendly_name)
- `node_memory`: RAM and swap usage
- `node_system`: GPU usage, temperature, power, CPU metrics
- `node_network`: Network interface information
- `node_thunderbolt`: Thunderbolt interface identifiers

Added a backwards-compatible `node_profiles` property that reconstructs
`NodePerformanceProfile` from the granular mappings for dashboard
compatibility.

**Files modified:**
- `src/exo/shared/types/profiling.py` - Added `NodeIdentity`,
`NodeNetworkInfo`, `NodeThunderboltInfo` types
- `src/exo/shared/types/state.py` - Added 5 new mappings +
`node_profiles` property
- `src/exo/shared/apply.py` - Updated `apply_node_gathered_info` and
`apply_node_timed_out`

## Why It Works

Each info type now writes only to its specific mapping, avoiding
unnecessary updates to unrelated data. The `MacThunderboltConnections`
handler reads from `node_thunderbolt` instead of the old `node_profiles`
for RDMA connection mapping. The backwards-compatible property ensures
the dashboard continues to work unchanged.

## Test Plan

### Manual Testing
- Start exo and verify dashboard shows node info
- Verify memory/GPU updates stream correctly
- Check that node timeout properly cleans up all mappings

### Automated Testing
- All 162 existing tests pass
- basedpyright: 0 errors
- ruff check: All checks passed
- nix fmt: Applied

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 18:24:15 +00:00
rltakashige
5fd55594c9 Wrap pipeline models for explicit mx.depends between cache and logits (#1206)
## Motivation

GPU timeouts often occur when the prompt size exceeds
prefill_step_size. They also happen for seemingly random models.

## Changes

Add mx.depends so the logits depend on the cache state.
All-gather at the model level rather than the layer level, reducing the
amount of data sent.

## Why It Works

mlx_lm's prefill loop only evaluates cache state, not logits.
When the prompt is longer than prefill_step_size, the all_gather is
never evaluated, causing a GPU timeout.

## Test Plan


### Automated Testing
Added failing test cases and then resolved them.
2026-01-19 17:49:42 +00:00
Jake Hillion
5ab1f8b3e2 NetworkSetupHelper: detect stale startup script content
The daemonAlreadyInstalled() function checked that the startup script
file existed and validated plist properties, but did not compare the
actual script content. If the setupScript constant was updated in a new
app version, the stale on-disk script would not be detected or replaced.

Added a guard clause that reads the installed script from disk and
compares it against the expected setupScript content (with whitespace
normalization). When content differs, the function returns false,
triggering the reinstallation flow with an admin privileges prompt.
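The whitespace-normalized comparison might look like this in outline (a Python analogue of the Swift check; the function name is made up):

```python
import re


def scripts_match(installed: str, expected: str) -> bool:
    """Compare script contents with whitespace normalized, so that
    line-ending or indentation differences alone don't trigger a
    reinstall prompt."""

    def norm(s: str) -> str:
        # Collapse every run of whitespace to a single space.
        return re.sub(r"\s+", " ", s).strip()

    return norm(installed) == norm(expected)
```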

Test plan:
- Installed on a cluster that had the previous network config. Got the
  popup asking for permissions. After accepting I could run Kimi K2
  Thinking Tensor RDMA on all 4 nodes.
2026-01-19 17:36:15 +00:00
Evan Quiney
2202685c3e refactor all information sources (including ipless rdma discovery) (#928)
## Motivation

Information gathering is tightly coupled to MacMon - we should start
generalizing our information sources so we can add more in future.

## Changes

Added a new system to gather any information. Currently, it is attached
to the Worker - though this is mostly to keep the data processing logic
simple. It could be made independent quite easily.

I also refactored topology to include different kinds of connections as
we can gather RDMA connections without having a pre-existing socket
connection, and made the relevant placement updates. We should no longer
need the network locations script in the app.

Other sources of information now include:
- static node information like "model" and "chip" (macos, "Unknown"
fallback)
- device friendly name (macos, falls back to device hostname)
- network interfaces + ips (cross platform)
- thunderbolt interfaces (macos)
- thunderbolt connections (macos)
- RAM usage (cross platform)
- per-device configuration written to EXO_HOME/config.toml

## Limitations

Model and Chip are not cross platform concepts.

We do not differentiate between unified and non-unified memory systems.

A lot of this data collection is based on simple timers. Watching the SC
store on macOS is the correct way to gather some of this information,
but requires a detour into Rust on macOS.

## Why It Works

The InfoGatherer is a generic subsystem which returns a union of metric
datatypes. It writes them to an event, which is applied to state. It is
currently re-spawned with the worker so each cluster receives the
correct information.

As for topology, macOS identifies TB ports with a UUID in
SPThunderboltDataType, and also stores remote UUIDs where it can find
them. These changes read that data via system_profiler, hopefully not so
often as to cause notable performance impact (though this should be
tuned) but frequently enough for moderate responsiveness.
As we can identify TB connections between devices without needing ips
attached to each interface, we can remove the network setup script
(almost) completely.

## Test Plan

### Manual Testing
Spawn RDMA instances without enabling DHCP on the RDMA interfaces.

### Automated Testing
Updated the current master and shared tests to cover the topology
refactor and new events.

---------

Co-authored-by: Sami Khan <smsak99@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Jake Hillion <jake@hillion.co.uk>
2026-01-19 16:58:09 +00:00
Andrei Onel
ce3ad391b1 Update README.md with some changes from release 1.0.61 (#1157)
Updated README.md with documentation for four new features:

- added a "Benchmarking" section documenting the exo-bench tool for
measuring model performance across different placement configurations
- documented the custom namespace feature for cluster isolation in the
macOS app section
- added a "Configuration Options" subsection explaining the --no-worker
CLI flag for coordinator-only nodes
- added a "File Locations (Linux)" subsection documenting XDG Base
Directory Specification compliance on Linux systems

Issue #930
2026-01-19 16:43:18 +00:00
Jake Hillion
fb0151630d shard_downloader: make on_progress callback async
The on_progress callback was synchronous but always invoked from async
contexts, forcing the use of send_nowait() which could raise WouldBlock
if the channel buffer was full, potentially dropping progress updates.

Changed the callback type from `Callable[[ShardMetadata,
RepoDownloadProgress], None]` to return a coroutine, updated all
implementations to be async, and replaced send_nowait() with await
send() in the worker's download progress handler.

This allows proper backpressure handling when sending download progress
events through the channel, eliminating the "Footgun!" that was
previously documented in the code.
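The backpressure difference can be illustrated with a stdlib analogue (exo uses channels rather than asyncio.Queue, but the put vs. put_nowait distinction is the same):

```python
import asyncio


async def main() -> list[int]:
    queue: asyncio.Queue[int] = asyncio.Queue(maxsize=1)
    received: list[int] = []

    async def on_progress(update: int) -> None:
        # `await queue.put(...)` waits for room in the buffer instead
        # of raising QueueFull the way `queue.put_nowait(...)` would,
        # so no progress updates are dropped.
        await queue.put(update)

    async def consume() -> None:
        for _ in range(3):
            received.append(await queue.get())

    consumer = asyncio.create_task(consume())
    for i in range(3):
        await on_progress(i)
    await consumer
    return received
```

With a synchronous callback the producer has no way to wait, which is exactly the "Footgun!" scenario described above.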

Test plan:
- Built a DMG and ran it on one node. All existing models showed as
  downloaded.
- Downloaded a new model. The progress bar on the download page worked.
- Downloaded another new model. The progress bar on the home page
  worked.
2026-01-19 16:19:37 +00:00
Alex Cheema
346b13e2c9 Enhance LaTeX rendering in dashboard markdown (#1197)
## Motivation

When models output LaTeX-formatted math proofs, the dashboard was not
rendering them correctly. Issues included:
- `\documentclass`, `\begin{document}`, `\usepackage` showing as raw
text
- `$...$` inline math with complex expressions (like `\frac`, `\ldots`)
not rendering due to markdown escaping backslashes
- `\begin{align*}...\end{align*}` and other math environments showing as
raw text
- `\emph{...}`, `\textbf{...}` LaTeX formatting commands not being
converted
- `$\require{...}$` (MathJax-specific) causing KaTeX errors
- `\begin{proof}...\end{proof}` showing as raw text

## Changes

Enhanced `MarkdownContent.svelte` with comprehensive LaTeX support:

**Math extraction before markdown processing:**
- Extract `$...$`, `$$...$$`, `\(...\)`, `\[...\]` into placeholders
before markdown processes the text
- Use alphanumeric placeholders (`MATHPLACEHOLDERINLINE0END`) that won't
be interpreted as HTML tags
- Restore and render with KaTeX after markdown processing

**LaTeX document command removal:**
- Strip `\documentclass{...}`, `\usepackage{...}`, `\begin{document}`,
`\end{document}`
- Strip `\maketitle`, `\title{...}`, `\author{...}`, `\date{...}`
- Strip `\require{...}` (MathJax-specific, not KaTeX)
- Replace `tikzpicture` environments with `[diagram]` placeholder
- Strip `\label{...}` cross-reference commands

**LaTeX math environments:**
- Convert `\begin{align*}`, `\begin{equation}`, `\begin{gather}`, etc.
to display math blocks

**LaTeX text formatting:**
- `\emph{...}` and `\textit{...}` → `<em>...</em>`
- `\textbf{...}` → `<strong>...</strong>`
- `\texttt{...}` → `<code>...</code>`
- `\underline{...}` → `<u>...</u>`

**LaTeX environments styling:**
- `\begin{proof}...\end{proof}` → styled proof block with QED symbol
- `\begin{theorem}`, `\begin{lemma}`, etc. → styled theorem blocks

**Display math enhancements:**
- Wrapped in styled container with subtle gold border
- "LaTeX" label and copy button appear on hover
- Dark theme KaTeX color overrides for better readability
- Custom scrollbar for overflow

## Why It Works

The key insight is that markdown processing was escaping backslashes in
LaTeX before KaTeX could see them. By extracting all math expressions
into alphanumeric placeholders *before* markdown runs, then restoring
them *after*, the LaTeX content passes through to KaTeX unmodified.

Using purely alphanumeric placeholders like `MATHPLACEHOLDERINLINE0END`
instead of `<<MATH_INLINE_0>>` prevents markdown from interpreting them
as HTML tags and stripping them.
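A stripped-down sketch of that round trip (the real implementation lives in MarkdownContent.svelte and handles more delimiters; this Python version covers only inline `$...$`):

```python
import re


def extract_math(text: str) -> tuple[str, list[str]]:
    """Swap inline $...$ spans for alphanumeric placeholders before
    markdown runs, so backslashes in the LaTeX survive untouched."""
    stash: list[str] = []

    def _stash(m: re.Match[str]) -> str:
        stash.append(m.group(0))
        return f"MATHPLACEHOLDERINLINE{len(stash) - 1}END"

    return re.sub(r"\$[^$\n]+\$", _stash, text), stash


def restore_math(text: str, stash: list[str]) -> str:
    # After markdown processing, put the original LaTeX back for KaTeX.
    for i, expr in enumerate(stash):
        text = text.replace(f"MATHPLACEHOLDERINLINE{i}END", expr)
    return text
```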

## Test Plan

### Manual Testing
- Hardware: Any machine with the dashboard
- What you did:
  - Ask model to "write a proof in latex"
  - Verify inline math like `$x \in S$` renders correctly
- Verify display math like `\begin{align*}...\end{align*}` renders as
block
  - Verify `\documentclass`, `\begin{document}` are stripped (not shown)
  - Verify `\emph{...}` converts to italics
  - Verify copy button works on display math blocks
- Test edge cases: `$5` (currency) stays as text, `\$50` (escaped)
becomes `$50`

Before:
<img width="799" height="637" alt="Screenshot 2026-01-19 at 11 51 22 AM"
src="https://github.com/user-attachments/assets/62a705b8-b3c2-47b8-afd0-5d0c1b240e44"
/>

After:
<img width="809" height="642" alt="Screenshot 2026-01-19 at 11 46 58 AM"
src="https://github.com/user-attachments/assets/4f35fa1d-333c-4285-bc68-58a50f8f148e"
/>


### Automated Testing
- Dashboard builds successfully with `npm run build`
- Existing functionality preserved

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 14:50:41 +00:00
rltakashige
ea0588429b Custom mlx layer composition (#1201)
## Motivation

With a single pipeline layer, PipelineFirstLayer gets composed with
PipelineLastLayer.

## Test Plan

### Automated Testing
Made failing tests. Fixed them!
2026-01-19 12:36:25 +00:00
rltakashige
73b3f87e07 Set swa_idx and ga_idx for single layer (#1202)
## Motivation

`layer_types` contains neither "sliding_attention" nor
"full_attention" for pipeline parallel (single layer).


## Test Plan

### Manual Testing
Manually tested single layer of GPT OSS. Doesn't crash

2026-01-19 12:31:11 +00:00
Evan Quiney
746589ba6b tidy: remove context manager from api (#1199) 2026-01-19 11:58:13 +00:00
rltakashige
f82f862fd7 Fix several issues with placement (#1200)
## Motivation

Uneven placements were causing issues for some users with lopsided
setups. While fixing that, I ran into another issue: impossible memory
allocations.

## Changes

- Allocate at least 1 layer per device.
- Catch overallocation of memory with an error.
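A toy sketch of the at-least-one-layer constraint (hypothetical; the real placement algorithm considers more than free memory):

```python
def place_layers(n_layers: int, free_mem_gb: list[float]) -> list[int]:
    """Split layers proportionally to free memory, guaranteeing every
    device at least one layer and erroring on impossible allocations."""
    if n_layers < len(free_mem_gb):
        raise ValueError("more devices than layers")
    total = sum(free_mem_gb)
    # Start everyone at 1 layer, then distribute the remainder
    # proportionally to free memory.
    alloc = [1] * len(free_mem_gb)
    remaining = n_layers - len(free_mem_gb)
    shares = [m / total * remaining for m in free_mem_gb]
    alloc = [a + int(s) for a, s in zip(alloc, shares)]
    # Hand out any leftover layers to the devices with the largest
    # fractional shares.
    leftover = n_layers - sum(alloc)
    order = sorted(range(len(shares)),
                   key=lambda i: shares[i] - int(shares[i]),
                   reverse=True)
    for i in order[:leftover]:
        alloc[i] += 1
    return alloc
```

With a very lopsided setup, the small device still receives one layer instead of zero.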


## Test Plan

### Manual Testing
Tested that GPT OSS is placed correctly.

### Automated Testing
Added breaking tests in the first commit. Resolved with new placement
algorithm in the second one.
2026-01-19 11:52:35 +00:00
Alex Cheema
7ff937d8a1 Add dashboard screenshots to README (#1185)
## Motivation

The README showcases exo's features and benchmarks but doesn't show what
the dashboard actually looks like. Adding a screenshot helps users
understand what they'll get when they run exo.

## Changes

- Added dashboard screenshot to `docs/imgs/dashboard-cluster-view.png`:
Shows the cluster topology view with 4 × 512GB M3 Ultra Mac Studio
running DeepSeek v3.1 (8-bit) and Kimi-K2-Thinking (4-bit)
- Added a new "Dashboard" section to README.md below Features,
displaying the screenshot with caption

## Why It Works

Visual documentation helps users understand what exo offers before they
install it. The screenshot demonstrates the cluster management
capabilities.

## Test Plan

### Manual Testing
- Verified image renders correctly in GitHub markdown preview

### Automated Testing
- N/A - documentation only change

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 10:43:27 +00:00
Evan Quiney
d19bf02404 re-raise exceptions in the runner (#1198)
## Motivation

Runners that crash can swallow errors - we should re-raise. Also the
exception handler annoyed me.

## Changes

The try: except in the runner's chat now re-raises.
2026-01-19 10:35:23 +00:00
rltakashige
618cee5223 Resolve test event ordering flakiness (#1194)
## Motivation

The mp sender occasionally does not have time to flush its events before
collect() is called, making the event-ordering test fail.

## Changes

- Replace mp_channel with a simple collector for the event-ordering test
- Also suppress the `<frozen importlib._bootstrap>:488: DeprecationWarning: builtin type SwigPyObject has no __module__ attribute` warning



## Test Plan


### Automated Testing
Ran the test 100 times without it failing.
2026-01-18 20:33:20 +00:00
Antonio Lujano Luna
9c29eb7d48 Add proxy and custom SSL certificate support for corporate networks (#1189)
Support HTTPS_PROXY/HTTP_PROXY environment variables for proxy
configuration and SSL_CERT_FILE for custom CA certificates, enabling use
in corporate environments with SSL inspection.

## Motivation
Users in corporate environments often need to route traffic through HTTP
proxies and use custom CA certificates for SSL inspection. Without this
support, exo cannot download models in these network configurations.

## Changes
- Added `HTTPS_PROXY`/`HTTP_PROXY` environment variable support to
`create_http_session()` in `download_utils.py`
- Added `SSL_CERT_FILE` environment variable support for custom CA
certificate bundles, falling back to certifi's default bundle

## Why It Works
- `aiohttp.ClientSession` natively supports the `proxy` parameter for
routing requests through HTTP proxies
- `ssl.create_default_context(cafile=...)` accepts a custom CA bundle
path, allowing corporate CAs to be trusted
- Using environment variables is consistent with the codebase's existing
configuration patterns (e.g., `EXO_HOME`, `HF_ENDPOINT`)

## Test Plan
### Manual Testing
- Set `HTTPS_PROXY` environment variable and verified model downloads
route through proxy
- Set `SSL_CERT_FILE` to custom CA bundle and verified SSL verification
succeeds with corporate SSL inspection

### Automated Testing
- No automated tests added; this change is configuration-only and does
not alter existing behavior when environment variables are unset
2026-01-18 12:05:50 +00:00
Alex Cheema
c5158bee53 Add pre-commit checks documentation to AGENTS.md (#1184)
## Motivation

CI failures can be avoided by running checks locally before committing.
This adds clear documentation to AGENTS.md so that AI agents (and
humans) know exactly which checks must pass before pushing code.

## Changes

Added a new "Pre-Commit Checks (REQUIRED)" section to AGENTS.md that:
- Lists all 4 required checks (basedpyright, ruff, nix fmt, pytest)
- Provides a one-liner to run all checks in sequence
- Notes that `nix fmt` changes must be staged before committing
- Explains that CI runs `nix flake check` which verifies everything

## Why It Works

Clear documentation prevents CI failures by ensuring contributors run
checks locally first. The one-liner command makes it easy to run all
checks before committing.

## Test Plan

### Manual Testing
- Verified the documented commands work correctly

### Automated Testing
- N/A - documentation only change

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 21:50:24 +00:00
rltakashige
5c8a237940 Handle model timeouts (#1177)
- Add eval with a timeout.
- Add fast synch flag

## Motivation

Because of the experimental FAST SYNCH flag, some models may not work.
This PR catches when this occurs and allows users to specify a run
without fast synch.

## Changes

- Adds a flag to enable or disable fast synch (--fast-synch and
--no-fast-synch)
- Adds a heuristic timeout
- Reduces exo_bench default timeout to 10 minutes.

## Why It Works

The heuristic timeout assumes normal loading times on Mac devices: 60
seconds plus the model size in GB divided by 5. For example, DeepSeek
takes up to 120 seconds to load with tensor parallel, and its timeout
works out to 60 + 120 = 180 s.

We could raise this value if necessary.
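As a sketch (assuming the formula reads as 60 + size_gb / 5):

```python
def load_timeout_s(model_size_gb: float) -> float:
    # 60 s base, plus one extra second per 5 GB of model weights.
    return 60 + model_size_gb / 5
```

A ~600 GB model would get 60 + 120 = 180 s; the 600 GB figure is an assumed size that reproduces the DeepSeek example above.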

## Test Plan

### Manual Testing
Catches that GPT OSS fails to load with Tensor RDMA.
Can launch with the --no-fast-synch flag to run GPT OSS.

**GPT OSS 20B**
TP with fast synch
<img width="3064" height="456" alt="image"
src="https://github.com/user-attachments/assets/f6e25cd8-8621-4e99-99fe-292ee05c4035"
/>

TP without fast synch
<img width="3098" height="496" alt="image"
src="https://github.com/user-attachments/assets/d36453d9-6686-4cfe-aa7c-a7d458369d4d"
/>
[Note: the performance is really not great as fast synch is off]

(As a sanity check)
PP with fast synch
<img width="3124" height="496" alt="image"
src="https://github.com/user-attachments/assets/e97d4547-c6fa-483d-badb-4b371b900b4c"
/>

PP without fast synch
<img width="3078" height="508" alt="image"
src="https://github.com/user-attachments/assets/b2e20dfd-4b0e-4295-8a92-417dfe745c28"
/>

PP without RDMA
<img width="3070" height="498" alt="image"
src="https://github.com/user-attachments/assets/a8509d68-0aef-4cda-bca5-a67d39a0801e"
/>

TP without RDMA
<img width="3068" height="496" alt="image"
src="https://github.com/user-attachments/assets/b5691429-89f4-4369-bcf2-8fde2ad7154a"
/>
2026-01-16 20:25:12 +00:00
rltakashige
745343c705 Return error responses for Chat Completions (#1173)
- Error chunks
- Use error handling in exo_bench.py

## Motivation

Return when an error occurs so that generation stops. Adding timeouts is
a separate TODO for model loading and chat completions.

## Changes

- Return HTTP exceptions as JSON responses in an OpenAI compatible
format.
- Context manager for generation to catch and return error messages.
- Use error handling in exo_bench.py.

## Test Plan

### Manual Testing
Manually tested that exo_bench returns on failures within and outside
generation

2026-01-16 19:24:37 +00:00
Alex Cheema
5e28664c41 Fix draft release detection (attempt 3) (#1176)
## Motivation

Previous fix still failed in CI. Suspected a permissions issue:
GITHUB_TOKEN may not be able to see draft releases via the API.

## Changes

1. Add explicit `permissions: contents: write` to the job
2. Use `gh release list` first to check if draft exists (this uses a
different code path that might work better)
3. Add debug echo statements

## Test Plan

Delete v1.0.63 tag and re-push after merging.

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 17:26:06 +00:00
Alex Cheema
ae0a804ccb Fix draft release detection query (#1175)
## Motivation

Fixes the draft release detection that failed on the v1.0.63 release
attempt.

## Changes

The jq query was piped to `head -1` which truncated multi-line JSON
output to just `{`, causing the empty check to fail.

Changed to use `first // empty` in jq instead.

## Test Plan

Tested locally:
```bash
GITHUB_REF_NAME="v1.0.63"
gh api repos/exo-explore/exo/releases --jq "[.[] | select(.draft == true) | select(.name == \"$GITHUB_REF_NAME\")] | first // empty"
# Returns the full draft release JSON (2711 chars)
```

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 17:05:24 +00:00
Alex Cheema
07cf2c1aa1 Add GitHub releases with Sparkle release notes integration (#1172)
## Motivation

Closes #1140

Currently releases are uploaded to S3 for Sparkle updates but there's no
GitHub Release created, and Sparkle update dialogs don't show release
notes. Users have no visibility into what changed.

## Changes

- Added release workflow documentation comment at top of `build-app.yml`
- Added "Fetch release notes for Sparkle" step that converts markdown
from draft GitHub release to HTML
- Added "Inject release notes into appcast" step that embeds HTML in
appcast.xml with CDATA
- Added "Publish GitHub Release" step that attaches DMG and publishes
the draft

## Why It Works

- Sparkle's `<description>` tag supports HTML wrapped in CDATA for
rendering in update dialogs
- GitHub's markdown API (`/markdown`) converts the release notes to HTML
with proper formatting
- Draft releases allow writing polished notes before the build, then the
workflow publishes them automatically
- The workflow fails if no draft release exists, ensuring release notes
are always provided

## Test Plan

### Manual Testing
1. Create a draft GitHub release for a new tag with markdown release
notes
2. Push the tag to trigger the workflow
3. Verify the GitHub release is published with DMG attached
4. Download appcast.xml from S3 and verify
`<description><![CDATA[...]]></description>` contains HTML
5. Test Sparkle update dialog on macOS to confirm release notes appear

### Automated Testing
No automated tests added - this is CI workflow configuration.

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 16:47:33 +00:00
Evan
83c5285a80 reduce logs
the previous commit's logs were too verbose; this tones them down a bit
2026-01-16 14:05:47 +00:00
Evan Quiney
39ee2bf7bd switch from synchronous threaded pinging to an async implementation (#1170)
still seeing churn in our networking - let's properly rate limit it

## changes

added a persistent httpx AsyncClient with a cap on max connections
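
The effect of capping concurrent requests can be sketched with stdlib asyncio; `PingLimiter` is a hypothetical name, and the semaphore stands in for the connection limit on a persistent httpx `AsyncClient`:

```python
import asyncio

class PingLimiter:
    """Hypothetical sketch: bound concurrent pings with a semaphore, the
    same effect as giving a persistent AsyncClient a max-connections limit."""

    def __init__(self, max_concurrent: int = 8):
        self._sem = asyncio.Semaphore(max_concurrent)

    async def ping(self, peer: str) -> str:
        async with self._sem:
            await asyncio.sleep(0)  # stand-in for the HTTP round trip
            return f"pong:{peer}"

async def ping_all(peers):
    # Only two pings are in flight at any moment; the rest queue up.
    limiter = PingLimiter(max_concurrent=2)
    return await asyncio.gather(*(limiter.ping(p) for p in peers))

results = asyncio.run(ping_all(["a", "b", "c"]))
```

The key design point is that the limit lives in one shared object rather than per-call timeouts, so discovery load stays bounded no matter how many peers appear.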

## testing

deployed on cluster, discovery VASTLY more stable (the only deleted
edges were those discovered by mdns)
2026-01-16 13:20:03 +00:00
Sami Khan
991adfbd6f fix local network warning (#1136)
## Motivation

Local network warning banner was showing on fresh install even though
mDNS was working. The check would fail before the user had a chance to
grant permission via the macOS prompt.

## Changes

- Added `hasWorkedBefore` flag persisted in UserDefaults
- Only show warning if permission previously worked but now doesn't

## Why It Works

On fresh install, the check may fail (no permission yet), but
`hasWorkedBefore` is false so no warning shows. Once the user grants
permission and a check succeeds, we record it. Future failures (zombie
permission after restart) will show the warning since `hasWorkedBefore`
is now true.
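
The banner logic above is tiny; a minimal sketch in Python (the Swift implementation persists `hasWorkedBefore` in UserDefaults, and the function name here is hypothetical):

```python
def should_show_warning(check_passed: bool, has_worked_before: bool) -> bool:
    # Only warn when permission previously worked but the current check
    # fails - a fresh install with no recorded success stays quiet.
    return has_worked_before and not check_passed
```

This avoids a false-positive banner during the window between first launch and the user granting the macOS permission prompt.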

## Test Plan

### Manual Testing
Run locally

### Automated Testing
N/A
2026-01-16 13:10:50 +00:00
rltakashige
4b3de6b984 Fix exo bench for transformers 5.x (#1168)
## Motivation
Prompt Sizer was broken: transformers 5.x tokenizers return BatchEncoding
objects, essentially a dictionary of `{input_ids: [...]}`, instead of the
plain list of input ids that 4.x returned.
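
The fix amounts to normalising both shapes; a sketch with a hypothetical `extract_input_ids` helper (exo's actual function name may differ):

```python
def extract_input_ids(encoded):
    # transformers 5.x tokenizers return a BatchEncoding (dict-like, with
    # the ids under "input_ids"); 4.x returned a plain list of token ids.
    # Accept either shape so the prompt sizer works on both versions.
    if isinstance(encoded, dict) or hasattr(encoded, "keys"):
        return encoded["input_ids"]
    return encoded
```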

## Test Plan

### Manual Testing
Tested that exo bench runs as expected.

### Automated Testing
2026-01-16 12:39:22 +00:00
Evan
c8de3b90ea quiet rust logs
rust logs were too verbose - now only warnings propagate to python

entirely happy not to merge this and to clean up rust logging instead,
but this felt saner right now
2026-01-16 12:34:28 +00:00
Sami Khan
6e6567a802 resolve issue #1070 (#1076)
## Motivation

https://github.com/exo-explore/exo/issues/1070

## Changes

Added check in ChatForm.svelte to reset selectedChatModel when it no
longer matches any running instance.

## Why It Works

The $effect now detects when the selected model is stale (not in
availableModels()) and resets to the first available model.
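
The reset rule can be sketched outside Svelte; `resolve_selection` is a hypothetical name for the logic the `$effect` applies:

```python
def resolve_selection(selected, available):
    # Keep the selection if it still matches a running instance; otherwise
    # fall back to the first available model, or None when nothing runs.
    if selected in available:
        return selected
    return available[0] if available else None
```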

## Test Plan

### Manual Testing

1. Create instance of Model A → Delete it → Create instance of Model B →
Chat
2. Verify request goes to Model B (not Model A)

---------

Co-authored-by: Alex Cheema <41707476+AlexCheema@users.noreply.github.com>
2026-01-15 20:00:41 +00:00
rltakashige
a735dad667 Parse GPT OSS in runner (#1160)
## Motivation

Simplification of API + moving model specific code to the runner


## Test Plan

### Manual Testing
Tested that GPT OSS outputs are parsed correctly on the dashboard.

### Automated Testing
2026-01-15 19:53:55 +00:00
rltakashige
aaf4e36bc3 FIX GPT OSS (#1165)
## Motivation

Adds several unmerged fixes for GPT OSS.
Also adds GPT OSS 20B MXFP4 Q8 instead of Q4 for numerical stability (as
this is unstable for MLX LM too)


## Test Plan

### Manual Testing
Manually tested. No further gibberish responses.

### Automated Testing
Ran EXO Bench - pipeline, tensor and single node work on both 20B and
120B models
2026-01-15 19:20:17 +00:00
Evan Quiney
3e623ccf0d up http timeout to 3 seconds and retry on BadStatusLine (#1164)
we're seeing a lot of network churn - perhaps this is a connection
timing out issue? let's also retry after a second

## testing
none yet

---------

Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 18:15:12 +00:00
Evan Quiney
c22dad8a7d dashboard: add peer: true to package lock (#1162)
this happens every time i run npm install - let's upstream it

## testing
dashboard builds and renders
2026-01-15 17:01:43 +00:00
Evan
4bc4d50685 rust: remove dead code
the system custodian has been made unnecessary with the swift app - we
can remove it

## testing
everything still builds
2026-01-15 16:51:46 +00:00
Jake Hillion
e0aab46fd8 model_cards.py: clean up commented out code
Clean up the commented out code and make sure the comments are unified.
Carrying around the commented out code means people making changes to
model_cards are supposed to update it, but that's not clear and won't be
picked up by type checking etc. Drop it for now - it's in the git
history.

Also make the rest of the comments a bit more uniform, and place
comments about a specific model card inside the model card (instead of
above) so they don't get lost when code is added/moved around.

Test plan:
- my eyes
2026-01-15 13:21:58 +00:00
Evan Quiney
82ba42bae9 add glm-47, minimax-m21 (#1147)
Adds support for GLM 4.7 and MiniMax M2.1

Manual testing:
Tensor + Pipeline execution of both models.

Closes #1141 and #1142
2026-01-14 16:33:17 +00:00
Jake Hillion
3671528fa4 nix: add dashboard build with dream2nix
Continue working towards a fully Nix based build by building the
dashboard with Nix. Continuing the theme of using the existing lock
files, use dream2nix to parse the lock file and build the tree of
dependency derivations.

dream2nix doesn't like the bundleDependencies, so we apply a small patch
to the lock file that drops all dependencies that are bundled. This
should ideally be contributed upstream but that can be done later.

Use this new dashboard build in the build-app CI workflow, meaning
future macOS apps will include this reproducible dashboard.

Test plan:
- Built a DMG, shipped to a cluster, loaded in a browser with no cache
  and the dashboard looks good.

- Directory layout is as expected:
```
$ nix build .#dashboard
$ find result/
...
result/_app/immutable/entry
result/_app/immutable/entry/app.CTPAnMjf.js
result/_app/immutable/entry/start.fUSEa-2O.js
result/_app/immutable/nodes
result/_app/immutable/nodes/3.DqQr1Obm.js
result/_app/immutable/nodes/0.DgEY44RO.js
result/_app/immutable/nodes/2.BjZg_lJh.js
result/_app/immutable/nodes/1.D6vGUYYT.js
result/_app/env.js
result/_app/version.json
result/exo-logo.png
result/favicon.ico
result/index.html
```
2026-01-14 15:58:16 +01:00
Jake Hillion
e6434ec446 nix: add Rust builds with crane and fenix
The Rust workspace lacked Nix build support, making it difficult to
build packages reproducibly or run checks in CI.

Added a flake-parts module at rust/parts.nix that uses crane for Rust
builds and fenix for the nightly toolchain. The source filter isolates
rust/ and root Cargo files to prevent Python/docs changes from
triggering Rust rebuilds. Exports packages (system_custodian,
exo_pyo3_bindings wheel, exo-rust-workspace) and checks (cargo-nextest,
cargo-doc) for all three target platforms.

The devShell now uses inputsFrom to inherit build dependencies from the
workspace package, removing the need for manual pkg-config/openssl setup.

Test plan:
- Ran `nix flake check` successfully
- Built `nix build ".#checks.x86_64-linux.cargo-nextest"` and tests pass
- Built `nix build ".#exo_pyo3_bindings"` and wheel is produced
2026-01-14 11:52:29 +00:00
Jake Hillion
bdb43e1dbb nix: drop noisy echos from devshell
Drop all the printing when entering a devshell. It's annoying, and not a
super accurate description of how to develop exo anyway.
2026-01-14 10:04:57 +00:00
Jake Hillion
e4a01e2b0e chore(deps): nix lock file maintenance
Update nix flake inputs. Add a second input as Swift is currently broken
in nixpkgs on Linux for `swift-format` as we want `nix fmt` to continue
being reproducible everywhere.
2026-01-13 19:57:14 +01:00
Evan Quiney
1200a7db64 Add tensor sharding for GPT-OSS (#1144)
## Motivation

GPT OSS did not previously support tensor sharding

## Changes

Add GPT sharding support in tensor_auto_parallel.
Code is mostly @rltakashige's

## Test Plan

### Manual Testing
Tested GPT-OSS - MLX Fast Sync causes issues in Tensor RDMA - this is a general problem at the moment.
2026-01-13 17:25:52 +00:00
Evan Quiney
47ceb54bc1 up the rlimit (#1148)
Fixes #1117 

Manual testing:
Launched 100 instances. worked. yay.
2026-01-13 15:00:54 +00:00
Jake Hillion
f8112fdf25 nix: convert to flake-parts
Preparing to add a flake-parts module for Rust builds. The flake-utils
library doesn't support the module system needed for cleanly separating
the Rust build configuration.

Converted from flake-utils to flake-parts, switching to the treefmt-nix
flakeModule import pattern. The devShell and formatter outputs remain
functionally equivalent.

Test plan:
- Ran `nix flake check` successfully
- Verified `nix develop` provides the same environment
2026-01-13 15:06:44 +01:00
Alex Cheema
e388f59480 docs: add AGENTS.md for AI coding agents guidance (#1132)
## Motivation

Add documentation to help AI coding agents (Claude Code, Cursor, GitHub
Copilot, etc.) understand the exo codebase and contribute effectively.

## Changes

- Add `AGENTS.md` with guidance for AI agents working on the codebase
- Add symlink `CLAUDE.md -> AGENTS.md` for backwards compatibility with
Claude Code

## Why It Works

`AGENTS.md` is becoming a standard convention for AI agent instructions.
The symlink ensures Claude Code (which looks for `CLAUDE.md`) continues
to work while supporting the broader `AGENTS.md` convention.

## Test Plan

### Manual Testing
- Verified symlink works correctly

### Automated Testing
- N/A (documentation only)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-13 13:05:47 +00:00
Alex Cheema
e5e74e1eef Upgrade mlx-lm to 0.30.2 with transformers 5.x compatibility (#1125)
## Motivation

Upgrade mlx-lm to version 0.30.2 which requires transformers 5.0.0rc2 as
a prerelease dependency. This enables support for newer models like Kimi
K2 Thinking while maintaining compatibility with existing models.

The transformers 5.x release includes breaking changes that affect
custom tokenizers like Kimi's TikTokenTokenizer, requiring compatibility
fixes.

## Changes

### Core Changes
- **mlx-lm upgrade**: Bump to 0.30.2 with locked exact versions for
mlx/mlx-lm to prevent breaking changes
- **transformers 5.x compatibility**: Enable prerelease transformers
dependency

### Kimi K2 Tokenizer Fixes
- Add `bytes_to_unicode` monkey-patch to restore function moved in
transformers 5.0.0rc2
- Load `TikTokenTokenizer` directly instead of via `AutoTokenizer` to
bypass transformers 5.x bug with `auto_map` fallback
- Patch `encode()` to use tiktoken directly with `allowed_special="all"`
to handle special tokens from chat templates

### Other Changes
- Dashboard: Show disk usage for completed model downloads
- CI: Add `workflow_dispatch` trigger to build-app workflow
- Docs: Add basic API documentation

### Testing
- Add comprehensive tokenizer unit tests for all supported models
- Tests verify encode/decode, special token handling, and chat template
encoding

## Why It Works

**bytes_to_unicode issue**: transformers 5.0.0rc2 moved
`bytes_to_unicode` from `transformers.models.gpt2.tokenization_gpt2` to
`transformers.convert_slow_tokenizer`. Kimi's `tokenization_kimi.py`
imports from the old location. The monkey-patch restores it at module
load time.

**AutoTokenizer issue**: transformers 5.x has a bug where
`tokenizer_class_from_name('TikTokenTokenizer')` returns `None` for
custom tokenizers with `auto_map`. Loading the tokenizer directly
bypasses this.

**encode() issue**: transformers 5.x's `pad()` method fails for slow
tokenizers. Using tiktoken's encode directly with
`allowed_special="all"` avoids this path and properly handles special
tokens like `<|im_user|>` from chat templates.
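
The bytes_to_unicode shim is a standard monkey-patch; a self-contained sketch using a stand-in module (`legacy_gpt2` here is hypothetical, playing the role of `transformers.models.gpt2.tokenization_gpt2`):

```python
import sys
import types

def bytes_to_unicode():
    # The classic GPT-2 byte-to-unicode table: map all 256 byte values to
    # printable unicode characters, as the relocated transformers helper does.
    bs = (list(range(ord("!"), ord("~") + 1))
          + list(range(ord("\u00a1"), ord("\u00ac") + 1))
          + list(range(ord("\u00ae"), ord("\u00ff") + 1)))
    cs = bs[:]
    n = 0
    for b in range(256):
        if b not in bs:
            bs.append(b)
            cs.append(256 + n)
            n += 1
    return dict(zip(bs, map(chr, cs)))

# Restore the function at its old import path so legacy code that does
# `from legacy_gpt2 import bytes_to_unicode` keeps working unchanged.
legacy = types.ModuleType("legacy_gpt2")
legacy.bytes_to_unicode = bytes_to_unicode
sys.modules["legacy_gpt2"] = legacy
```

The real patch assigns the function back onto the existing transformers module at import time, before `tokenization_kimi.py` is loaded.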

## Test Plan

### Manual Testing
- Hardware: 2x Mac Studios connected via Thunderbolt 5 (mike22 and
james21)
- Tested Kimi K2 Thinking, GPT-OSS-120B, GPT-OSS-20B, LLama-3.1-8B-bf16, qwen3-30B-A3B-8bit model with pipeline parallelism across both
nodes
- Verified warmup inference completes successfully
- Verified chat completions work with special tokens

### Automated Testing
- Added `test_tokenizers.py` with 31 tests covering:
- Basic encode/decode for all model families (deepseek, kimi, llama,
qwen, gpt-oss, glm)
  - Special token encoding (critical for chat templates)
  - Chat template application and encoding
  - Kimi-specific and GLM-specific edge cases
- All tests pass: `uv run pytest
src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py`

### Failing Tests
RDMA with all models.

---------

Co-authored-by: Evan <evanev7@gmail.com>
2026-01-13 12:06:04 +00:00
Jake Hillion
b968d6f0a0 ci: remove old commented out job 2026-01-13 12:42:04 +01:00
Jake Hillion
3bfffd9b4f ci: build all Nix outputs on all platforms and push to cachix
The CI was only running `nix flake check` on ubuntu-latest, missing
builds for other platforms and not caching packages or devShells.

Added a matrix-based `nix-build` job that runs on macos-26 (aarch64-darwin),
ubuntu-latest (x86_64-linux), and ubuntu-24.04-arm (aarch64-linux). Each
job enumerates all packages and devShells via `nix flake show --json`,
builds them in a single `nix build` call for parallelization, then runs
`nix flake check`. The cachix-action pushes all built outputs automatically.

This ensures all Nix outputs are built and cached for every supported
platform, speeding up local development and CI runs.

Test plan:
- Tested jq enumeration command locally, correctly outputs devShell paths
- Verified xargs pipeline works with the enumerated outputs
2026-01-13 12:37:12 +01:00
Jake Hillion
007eb80029 nix: enable cachix
Enable cachix and push to it in the pipeline.yml workflow. This won't
cache a huge amount yet but will automatically extend our caching as we
build more of the repo with Nix in CI. It can also be used by local
users by accepting our cache to improve the speed of local builds.

Test plan:
- CI
2026-01-12 17:24:59 +01:00
Jake Hillion
8d7b6789b3 dashboard: show disk usage for completed models
The downloads dashboard showed "Completed" for finished model downloads
but provided no indication of how much disk space each model or the
total models on a node were using.

Added total_bytes field to DownloadCompleted type so the size is
preserved when a download completes. Updated the dashboard to display
the model size next to "Completed" status (e.g., "Completed (251.1GB)")
and a total disk usage line below the model count for each node (e.g.,
"502.2GB on disk").

Test plan:
- Ran unit tests for download apply and planning logic
- Type checked all modified files with basedpyright
2026-01-12 16:34:29 +01:00
Jake Hillion
3c5b7ea670 ci: add workflow_dispatch trigger to build-app
Build app is the most convenient way to get a DMG for testing, but
currently it's a bit limited. You have to push to test-app every time
which is far from ideal and requires a bit too much force pushing for my
liking.

Add the workflow_dispatch trigger. This adds a button in the actions UI
to trigger a workflow for a named branch, which means you can use your
normal dev branch instead of having to push to test-app. We'll leave
that behaviour there for now too, though it may change in future.

Filter on `"${{ github.event_name }}" == "workflow_dispatch"` and set
those to alpha as well. Will verify by pushing the first version from
`main` just in case. Unfortunately we do have to merge this before we
can test it.

Test plan:
- Looking really hard.
2026-01-12 12:14:21 +01:00
PG
b74a610537 Add a basic documentation to the api interface (#1122)
## Motivation

Adds basic api documentation

## Changes

- Add docs/api.md
- Modify README.md
2026-01-11 18:44:40 +00:00
Jake Hillion
18c4e49f91 nix: put treefmt in devshell
treefmt is useful to have directly accessible for some formatters, like
`jj fix`. Expose it in the devshell.

Test plan:
- Used with `jj fix` on a large branch. It worked.
2026-01-09 17:53:50 +01:00
Sami Khan
d85b5d3781 feat: uninstall button (#1077)
## Motivation

https://github.com/exo-explore/exo/issues/1075

## Changes

- Added in-app "Uninstall" option under Advanced menu that cleanly
removes all system components
- Added NetworkSetupHelper.uninstall() to remove LaunchDaemon, scripts,
logs, and restore network settings
- Added LaunchAtLoginHelper.disable() to unregister from login items
- Created standalone uninstall-exo.sh script for users who already
deleted the app
- Added uninstall documentation to README

<img width="386" height="577" alt="image"
src="https://github.com/user-attachments/assets/6bbcd18a-992a-409d-8791-ed5e13bbcfe0"
/>
<img width="372" height="432" alt="image"
src="https://github.com/user-attachments/assets/ee76b45d-c111-4807-ab28-3f2f20e01140"
/>


## Why It Works

The in-app uninstaller runs a privileged shell script (via AppleScript)
to launchctl bootout the daemon, remove files, and restore the
"Automatic" network location. The standalone script provides the same
cleanup for users who already deleted the app.

## Test Plan

### Manual Testing
Hardware: MacBook Pro
- Built and ran app, verified LaunchDaemon and network location were
created
- Used in-app Uninstall, verified all components removed and network
restored to Automatic
- Rebuilt app, quit normally, ran sudo ./uninstall-exo.sh, verified same
cleanup

### Automated Testing
N/A

---------

Co-authored-by: Evan <evanev7@gmail.com>
2026-01-09 14:49:08 +00:00
Evan Quiney
caafc48693 Forward tools to the models chat template properly (#1106)
We did not properly forward tools to the chat template before. This is not a full tool-calling implementation, but it should improve things slightly.

## Changes made

Pass tools to the HF tokenizer's chat template
Join message chunks into a larger message (opencode sometimes splits messages into chunks - we were ignoring them before)
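
Chunk joining can be sketched like this (a minimal version; the real code operates on the OpenAI-style message dicts before templating):

```python
def join_message_chunks(messages):
    # Merge consecutive chunks with the same role into a single message,
    # so the chat template sees one coherent turn per speaker.
    merged = []
    for msg in messages:
        if merged and merged[-1]["role"] == msg["role"]:
            merged[-1]["content"] += msg["content"]
        else:
            merged.append(dict(msg))
    return merged
```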

## Future work

We need to parse the model output and normalise the return format to be compatible with the openai api.
2026-01-09 13:28:41 +00:00
Evan
cca8c9984a cleanup unused dependencies
we have a lot of dependencies we have no intent of using. kill them with
fire!

## testing
exo still launches and does the worst inference known to man on my Qwen3
instance. tests pass too!!
2026-01-09 13:11:58 +00:00
Sami Khan
d1e88def42 scrollbars fixed (#1113)
## Motivation

Fixes https://github.com/exo-explore/exo/issues/1107 - Horizontal
scrollbar always appears in instances section, and vertical scrollbar
appears too early (with just 1-2 instances on large screens).


## Changes

- Added overflow-x-hidden to remove horizontal scrollbar
- Added xl:max-h-96 for responsive vertical height (384px on xl+ screens
vs 288px default)
- Added py-px to accommodate corner accent decorations that extend 1px
outside cards

## Why It Works

- overflow-x-hidden prevents horizontal scroll regardless of content
- Larger max-height on xl screens fits 2 instances without scrollbar;
3rd triggers it
- 1px vertical padding accommodates the -top-px/-bottom-px positioned
corner accents that caused tiny overflow

## Test Plan

### Manual Testing
<img width="1190" height="868" alt="image"
src="https://github.com/user-attachments/assets/2a582328-5b4f-4490-a488-52106f2e85ef"
/>

### Automated Testing
N/A
2026-01-09 12:51:05 +00:00
Sami Khan
59e7594e34 UNKNOWN to PREPARING (#1112)
## Motivation

The "UNKNOWN" status shown when first launching an instance is confusing
and unhelpful. "PREPARING" better describes what's actually happening.

![telegram-cloud-photo-size-4-5981245965962251168-x](https://github.com/user-attachments/assets/65b0802b-fb64-4fa7-bff7-c13757035b3a)


## Changes

- Renamed status from "UNKNOWN" to "PREPARING" in dashboard
(+page.svelte)
- Renamed unknown state to preparing in macOS app
(InstanceViewModel.swift, InstanceRowView.swift)

## Why It Works

The status appears when an instance exists but runners haven't reported
status yet. "PREPARING" accurately describes this transitional state.

## Test Plan

### Manual Testing
Hardware: MacBook Pro
<img width="319" height="200" alt="image"
src="https://github.com/user-attachments/assets/9a1c3caf-026d-47ea-80d1-63c6e41d93aa"
/>

### Automated Testing
N/A
2026-01-09 11:46:51 +00:00
Chris A
c65320acd3 Fix mlx seed (#1094)
---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
Co-authored-by: Ryuichi Leo Takashige <leo@exolabs.net>
2026-01-09 01:40:15 +00:00
Jake Hillion
b9a78f6f3a ci: compute CURRENT_PROJECT_VERSION from semver
Previous Sparkle builds were cut from a different repo with different
build numbers, breaking version ordering. Users aren't receiving updates
because CFBundleVersion values don't reflect the actual version sequence.

Added a step to compute the build version deterministically from semver:
PRERELEASE + (1000 * PATCH) + (1_000_000 * MINOR) + (1_000_000_000 * MAJOR).
Release versions use prerelease=999 to ensure they're always higher than
their prereleases (e.g., 1.0.61 > 1.0.61-alpha.3).

This ensures consistent version ordering across repos, allowing Sparkle
to correctly identify and deliver updates to users.

Test plan:
- Verified formula with test script:

```sh
compute_version() {
  VERSION="$1"
  BASE_VERSION="${VERSION%%-*}"
  MAJOR=$(echo "$BASE_VERSION" | cut -d. -f1)
  MINOR=$(echo "$BASE_VERSION" | cut -d. -f2)
  PATCH=$(echo "$BASE_VERSION" | cut -d. -f3)

  if [[ "$VERSION" == *-* ]]; then
    PRERELEASE_PART="${VERSION#*-}"
    PRERELEASE_NUM="${PRERELEASE_PART##*.}"
    if ! [[ "$PRERELEASE_NUM" =~ ^[0-9]+$ ]]; then
      PRERELEASE_NUM=0
    fi
  else
    PRERELEASE_NUM=999
  fi

  BUILD_VERSION=$((PRERELEASE_NUM + 1000 * PATCH + 1000000 * MINOR + 1000000000 * MAJOR))
  printf "%-20s -> %12s\n" "$VERSION" "$BUILD_VERSION"
}

compute_version "1.0.61-alpha.2"
compute_version "1.0.61-alpha.3"
compute_version "1.0.61"
compute_version "1.0.62-alpha.1"
compute_version "1.1.0-alpha.1"
compute_version "2.0.0-alpha.1"
compute_version "0.0.0-alpha.0"
compute_version "0.0.1-alpha.1"
compute_version "1.2.3"
compute_version "1.2.3-beta.5"
```

- Output:

```sh
Version              -> Build Number
----------------------------------------
1.0.61-alpha.2       ->   1000061002
1.0.61-alpha.3       ->   1000061003
1.0.61               ->   1000061999
1.0.62-alpha.1       ->   1000062001
1.1.0-alpha.1        ->   1001000001
2.0.0-alpha.1        ->   2000000001
0.0.0-alpha.0        ->            0
0.0.1-alpha.1        ->         1001
1.2.3                ->   1002003999
1.2.3-beta.5         ->   1002003005
```

- Confirmed ordering: alpha.2 < alpha.3 < release < next-alpha
2026-01-08 19:52:33 +01:00
Jake Hillion
8f7f0e893a ci: avoid uploading alpha appcasts
Currently alpha appcasts get uploaded. It turns out these overwrite the
standard appcast, so even though no one will update to the alpha
channel, everyone will miss regular updates while the latest build was
an alpha one.

Ideally we should combine the source of truth for both the alpha and
release channels, but as no one is using the alpha channel yet, let's
stop uploading it for now.

Test plan:

![eyes](https://media1.giphy.com/media/v1.Y2lkPTc5MGI3NjExeGNwdDk0dmdscjlkZnd6eGxhcjJzdDBsYndmc2t2cnlpZDNxZnZhYSZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/gKHGnB1ml0moQdjhEJ/giphy.gif)
2026-01-08 18:52:10 +01:00
Alex Cheema
4759b09d4c Use presigned URLs for bug report uploads (#1109)
## Motivation

Previously we hardcoded AWS credentials into the app.
This is not good practice.

## Changes

Use presigned URLs instead.

## Why It Works

Presigned URLs are an S3 feature for this kind of thing. They provide an
expiring presigned URL with certain permissions. In this case we have a
presigned URL with `s3:PutObject` permission that expires after 5
minutes. The client uses this presigned URL to upload a bug report
instead of using its own credentials to sign a request. This also
simplifies a lot of the Swift code.

## Test Plan

### Manual Testing
On a single MacBook, I downloaded the app and sent a bug report. It
worked and appeared in the bucket.
2026-01-08 17:17:48 +00:00
Alex Cheema
ca680185f3 Display RDMA debug info in macOS app. (#1072)
## Motivation

Often users are running into issues with RDMA. See
https://github.com/exo-explore/exo/issues?q=is%3Aissue%20rdma
Having some debug info in the macOS app will help to debug these issues.

## Changes

Displays output of the following commands in the debug info section of
the macOS app:

1. `rdma_ctl status`
2. `ibv_devices`
3. `ibv_devinfo`

## Why It Works

It displays RDMA debug info in the debug info section of the macOS app.

## Test Plan

### Manual Testing
We need to make a new build of the macOS app and check the output under
the following conditions:

1. No RDMA enabled.
2. RDMA enabled but no devices connected over TB5.
3. RDMA enabled and devices connected over TB5.
2026-01-08 15:17:00 +00:00
Jake Hillion
383309e24e fmt: add typescript formatting
Add typescript auto formatting with Prettier and treefmt-nix. Added a
.prettierrc to useTabs, which isn't the default, to reduce churn. The
rest looks okay and will be checked by CI.

Test plan:
- CI
2026-01-08 13:47:27 +00:00
Jake Hillion
55463a9806 fmt: add swift formatting
Swift code currently has no auto formatting. Add `swift-format` to the
`treefmt-nix` config to get this formatted.

As our existing Swift code uses 4-space indentation instead of the
default 2-space, also add a custom `.swift-format` configuration.
Test plan:
- CI
2026-01-08 13:34:45 +00:00
Evan Quiney
56af61fac9 add a server for distributed testing in /tests until we work out a stable solution. (#1098)
## Motivation

Testing multiple devices simultaneously requires coordination, and we
don't necessarily want to run a full EXO to test single components. We
need a mid-scale integration testing framework for distributed tests.

## Changes

Add a simple python server + bash query that runs Jaccl and Ring tests
without constructing a worker/master/networking. The query relies on all
devices being accessible over tailscale, currently.

## Test Plan

Manually tested RDMA + Ring inference on 2 nodes.
2026-01-08 12:50:04 +00:00
Evan Quiney
f76d543d98 We shouldn't fail on an HTTPException in the tier-2 discovery system. (#1104)
## Motivation

Fixed a crash we found

## Changes

Catch the exception and return None instead of crashing exo
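
The guard is simple; a hedged sketch (`safe_probe` is a hypothetical name, not exo's actual discovery function):

```python
def safe_probe(probe):
    # Swallow the discovery probe's exception and report "no peer found"
    # rather than letting an HTTPException take down the whole process.
    try:
        return probe()
    except Exception:
        return None
```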

## Test Plan

### Manual Testing
Exo launches. Couldn't repro the original case this arose.
2026-01-08 12:43:34 +00:00
Sami Khan
ea841aca37 local network check (#1103)
## Motivation

After machine restart, macOS local network permission can appear enabled
in System Settings but not actually work. EXO fails to discover other
machines, and the only fix is manually toggling the permission off/on
and relaunching. Users had no way to know this was happening.

## Changes

- Added LocalNetworkChecker service that detects if local network access
is actually functional
- Added warning banner with instructions and "Open Settings" button when
blocked
- Added NSLocalNetworkUsageDescription and NSBonjourServices to
Info.plist (required by macOS)

<img width="386" height="712" alt="image"
src="https://github.com/user-attachments/assets/c6fc873d-2c6a-4c9b-89cb-f7bc7322e25b"
/>

## Why It Works

Uses NWConnection to UDP multicast address 224.0.0.251:5353 (mDNS),
which is subject to the app's actual TCC permission state. Other
approaches (NWBrowser, dns-sd subprocess) either require additional
entitlements or run with their own permissions, giving false results.
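
The probe idea translates to any language: craft a minimal mDNS (DNS PTR) query and send it to the multicast group, so the OS applies the app's real permission. A Python sketch of the packet construction (the Swift code uses NWConnection instead):

```python
import socket
import struct

MDNS_GROUP, MDNS_PORT = "224.0.0.251", 5353

def mdns_query(name: bytes = b"_services._dns-sd._udp.local") -> bytes:
    # Minimal DNS PTR query for the mDNS service-enumeration name.
    header = struct.pack("!6H", 0, 0, 1, 0, 0, 0)  # id, flags, 1 question
    qname = b"".join(bytes([len(p)]) + p for p in name.split(b".")) + b"\x00"
    return header + qname + struct.pack("!2H", 12, 1)  # QTYPE=PTR, QCLASS=IN

# Actually sending it (not run here) would look like:
# sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
# sock.sendto(mdns_query(), (MDNS_GROUP, MDNS_PORT))
```

Whether the datagram goes out (or any response arrives) reflects the TCC local-network permission state, which is exactly what the checker needs to observe.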

## Test Plan

### Manual Testing
Hardware: MacBook Pro
  - Toggle local network OFF in System Settings → warning banner appears
  - Toggle local network ON → warning disappears
  - Verified detection correctly reflects actual permission state

### Automated Testing
N/A
2026-01-08 12:24:46 +00:00
rltakashige
077b1bc732 exo-bench (Benchmark model pp & tg speed) (#1099)
## Motivation

This PR implements benchmarking in the style of llama-bench. The main
difficulty here is the fact that exo is not a library - it exposes an
endpoint. This means that benchmarking numbers will be inaccurate if the
API is measured.

The solution assumes nodes are set up with uv run exo (or via the app),
and then hits the new endpoint /bench/chat/completions to retrieve
generation statistics directly from mlx_lm.

This will allow us to release benchmarks for models and perform
regression tests.

TODO: Performance benchmarking.

## Changes

- Adds /bench/chat/completions endpoint
- Adds BenchChatCompletion/Response
- Adds a logits processor to prevent response from ending early
- Adds a "Prompt Sizer" which downloads the tokenizer and dynamically
adjusts a prompt of repeated "a" tokens to fit the desired prompt size.
- Reduce prefill step size to 2048 for now (in future, dynamically
adjust this value)
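The "Prompt Sizer" idea above can be sketched as follows. This is an illustrative shape only (`size_prompt` and the tokenizer interface are assumptions, not exo's actual code): grow a prompt of repeated "a" tokens until the tokenized length reaches the target, then trim back down.

```python
from typing import Callable

def size_prompt(encode: Callable[[str], list], target_tokens: int) -> str:
    """Return a prompt whose tokenized length matches target_tokens."""
    words = ["a"]
    # Double the prompt until we reach or pass the target...
    while len(encode(" ".join(words))) < target_tokens:
        words *= 2
    # ...then trim one word at a time back down to the target.
    while len(encode(" ".join(words))) > target_tokens and len(words) > 1:
        words.pop()
    return " ".join(words)

if __name__ == "__main__":
    # Stand-in tokenizer: one token per whitespace-separated word.
    prompt = size_prompt(str.split, 128)
    print(len(prompt.split()))  # 128
```

A real tokenizer won't map words to tokens one-to-one, which is why the trim loop stops at (rather than exactly on) the target for some tokenizers.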


## Test Plan

### Manual Testing
Benchmarked Llama, Qwen, DeepSeek and Kimi models. Will require several
fixes to run consistently on all configurations (to be done in the
future).
Manually tested the normal API to verify chat requests complete as
expected.

### Automated Testing
Not really possible. Type checker passes.
2026-01-06 17:39:09 +00:00
Alex Cheema
4963c33162 Fix Discord link in README.md. Fixes #1096 (#1097)
## Motivation

Discord link expired.

## Changes

Replace discord invite link with permanent link.

## Why It Works

It's permanent now.

## Test Plan

Clicked the link. It works.
2026-01-06 14:05:09 +00:00
madanlalit
4f6fcd9e93 feat(macos-app): add custom namespace UI for cluster isolation
Add Advanced Options section with custom namespace field that allows
users to override EXO_LIBP2P_NAMESPACE environment variable. This
enables splitting machines that can see each other into separate
clusters.

- Added customNamespace property with UserDefaults persistence
- Added Advanced Options collapsible section with text field
- Added Save & Restart button that auto-restarts exo process
- Namespace replaces buildTag when custom value is set
- Falls back to buildTag (version) when namespace is empty
2026-01-05 15:25:00 +01:00
Evan Quiney
839b67f318 [feat] Add an option to disable the worker (#1091)
## Motivation

Workerless machines can be used for networking without running any gpu
jobs - add a cli flag that adds this basic functionality.

## Changes

Adds the --no-worker cli flag

## Test Plan

### Manual Testing

Exo starts as expected

### Automated Testing

None
2026-01-05 12:05:03 +00:00
Drifter4242
47b8e0ce12 feat: remember last launch settings (model, sharding, instance type) (#1028)
## Motivation

Saves the last launch settings, so that the next time you run exo it
will default to the same launch settings.
This is just a small quality of life improvement.

## Changes

On launch, the settings are saved to the browser's local storage. When
the model list is filled out, the saved settings are read and applied as
the default.

I reviewed, tested and edited the code, but some of the code was written
by Claude Opus. I hope that's ok.

## Why It Works

See above

## Test Plan

### Manual Testing

I have two Mac Studio M3 Ultras, each with 512GB RAM, connected via
Thunderbolt 5. I ran Kimi K2 Thinking with MLX Ring and Tensor Split.
I ran exo multiple times to confirm that the default works.

### Automated Testing

No changes to automated testing.
2026-01-05 11:27:14 +00:00
Evan Quiney
17f9b583a4 Task Deduplication (#1062) 2026-01-03 20:01:49 +00:00
RickyChen / 陳昭儒
844bcc7ce6 fix: prevent form submission during IME composition (#1069)
## Problem
When typing in Chinese (or other IME-based languages like
Japanese/Korean), pressing Enter to select a character from the IME
candidate list would incorrectly submit the message instead of
confirming the character selection.

## Solution
Added IME composition state detection in the `handleKeydown` function in
`ChatForm.svelte`:
- Check `event.isComposing` to detect active IME composition
- Fallback to `event.keyCode === 229` for broader browser compatibility
- Return early when IME is active, allowing normal character selection

## Changes
- Modified `dashboard/src/lib/components/ChatForm.svelte` 
- Added IME composition check before Enter key handling

Co-authored-by: Ricky Chen <rickychen@Rickys-MacBook-Pro.local>
2025-12-31 17:11:04 +00:00
Evan Quiney
c1be5184b2 Fix tests broken by 283c (#1063)
Some tests were broken by #1058 and #1046 - this fixes them.
2025-12-31 01:53:55 +00:00
Alex Cheema
1ec550dff1 Emit download progress on start, and change downloads to be keyed by model_id (#1044)
## Motivation

We added a download page to the dashboard which shows the current
download status of each model on each node. Users have reported this to
be extremely useful.

However, we don't currently fetch the download progress on start, so it
doesn't show any model's download status.

## Changes

Fetch and emit model download status on worker start, and periodically
every 5 minutes.
Also to support this, I changed download_status to be keyed by model_id
instead of shard, since we want download_status of each model, not each
shard.
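Re-keying by model means per-shard progress has to be rolled up into a single per-model figure. A minimal sketch of that aggregation (names and data shapes are assumptions, not exo's actual types):

```python
from collections import defaultdict

def aggregate_by_model(
    shard_status: dict[tuple[str, int], tuple[int, int]],
) -> dict[str, float]:
    """shard_status maps (model_id, shard_index) -> (downloaded_bytes, total_bytes).

    Returns per-model download fraction in [0, 1].
    """
    downloaded: dict[str, int] = defaultdict(int)
    total: dict[str, int] = defaultdict(int)
    for (model_id, _shard), (done, size) in shard_status.items():
        downloaded[model_id] += done
        total[model_id] += size
    return {m: downloaded[m] / total[m] for m in total if total[m] > 0}
```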

## Why It Works

The dashboard already implements the correct functionality, we just
weren't populating the download status in the state. Now it gets
populated and shows correctly.

## Test Plan

### Manual Testing
On a cluster of 2 x 512GB M3 Ultra Mac Studio, I launched an instance
onto one node that hadn't been downloaded. I checked the download page
and it showed the in progress download. I downloaded it to completion,
restarted exo on both nodes, and then opened the download page and it
showed the model as 100% downloaded and other models as 0% that hadn't
been downloaded.

---------

Co-authored-by: Evan <evanev7@gmail.com>
2025-12-31 01:18:10 +00:00
Alex Cheema
283c0e39e4 Placement filters for tensor parallel supports_tensor, tensor dimension and pipeline parallel deepseek v3.1 (#1058)
## Motivation

Certain placements are not valid, but invalid placement previews were being shown in the dashboard and would then fail when the user actually tried to launch an instance with that placement. Added filters to exclude these placements.


## Changes

Three filters added:

1. Certain models do not support tensor parallel at all. Checks `supports_tensor` on the model_meta.
2. For models that do support tensor parallelism, certain tensor parallel sizes are not valid. This check is not strictly correct yet, but works for now; the fully correct check is more involved.
3. For unknown reasons, deepseek v3.1 (8-bit) does not work with tensor parallelism.

## Why It Works

`place_instance` now raises an `Exception` for invalid placements.

## Test Plan

### Manual Testing
Since `/instance/previews` enumerates all possible placements and runs `place_instance`, I checked the dashboard to see if invalid placements are still shown.
2025-12-31 00:33:40 +00:00
Alex Cheema
35be4c55c3 prioritise mlx jaccl coordinator ip (en0 -> en1 -> non-TB5 -> other) 2025-12-31 00:10:19 +00:00
Alex Cheema
31d4cd8409 set KV_CACHE_BITS to None to disable quantized kv cache 2025-12-31 00:03:30 +00:00
Alex Cheema
8a6da58404 remove mx.set_cache_limit 2025-12-30 23:58:15 +00:00
Alex Cheema
16e2bfd3b3 log EXO_LIBP2P_NAMESPACE on start 2025-12-30 04:08:47 +00:00
Alex Cheema
ade3ee7ec5 fix warmup order. should be rank!=0 then rank=0 2025-12-30 03:29:34 +00:00
Evan Quiney
fea42473dd Place local node at the top of the dashboard. (#1033)
@samiamjidkhan and @AlexCheema's work moving the topology to place the
local node at the top of the topology in the app dashboard.
2025-12-28 21:12:47 +00:00
Alex Cheema
ca7adcc2a8 Update README.md with instructions to enable RDMA. (#1031)
## Motivation

We didn't have instructions for enabling RDMA on macOS.

## Changes

I added instructions for enabling RDMA on macOS.

## Why It Works

Tried it on my M4 Max MacBook Pro and works.

## Test Plan

### Manual Testing
Tried it on my M4 Max MacBook Pro and works.

### Automated Testing
In the future, we could automate this from fresh macOS builds using KVM
over IP. See #1030
2025-12-28 20:56:26 +00:00
Evan Quiney
9d9e24f969 some dashboard updates (#1017)
Mostly @samiamjidkhan and @AlexCheema's work in progress.

---------

Co-authored-by: Sami Khan <smsak99@gmail.com>
Co-authored-by: Alex Cheema
2025-12-28 20:50:23 +00:00
Jake Hillion
b5d424b658 placement: generate per-node host lists for MLX ring backend
Pipeline + MLX Ring worked with 2 nodes but failed to initialize with
3 or more nodes. The MLX ring backend requires each node to know its
specific left and right neighbors in the ring, but the previous
implementation provided a single flat host list shared by all nodes.

With 2 nodes, a flat list [host0, host1] accidentally worked because
each node could find its only neighbor. With 3+ nodes, each node needs
a customized view:
- Rank 0: [self, right_neighbor, placeholder]
- Rank 1: [left_neighbor, self, right_neighbor]
- Rank 2: [placeholder, left_neighbor, self]

Changed MlxRingInstance from `hosts: list[Host]` to
`hosts_by_node: dict[NodeId, list[Host]]` with `ephemeral_port: int`.

Added `get_mlx_ring_hosts_by_node()` which generates per-node host
lists where:
- Self position uses 0.0.0.0 for local binding
- Left/right neighbors use actual connection IPs
- Non-neighbors use 198.51.100.1 (RFC 5737 TEST-NET-2 placeholder)
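The per-rank scheme above can be sketched as a small function (illustrative names only, not exo's actual API): each rank sees itself as 0.0.0.0 for local binding, its adjacent ranks by real IP, and everyone else as the placeholder.

```python
PLACEHOLDER = "198.51.100.1"  # RFC 5737 TEST-NET-2, never routed

def ring_hosts_by_node(ips: list[str], port: int) -> dict[int, list[str]]:
    """Generate each rank's customized view of the ring host list."""
    n = len(ips)
    out: dict[int, list[str]] = {}
    for rank in range(n):
        hosts = []
        for other in range(n):
            if other == rank:
                hosts.append(f"0.0.0.0:{port}")        # self: bind locally
            elif abs(other - rank) == 1:
                hosts.append(f"{ips[other]}:{port}")   # left/right neighbor
            else:
                hosts.append(f"{PLACEHOLDER}:{port}")  # non-neighbor
        out[rank] = hosts
    return out
```

With 2 nodes every entry is either self or a neighbor, which is why the old flat list accidentally worked; with 3+ nodes the placeholder entries appear.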

Also added IP prioritization (en0 > en1 > non-Thunderbolt > any) to
prefer stable network interfaces.

Fixed topology discovery recording loopback addresses (127.0.0.1) as
valid connections to remote nodes. The reachability check now verifies
node identity via HTTP GET /node_id rather than just checking if the
port is open.

Test plan:

- Built a DMG [0]
- Installed on all Macs and started cluster.
- Requested a 3 node Pipeline + MLX Ring Llama 3.3 70B (FP16).
- It started and I was able to send a few chat messages.

Eventually my instance seemed to get into a broken state and chat
stopped working, but this commit is a clear step forward.

[0] https://github.com/exo-explore/exo/actions/runs/20473983471/job/58834969418
2025-12-28 20:38:20 +00:00
Drifter4242
b465134012 Fix Kimi K2 Thinking download by adding tiktoken.model to download patterns (#1024)
Kimi-K2 Thinking uses tiktoken.model for its tokenizer, which wasn't
being downloaded. This adds it to the default_patterns alongside
tokenizer.model.
I'm a bit confused why this isn't a problem for other people - I know
that others have used Kimi K2 (I wonder if they manually fixed the
download).

## Motivation

I downloaded Kimi K2 Thinking and it didn't work because it didn't
download tiktoken.model file.

## Changes

Added tiktoken.model to the default patterns.
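To illustrate why the file was skipped: download allow patterns are glob-matched against repo filenames, and `tiktoken.model` matches neither `tokenizer.model` nor the wildcard patterns. A sketch with assumed pattern values (not exo's real configuration):

```python
from fnmatch import fnmatch

def allowed(filename: str, patterns: list[str]) -> bool:
    """True when the filename matches any download allow pattern."""
    return any(fnmatch(filename, p) for p in patterns)

old_patterns = ["*.json", "*.safetensors", "tokenizer.model"]
new_patterns = old_patterns + ["tiktoken.model"]
```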

## Why It Works

Now downloads the file.

## Test Plan

### Manual Testing

I have two Mac Studio M3 Ultras, each with 512GB RAM, connected via
Thunderbolt 5. I ran Kimi K2 Thinking with MLX Ring and Tensor Split. It
ran successfully.

### Automated Testing
No automated test changes. I don't think they are needed.
2025-12-28 19:30:31 +00:00
Matiwos Kebede
eabdcab978 Fix linux docs (#1022)
This PR updates the "Run from Source (Mac & Linux)" section in README.md
to clarify Linux instructions.

Changes include:
- Split the section into macOS and Linux subsections.
- Added native Linux package manager commands (apt, dnf, pacman) for
dependencies: uv, node, npm.
- Clarified that macmon is macOS-only.
- Noted that Homebrew on Linux is optional, with native package managers
preferred.

These changes improve clarity for Linux users and fix confusion from the
previous macOS-centric instructions.
2025-12-27 19:56:44 +00:00
Evan Quiney
8e9332d6a7 Separate out the Runner's behaviour into a "connect" phase and a "load" phase (#1006)
## Motivation

We should ensure all runners are connected before loading the model -
this gives us finer grained control in the future for the workers
planning mechanism over the runners state.

## Changes

- Introduced task ConnectToGroup, preceding LoadModel
- Introduced runner statuses Idle, Connecting, Connected
- Separated out initialize_mlx from shard_and_load
- Single instances never go through the connecting phase

## Test Plan

### Automated Testing
Added a test for checking event ordering in a standard workflow.

### Manual Testing
Tested Llama 3.2 1b and Kimi K2 Thinking loads and shuts down repeatedly
on multiple configurations.
Not exhaustive, however.

---------

Co-authored-by: rltakashige <rl.takashige@gmail.com>
2025-12-27 16:28:42 +00:00
Heath Dutton🕴️
4b65d5f896 Fix race condition in mlx_distributed_init with concurrent instances (#1012)
## Motivation

Fixes #1005

When multiple instances initialize concurrently with the same rank, they
overwrite each other's coordination files (hosts_{rank}.json), causing
"[jaccl] Malformed device file" errors and initialization failures.

## Changes

- Changed coordination filename from `./hosts_{rank}.json` to
`./hosts_{instance_id}_{rank}.json` to make it unique per instance
- Added cleanup in a finally block to remove coordination files after
initialization completes
- Applied fix to both MlxRingInstance and MlxJacclInstance cases

## Why It Works

Each instance now gets a unique coordination file based on its
instance_id, preventing concurrent instances from overwriting each
other's files. The cleanup logic ensures files are removed after use,
preventing accumulation and handling both success and failure cases.
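The fix boils down to a unique-per-instance filename plus cleanup in a `finally` block. A minimal sketch (the filename format follows the commit; the helper shapes are assumptions):

```python
import json
import os
from typing import Callable

def coordination_path(instance_id: str, rank: int) -> str:
    # Unique per instance *and* rank, so concurrent instances don't collide.
    return f"./hosts_{instance_id}_{rank}.json"

def with_coordination_file(instance_id: str, rank: int, hosts: list[str],
                           init_fn: Callable[[str], None]) -> None:
    """Write the coordination file, run init, and always clean up."""
    path = coordination_path(instance_id, rank)
    with open(path, "w") as f:
        json.dump(hosts, f)
    try:
        init_fn(path)  # stand-in for the distributed init that reads the file
    finally:
        os.remove(path)  # removed on success and failure alike
```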

## Test Plan

### Manual Testing
Code review and logic verification. The fix prevents the race condition
by ensuring filename uniqueness per instance.

### Automated Testing
No new tests added. Existing tests continue to pass.

---------

Co-authored-by: Ryuichi Leo Takashige <rl.takashige@gmail.com>
2025-12-27 16:13:26 +00:00
Jake Hillion
1c1792f5e8 mlx: update to 0.30.1 and align coordinator naming with MLX conventions
The Jaccl distributed backend requires MLX 0.30.1+, which includes the
RDMA over Thunderbolt support. The previous minimum version (0.29.3)
would fail at runtime with "The only valid values for backend are
'any', 'mpi' and 'ring' but 'jaccl' was provided."

Bump MLX dependency to >=0.30.1 and rename ibv_coordinators to
jaccl_coordinators to match MLX's naming conventions. This includes
the environment variable change from MLX_IBV_COORDINATOR to
MLX_JACCL_COORDINATOR.

Test plan:

Hardware setup: 3x Mac Studio M3 Ultra connected all-to-all with TB5

- Built a DMG [0]
- Installed on all Macs and started cluster.
- Requested a 2 node Tensor + MLX RDMA instance of Llama 3.3 70B (FP16).
- It started successfully.
- Queried the chat a few times. All was good. This didn't work
  previously.
- Killed the instance and spawned Pipeline + MLX Ring Llama 3.3 70B (FP16).
  Also started successfully on two nodes and could be queried.

Still not working:
- Pipeline + MLX Ring on 3 nodes is failing. Haven't debugged that yet.

[0] https://github.com/exo-explore/exo/actions/runs/20467656904/job/58815275013
2025-12-24 16:47:01 +00:00
Jake Hillion
9afc1043ef exo: handle -c flag for multiprocessing helpers in frozen apps
When Python's multiprocessing spawns child processes on macOS (using the
"spawn" method), it also spawns helper processes like the resource tracker
by executing:

    ./frozen_app -c "from multiprocessing.resource_tracker import main; main()"

A frozen PyInstaller app doesn't understand `-c` natively - it just runs
main(). This causes the resource tracker to fail silently.

This adds a minimal `-c` handler that intercepts the flag, extracts the
inline code, and exec()s it before main() runs. This is required for the
Process() spawn in runner_supervisor.py to work correctly in the DMG.

Note that the pyinstaller docs say `freeze_support` is supposed to make
this work, but it doesn't.
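The handler described above can be sketched roughly like this (an assumed shape, not the exact exo code): inspect argv before main() runs, and if the process was spawned with `-c`, exec the inline code instead.

```python
def handle_dash_c(argv: list[str]) -> bool:
    """If argv looks like ['prog', '-c', code, ...], exec the inline code.

    Returns True when the -c path was taken (caller should exit instead of
    running main()).
    """
    if len(argv) >= 3 and argv[1] == "-c":
        # Run the code the way `python -c` would, as the __main__ module.
        exec(argv[2], {"__name__": "__main__"})
        return True
    return False
```

In the frozen entry point this would run first, e.g. `if handle_dash_c(sys.argv): sys.exit(0)`, so helper processes like the resource tracker execute their inline code rather than falling through to main().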

Test plan:

Hardware setup: 3x Mac Studio M3 Ultra connected all-to-all with TB5

- Built a DMG[0].
- Installed on the Macs.
- Started an instance. Got an error this time in ~/.exo/exo.log. The
  last DMG from main doesn't show anything when an instance starts, this
  now shows the errors.

[0] https://github.com/exo-explore/exo/actions/runs/20464409279/job/58804485197
2025-12-23 17:08:50 +00:00
Evan Quiney
70c423f5e0 feat: conform to XDG Base Directory Specification on Linux (#988)
This is an extension of #964 with some cleanup.

---------

Co-authored-by: majiayu000 <1835304752@qq.com>
2025-12-23 17:02:55 +00:00
Jake Hillion
a24bdf7680 exo: enable multiprocessing support in PyInstaller bundles
Model loading fails silently when running from the DMG-packaged app,
despite working correctly with `uv run exo`. The bundled app spawns
child processes for model inference via multiprocessing, but these
processes fail to start in a frozen (PyInstaller) environment.

Add `freeze_support()` which is required for multiprocessing to work
in frozen applications.
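This is the standard pattern from the multiprocessing docs; `main` here stands in for exo's real entry point.

```python
from multiprocessing import freeze_support

def main() -> None:
    print("app running")

if __name__ == "__main__":
    freeze_support()  # no-op in a normal interpreter; required in frozen apps
    main()
```

`freeze_support()` must be called before any other multiprocessing use, which is why it goes first in the entry point.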

Test plan:

Hardware setup: 3x Mac Studio M3 Ultra connected all-to-all with TB5

- Built a DMG using a modified .github/workflows/build-app.yml[0] to avoid
  publishing it.
- Installed on all 3 Macs, replacing the existing Exo.
- Downloaded Llama 3.3 70B (FP16).
- Downloaded Qwen3 Coder 235B A22B (8-bit).

Things that work now but didn't on the previous app:
- Topology looks good, previously there was no discovery.

What didn't work:
- Started an instance with Pipeline + MLX Ring + 3 Nodes. Failed.
- Started an instance with Tensor + MLX RDMA + 2 Nodes. Failed.

Will continue debugging the instance starting issues separately.

[0] https://github.com/exo-explore/exo/actions/runs/20461320368
2025-12-23 14:34:21 +00:00
Jake Hillion
e8855959c1 build-app: add branch trigger from named branch
As I've been working on the .dmg, it's become clear we need a way to
test changes to the app. It's too hard to reproduce the full DMG locally
to be reasonable and much more convenient to test if it's signed.

Add a feature to the build-app workflow where if you push specifically
to the `test-app` branch it'll perform a build. The version is stubbed
to `0.0.0-alpha.0`, which is about as low as it gets in semver so you'll
always update away from it automatically with Sparkle. The resulting DMG
won't be pushed to S3 but will be uploaded as a GitHub Actions artifact.

I've been using similar commits to this for a while for testing. It's
worked well and not interfered with auto updating at all.

Test plan:
- Pushed this change to `test-app`.
- Generated action at
  https://github.com/exo-explore/exo/actions/runs/20447213358/job/58752909332
- Installed the DMG on a Mac. It worked as intended.
2025-12-23 12:53:30 +00:00
Jake Hillion
0a7fe5d943 ci: migrate build-app to github hosted runners 2025-12-22 19:51:48 +00:00
rltakashige
51a5191ff3 format readme (#978)
## Motivation

README looks weird after last update. 

## Test Plan

### Manual Testing
I actually checked the file on GitHub this time.
2025-12-22 18:06:27 +00:00
Evan Quiney
1efbd26388 add architecture.md, move images to docs/imgs (#968)
## Motivation

Documentation will make contribution easier and communicate our
development philosophy and decision process. Closes #967

## Changes

Added `architecture.md` to docs/ and moved the images out of docs and
into their own docs/imgs/ folder
2025-12-22 17:57:43 +00:00
Jake Hillion
02c915a88d pyproject: drop pathlib dependency 2025-12-22 17:52:44 +00:00
rltakashige
fc41bfa1f1 Add all prerequisites to README (#975)
## Motivation

Addresses #974 
```
INFO: pip is looking at multiple versions of exo to determine which version is compatible with other requirements. This could take a while.
ERROR: Could not find a version that satisfies the requirement exo-pyo3-bindings (from exo) (from versions: none)
ERROR: No matching distribution found for exo-pyo3-bindings
```

## Changes

Describes Rust dependency for building from source


## Test Plan

### Manual Testing
Tested locally and runs after this setup without exo-pyo3-bindings error

2025-12-22 17:38:51 +00:00
Jake Hillion
dd0638b74d pyproject: add pyinstaller to dev-dependencies 2025-12-22 15:49:27 +00:00
majiayu000
e06830ce0b fix: update macOS app to use correct API port (52415)
Fixes #960

The macOS app was incorrectly using port 8000 instead of the default
exo API port 52415. This caused confusion as the README correctly
documents port 52415 but the app was connecting to a different port.
2025-12-22 13:24:09 +00:00
Jake Hillion
1df5079b98 ci: avoid pushing alpha build as latest 2025-12-22 13:00:49 +00:00
Nightguarder
1e75aeb2c2 Add Prerequisites to Readme (#936)
## Motivation
Users need to know what **prerequisites** they need in order to run exo.
Simple addition to docs prevents future raised issues.

## Changes

Updated ``README.md``:
- to include installation instructions for
**[uv](https://github.com/astral-sh/uv)** and
**[macmon](https://github.com/vladkens/macmon)**.

Updated ``CONTRIBUTING.md``:
-  to verify these prerequisites are met before starting development.

- Standardized on brew installation instructions for macOS users to keep
the guide simple.

## Why It Works

By listing these prerequisites upfront, users will set up their
environment correctly before attempting to run exo.

## Test Plan

### Manual Testing
MacBook Pro M4
- Verified that ``uv`` and ``macmon`` were missing initially, causing
failures
- After installing them via brew (as documented), ``uv run exo`` starts
successfully.


---------

Co-authored-by: Evan Quiney <evanev7@gmail.com>
2025-12-22 02:28:08 +00:00
Heath Dutton🕴️
c582bdd673 bugfix: Handle MacMon errors gracefully 2025-12-22 02:21:29 +00:00
Jake Hillion
1bae8ebbf6 ci: add build-app workflow 2025-12-22 02:12:30 +00:00
Alex Cheema
abaeb0323d Update README.md. (#956)
## Motivation

Made a mistake on the merge of the last PR.
2025-12-21 23:09:44 +00:00
Alex Cheema
7d15fbdaab readme tweaks5 (#954)
2025-12-21 22:48:35 +00:00
Alex Cheema
4a6e0fe171 Update README.md. (#949)
2025-12-21 18:31:23 +00:00
Olimbek Nizomov
f4792dce14 fix(downloads): use certifi for robust SSL certificate verification (#941)

## Description
This change updates the SSL context creation in `download_utils.py` to
explicitly use the `certifi` CA bundle. This ensures that the
application has access to a reliable, up-to-date set of root
certificates, which is critical for verifying SSL connections to
external services like Hugging Face.
## Problem
On macOS environments (and potentially others), Python's default SSL
context often fails to locate the system's root certificates. This leads
to \`aiohttp.client_exceptions.ClientConnectorCertificateError\` errors
when attempting to download models.

## Solution
By passing \`cafile=certifi.where()\` to
\`ssl.create_default_context()\`, we force the application to use the
trusted certificate store provided by the \`certifi\` package. This is a
standard best practice for cross-platform Python applications and
resolves the verification failure.
2025-12-21 12:03:52 +00:00
rltakashige
a1b14a272e Extend eos_token_id fix for other models (#938)
## Motivation

We currently use mlx_lm's load_tokenizer instead of load. This means
that some models are missing some configurations, such as eos_token_id.
This is clear for a model like GLM, which does not finish token
generation.

## Changes

A small stopgap, to allow eos_token_ids to be added, and a TODO for us
to migrate to load. The reason we don't want to do this now is that a
solid testing framework is not configured in this repo yet.

## Why It Works

It just uses the eos_token_ids I obtained by loading a tokenizer in
mlx_lm and calling `tokenizer.eos_token_ids`.
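The stopgap's shape can be sketched like this. The table below uses placeholder model ids and token ids for illustration only; the real values come from mlx_lm's full `load`.

```python
# Placeholder table; real entries would come from mlx_lm's full `load`.
EXTRA_EOS_TOKEN_IDS: dict[str, set[int]] = {
    "example-org/some-glm-model": {100, 101},  # placeholder values
}

def patched_eos_ids(model_id: str, base: set[int]) -> set[int]:
    """Union the tokenizer's own eos ids with any known-missing extras."""
    return base | EXTRA_EOS_TOKEN_IDS.get(model_id, set())
```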

## Test Plan

### Manual Testing
Tested on several Macs.

### Automated Testing
None yet, as described.

---------

Co-authored-by: Evan <evanev7@gmail.com>
2025-12-20 20:18:17 +00:00
Alex Cheema
f8483cfc18 Update README.md. (#932)
2025-12-19 21:23:25 +00:00
Alex Cheema
8bafd6fe68 Update README.md (#925)
2025-12-19 14:38:40 +00:00
Jake Hillion
f16afd723d nix: get rust build working on linux 2025-12-19 13:51:15 +00:00
Alex Cheema
4da0043253 Update README.md (#917) 2025-12-18 20:38:00 +00:00
Jake Hillion
9e2bdeef92 LICENSE: Fix company name/year 2025-12-18 20:24:44 +00:00
Jake Hillion
379744fe5c exo: open source mac app and build process 2025-12-18 20:06:03 +00:00
Jake Hillion
74bae3ba6d Update README.md 2025-12-18 19:18:59 +00:00
Evan Quiney
9815283a82 8000 -> 52415 (#915)
* 8000 -> 52415

* dont grab the api port for placement

---------

Co-authored-by: rltakashige <rl.takashige@gmail.com>
2025-12-18 18:39:44 +00:00
Evan Quiney
5bd39e84d9 Merge pull request #914 from exo-explore/remove-old-cli-flag
remove old tb_only flag from master
2025-12-18 18:30:45 +00:00
Evan
658cf5ccf9 remove tb_only from master 2025-12-18 17:39:02 +00:00
rltakashige
170d2dcbaf Add Windows as a potential planned platform 2025-12-18 17:33:25 +00:00
Evan Quiney
ba66f14299 Merge pull request #912 from exo-explore/update-dashboard-error-message 2025-12-18 17:12:28 +00:00
Evan
274e35f926 update readme 2025-12-18 17:05:35 +00:00
Evan
3fe7bd250f update error message 2025-12-18 17:02:52 +00:00
Evan
004fea6935 clarify platform support 2025-12-18 16:27:43 +00:00
Evan
5c2d254fd1 add platform support information 2025-12-18 15:45:53 +00:00
Jake Hillion
19ca48c4f1 more readme fixups 2025-12-18 14:47:04 +00:00
Jake Hillion
57d3813692 re-add LICENSE 2025-12-18 14:35:40 +00:00
Evan
7cd1527ce3 update CONTRIBUTING 2025-12-18 14:35:20 +00:00
Evan Quiney
423c066ecc Merge pull request #906 from exo-explore/jj/sluxkvlmwons
re-add logos
2025-12-18 14:29:29 +00:00
Jake Hillion
ebf0e18c0e re-add logos 2025-12-18 14:26:27 +00:00
Evan
28a6151b8e remove discord link from README 2025-12-18 14:02:38 +00:00
Jake Hillion
2c16e00be9 github docs 2025-12-18 13:49:07 +00:00
Jake Hillion
f64d17fac0 exo v1 2025-12-18 13:46:40 +00:00
Jake Hillion
0fcee70833 prep repo for v1 2025-12-17 15:31:02 +00:00
Evan Quiney
09593c5e85 backport the dashboard to staging 2025-12-17 12:22:22 +00:00
Evan Quiney
880a18d205 fix disconnects
Co-authored-by: Ryuichi Leo Takashige <leo@exolabs.net>
2025-12-15 15:23:13 +00:00
rltakashige
70298ce0a9 Negative index nack request 2025-12-09 07:57:28 -08:00
Jake Hillion
ac3a0a6b47 ci: enable ruff check in CI through nix 2025-12-09 12:26:56 +00:00
rltakashige
859233a279 Reduce RequestEventLog spam 2025-12-09 11:43:54 +00:00
Evan Quiney
c9e2062f6e switch from uvicorn to hypercorn 2025-12-05 17:29:06 +00:00
Jake Hillion
e8566a3f95 placement: pass different ibv_coordinator per node 2025-12-05 17:23:22 +00:00
Jake Hillion
39d76aa0a5 nix: move formatting checks to nix and enable in ci 2025-12-05 17:00:33 +00:00
Jake Hillion
5629983809 fmt: format all python/rust/nix files 2025-12-05 16:58:55 +00:00
Evan Quiney
7312a7e000 plan fix 2025-12-05 16:43:11 +00:00
Evan Quiney
9e0a1c23ef rename ibv to jaccl inline with mlx 2025-12-05 16:42:43 +00:00
Evan Quiney
f5783d6455 proper collection of rdma ports in placement 2025-12-05 16:42:20 +00:00
Evan Quiney
e702313b32 pingers
Co-authored-by: Jake Hillion <jake@hillion.co.uk>
2025-12-05 16:41:19 +00:00
Evan
a3f8ecba9e prioritise LL4 2025-12-05 15:08:18 +00:00
Jake Hillion
5ef1df1e10 rust: move Cargo.toml to the root 2025-12-05 12:01:44 +00:00
Evan
40a0d47de8 jaccl 2025-12-03 13:53:12 +00:00
rltakashige
2b243bd80e Consolidate!!! Fixes 2025-12-03 12:19:25 +00:00
Evan Quiney
10c905c8dd worker no longer gets stuck after shutdown 2025-12-02 11:35:02 +00:00
Evan
93f699b660 add aarch64-linux for the spark 2025-11-28 11:08:18 +00:00
Alex Cheema
b43d30563d todo for layer-independent parameters in get_allow_patterns 2025-11-27 19:26:02 +00:00
Alex Cheema
20d73e90cd fix dashboard case sensitive model id 2025-11-26 18:16:32 +00:00
Alex Cheema
e56daa7c23 render download progress properly 2025-11-26 11:48:30 +00:00
Alex Cheema
63c85e1298 get rid of spammy Finished tokenizing log 2025-11-25 13:02:06 +00:00
Evan
7088988a65 bump pyo3 stub-gen 2025-11-25 12:13:53 +00:00
rltakashige
7b3e3fd66c Worker tests 2 2025-11-21 16:42:52 +00:00
rltakashige
de50811313 Worker tests on staging 1
Test plan
2025-11-21 15:22:40 +00:00
rltakashige
b45cbdeecd Consolidate cleanup 2025-11-21 14:54:02 +00:00
rltakashige
28a91787e8 Demo
Co-authored-by: Evan <evanev7@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-11-20 20:03:51 +00:00
Alex Cheema
d793f5f96c fix kimi eos token ids 2025-11-13 18:39:14 +00:00
Evan Quiney
b62f68474a improved master error handling
Co-authored-by: Ryuichi Leo Takashige <rl.takashige@gmail.com>
2025-11-11 18:04:40 +00:00
Alex Cheema
631cb81009 kimi k2 thinking 2025-11-11 18:03:39 +00:00
Evan Quiney
364087b91f five billion percent better shutdown handling 2025-11-11 17:43:53 +00:00
Evan Quiney
aa519b8c03 Worker refactor
Co-authored-by: rltakashige <rl.takashige@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-11-10 23:31:53 +00:00
Alex Cheema
9058b117c0 pipeline parallel fix 2025-11-08 02:19:19 +00:00
rltakashige
612f58c78d Revert dumb merge mistake 2025-11-07 02:39:08 +00:00
Evan
6bcac37d98 stop benching on all pushes 2025-11-06 22:26:30 +00:00
rltakashige
ff00b165c5 MLX LM type stubs 2025-11-06 21:59:29 +00:00
Alex Cheema
19e90572e6 set max_transmit_size on gossipsub to 1MB. Fixes large message error 2025-11-06 19:18:48 +00:00
Alex Cheema
e60681963f show ips on dashboard 2025-11-06 19:18:07 +00:00
rltakashige
0bb621b653 Add mlx nn stubs 2025-11-06 11:59:37 +00:00
Alex Cheema
699fd9591e fix exo scripts 2025-11-05 21:47:08 -08:00
rltakashige
6bbb6344b6 mlx.distributed.Group type stubs 2025-11-06 05:26:04 +00:00
rltakashige
16f724e24c Update staging 14
Co-authored-by: Evan <evanev7@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: David Munha Canas Correia <dmunha@MacBook-David.local>
Co-authored-by: github-actions bot <github-actions@users.noreply.github.com>
2025-11-05 01:44:24 +00:00
Evan Quiney
3b409647ba Squash merge merging_clusters into tensor_parallel94 2025-10-31 17:41:57 +00:00
Alex Cheema
d46c7e6a76 fix race condition with downloads where it cancels the download before renaming 2025-10-30 19:03:23 -07:00
rltakashige
91c635ca7a Update mlx and mlx-lm packages
Co-authored-by: Evan <evanev7@gmail.com>
2025-10-31 01:34:43 +00:00
Alex Cheema
5f18faec17 Update. 2025-10-30 11:59:59 -07:00
Alex Cheema
a346af3477 download fixes 2025-10-22 11:56:52 +01:00
Alex Cheema
56f783b38d Update. 2025-10-21 17:29:48 +01:00
Evan Quiney
363c98a872 leaf placement
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-10-15 12:47:26 +01:00
Evan Quiney
f25689d9c2 fix a race condition 2025-10-15 10:49:53 +01:00
Evan Quiney
1c6b5ce911 new tagged union
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Sorry Andrei!
2025-10-10 16:22:09 +01:00
Alex Cheema
76ed8a516b typecheck on ubuntu with install-nix-action
Co-authored-by: Evan <evanev7@gmail.com>
2025-10-10 16:15:39 +01:00
Evan Quiney
e8a6efe281 add kimi k2 2025-10-07 17:17:06 +01:00
Evan Quiney
a4e8335241 add just clean 2025-10-07 16:29:51 +01:00
Alex Cheema
84dfc8a738 Fast memory profiling
Co-authored-by: Evan <evanev7@gmail.com>
2025-10-07 16:23:51 +01:00
Alex Cheema
e01f9cf739 Disable build macos app 2025-10-07 15:39:15 +01:00
Alex Cheema
35ab6b376e fix: master tests
Co-authored-by: Evan <evanev7@gmail.com>
2025-10-07 15:36:05 +01:00
Evan Quiney
962e5ef40d version bump for brew consistency 2025-10-07 15:18:54 +01:00
Evan Quiney
b1721e941b nix cleanup 2025-10-01 09:47:00 +01:00
Evan Quiney
22f0ca2a59 FIX: OpenWebUI compat 2025-09-30 16:28:38 +01:00
Evan Quiney
57486a4305 kill go
Farewell Gelu, Chief Lunch Officer
2025-09-30 11:10:55 +01:00
Evan Quiney
38ff949bf4 big refactor
Fix. Everything.

Co-authored-by: Andrei Cravtov <the.andrei.cravtov@gmail.com>
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Seth Howes <sethshowes@gmail.com>
2025-09-30 11:03:04 +01:00
Matt Beton
7040c9508f Multiprocessing Runner 2025-09-17 09:31:49 +01:00
Matt Beton
35c4311587 Dashboard Status & Bugfixes 2025-08-29 17:34:17 +01:00
Matt Beton
a33787f5fd Prompt length 2025-08-29 16:07:36 +01:00
Matt Beton
1b8b456ced full mlx caching implementation 2025-08-26 17:15:08 +01:00
Matt Beton
84c90a6d35 feat: mlx memory cache for faster ttft
Co-authored-by: Evan <evanev7@gmail.com>
Co-authored-by: s17 <s17@s17s-Mac-Studio.local>
2025-08-26 13:05:42 +01:00
Evan Quiney
5efe5562d7 feat: single entrypoint and logging rework 2025-08-26 11:08:09 +01:00
Andrei Cravtov
ef5c5b9654 changes include: ipc, general utilities, flakes stuff w/ just, autopull script 2025-08-25 17:33:40 +01:00
Alex Cheema
5bfc99b415 add EXO logo to dashboard 2025-08-25 16:41:13 +01:00
Evan Quiney
11f8b4ef33 tidy: fix justfile, run.sh, run formatter 2025-08-21 18:44:53 +01:00
Evan Quiney
be6f5ae7f1 feat: build system and homebrew compatibility 2025-08-21 16:07:37 +01:00
Evan Quiney
40efed4436 unvendored macmon 2025-08-20 13:04:46 +01:00
Gelu Vrabie
ea9e573409 Refactor runner supervisor
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-08-18 18:37:52 +01:00
Gelu Vrabie
345fafd80d Forwarder versioning
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-08-18 15:08:50 +01:00
Evan Quiney
ea3eeea826 improved go caching with nix
Co-authored-by: Gelu Vrabie <gelu.vrabie.univ@gmail.com>
2025-08-15 15:24:58 +01:00
Gelu Vrabie
a2a37c0ebe discovery fixed
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-08-15 15:23:20 +01:00
Gelu Vrabie
57073f35c3 collection of fixes for Shanghai demo
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-08-15 15:21:51 +01:00
Andrei Cravtov
7e19804aa5 Integrate flake parts 2025-08-13 09:55:22 +01:00
Matt Beton
dbcd09aa53 No 70b 2025-08-12 18:42:27 +01:00
Matt Beton
c1d5b381f4 70B model unit test only runs if its downloaded 2025-08-07 10:41:56 +01:00
Alex Cheema
473512ddd0 r1 size 2025-08-04 22:57:31 +08:00
Alex Cheema
817c5993f0 fix dem model cards yo 2025-08-04 22:56:06 +08:00
Gelu Vrabie
75ecda55a9 fix gitignore
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
2025-08-04 13:49:49 +01:00
Alex Cheema
c560c55c4e build and release on staging 2025-08-04 07:41:09 +08:00
Sami Khan
f51f8f72f8 app launches python modules 2025-08-04 06:18:31 +08:00
Seth Howes
407796d18f Minor dashboard fixes 2025-08-04 06:15:01 +08:00
Alex Cheema
6daf7f31f7 clean model cards 2025-08-04 05:31:30 +08:00
Alex Cheema
f352ddfc5f run configure_mlx.sh in run.sh 2025-08-04 03:59:42 +08:00
Alex Cheema
6855a7727d set a 15 sec timeout for getting initial download progress 2025-08-03 20:37:20 +08:00
Matt Beton
1fe4ed3442 Worker Exception & Timeout Refactor
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Seth Howes <sethshowes@gmail.com>
2025-08-02 08:28:37 -07:00
Alex Cheema
92c9688bf0 Remove rust 2025-08-02 08:16:39 -07:00
Sami Khan
a46f8c3cd1 app
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-08-01 19:14:27 -07:00
Seth Howes
71bafabc63 Dashboard with instances 2025-08-01 14:38:07 +01:00
Gelu Vrabie
0e32599e71 fix libp2p + other prs that were wrongly overwritten before (111,112,117,118,1119 + misc commits from Alex)
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
Co-authored-by: Alex Cheema <41707476+AlexCheema@users.noreply.github.com>
Co-authored-by: Seth Howes <71157822+sethhowes@users.noreply.github.com>
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-07-31 20:36:47 +01:00
Alex Cheema
2031d9481d fix api get_state 2025-07-30 07:15:15 -07:00
Matt Beton
b350ededb2 Test Supervisor Errors. 2025-07-30 13:30:54 +01:00
Gelu Vrabie
ff3d11c748 just run
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-29 16:58:27 +01:00
Gelu Vrabie
25fa46c6f6 Update CODEOWNERS 2025-07-29 13:08:29 +01:00
Seth Howes
3f192f20cc Reinstate dashboard 2025-07-28 23:18:23 +01:00
Alex Cheema
a2b4093d25 add metrics: gpu_usage, temp, sys_power, pcpu_usage, ecpu_usage, ane_… 2025-07-28 23:02:33 +01:00
Alex Cheema
12566865d5 better profiling 2025-07-28 22:15:04 +01:00
Gelu Vrabie
b88abf1cc2 fix topology disconnects and add heartbeat
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-28 22:00:05 +01:00
Alex Cheema
dbd0bdc34b fix ci linter 2025-07-28 20:12:48 +01:00
Alex Cheema
20241e3290 some finishing touches to get this working e2e 2025-07-28 13:07:29 +01:00
Seth Howes
176d077c87 Fix IPv4 serialisation for topology 2025-07-28 13:07:10 +01:00
Gelu Vrabie
c3c8ddbce8 fix forwarder supervisor tests
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-28 13:03:43 +01:00
Matt Beton
36a5d75efd Fix download tests 2025-07-28 12:51:10 +01:00
Seth Howes
e9b803604b Add Multiaddr type and refactor Hosts type for creating shard placement 2025-07-28 11:39:46 +01:00
Alex Cheema
b285a9f0b7 fix placement tests 2025-07-28 11:18:32 +01:00
Alex Cheema
57ca487fde Fixes for running this end to end
Co-authored-by: Gelu Vrabie <gelu.vrabie.univ@gmail.com>
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-28 10:51:03 +01:00
Andrei Cravtov
b687dec6b2 Discovery integration master
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-07-27 13:43:59 +01:00
Alex Cheema
98f204d14a Fix placement single node 2025-07-26 20:08:37 +01:00
Matt Beton
93330f0283 Inference Integration Test
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-07-26 20:08:25 +01:00
Gelu Vrabie
2e4635a8f5 add node started event
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-26 19:12:26 +01:00
Gelu Vrabie
261e575262 Serialize topology
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-25 15:09:03 +01:00
Alex Cheema
a97fb27c64 Glue TWO 2025-07-25 14:32:34 +01:00
Gelu Vrabie
9be08ec7dd add resource monitor
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-25 13:10:53 +01:00
Alex Cheema
a241c92dd1 Glue 2025-07-25 13:10:29 +01:00
Seth Howes
6f8e3419d5 Placement strategy
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-07-24 20:22:40 +01:00
Gelu Vrabie
4c0e4ef853 Go build
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-24 19:45:45 +01:00
Matt Beton
f41531d945 Worker Loop
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
2025-07-24 18:44:31 +01:00
Alex Cheema
67c70b22e4 Best master 2025-07-24 17:12:52 +01:00
Andrei Cravtov
3730160477 Fix the node-ID test
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
2025-07-24 17:09:12 +01:00
Gelu Vrabie
df1fe3af26 Topology apply
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-24 14:27:09 +01:00
Matt Beton
5097493a42 Fix tests 2025-07-24 13:22:58 +01:00
Alex Cheema
a6b3ab6332 Worker plan
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
Co-authored-by: Seth Howes <71157822+sethhowes@users.noreply.github.com>
Co-authored-by: Gelu Vrabie <gelu.vrabie.univ@gmail.com>
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
Co-authored-by: Andrei Cravtov <the.andrei.cravtov@gmail.com>
Co-authored-by: Seth Howes <sethshowes@gmail.com>
2025-07-24 12:45:27 +01:00
Gelu Vrabie
56d3565781 Add apply functions
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-24 11:02:20 +01:00
Andrei Cravtov
3ab5609289 wrote race-condition-free persistent NodeID-getting function 2025-07-23 20:18:56 +01:00
Matt Beton
7a452c3351 Fix tests 2025-07-23 18:25:50 +01:00
Seth Howes
7ac23ce96b Refactor tasks / commands / api 2025-07-23 15:52:29 +01:00
Andrei Cravtov
81060b7062 Made basedpyright work with Jetbrains environment
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
Co-authored-by: Seth Howes <sethshowes@gmail.com>
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
2025-07-23 14:12:11 +01:00
Andrei Cravtov
8d2536d926 Implemented basic discovery library in Rust + python bindings
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
Co-authored-by: Seth Howes <sethshowes@gmail.com>
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
2025-07-23 13:11:29 +01:00
Gelu Vrabie
76f903504c fix
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-22 22:29:35 +01:00
Seth Howes
cd9a1a9192 Topology update 2025-07-22 22:29:17 +01:00
Matt Beton
14b3c4a6be New API! 2025-07-22 21:21:12 +01:00
Gelu Vrabie
596d9fc9d0 add forwarder service
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-22 20:53:26 +01:00
Matt Beton
53c652c307 Fix tests! 2025-07-22 15:20:32 +01:00
Matt Beton
5adad08e09 New events 2025-07-22 15:16:06 +01:00
Gelu Vrabie
108128b620 fix sqlite connector
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-21 22:43:09 +01:00
Alex Cheema
449fdac27a Downloads 2025-07-21 22:42:37 +01:00
Seth Howes
cb101e3d24 Refactor model types 2025-07-21 20:35:27 +01:00
Gelu Vrabie
54efd01d77 add forwarder supervisor
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-21 20:21:43 +01:00
Seth Howes
bae58dd368 Refactor worker + master state into single state 2025-07-21 19:36:54 +01:00
Seth Howes
d19aa4f95a Simplify Task type + merge control & data plane types into single type 2025-07-21 17:10:09 +01:00
Gelu Vrabie
2f64e30dd1 Add sqlite connector
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
2025-07-21 14:10:29 +01:00
Alex Cheema
bb7f1ae994 New worker
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
2025-07-18 10:08:56 +01:00
Matt Beton
cc45c7e9b9 Fixed events issue. 2025-07-17 12:21:01 +01:00
Arbion Halili
038cc4cdfa fix: Normalize Naming 2025-07-16 16:11:51 +01:00
Arbion Halili
e2a7935019 fix: Fix incorrect logic 2025-07-16 14:39:20 +01:00
Arbion Halili
6a671908a3 fix: FrozenSet Related Bits 2025-07-16 13:45:57 +01:00
Arbion Halili
520b1122a3 fix: Many Fixes 2025-07-16 13:35:31 +01:00
Arbion Halili
d9b9aa7ad2 Merge branch 'master-node' into staging 2025-07-15 16:32:08 +01:00
Arbion Halili
7fa7de8e83 more incomplete trash 2025-07-15 13:42:17 +01:00
Arbion Halili
9f96b6791f fix: Some, still broken 2025-07-15 13:11:21 +01:00
Arbion Halili
9b3c105bea fix: Save Andrei's sanity 2025-07-15 13:11:20 +01:00
Arbion Halili
8060120136 tweak 2025-07-14 22:37:53 +01:00
Arbion Halili
df6626fa31 fix: Event definitions, state definitions 2025-07-14 21:41:14 +01:00
Arbion Halili
70f0f09c05 Tweaked, Still Broken tho 2025-07-14 21:19:39 +01:00
Arbion Halili
8799c288b0 BROKEN: work thus far 2025-07-14 21:09:08 +01:00
Arbion Halili
4e4dbf52ec fix: Use Nix-compatible LSP set-up 2025-07-14 21:08:43 +01:00
Matt Beton
21acd3794a New Runner! 2025-07-10 16:34:35 +01:00
Arbion Halili
b0bd951005 Merge Basic Interfaces
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Seth Howes <sethshowes@gmail.com>
Co-authored-by: Matt Beton <matthew.beton@gmail.com>
Co-authored-by: Andrei Cravtov <the.andrei.cravtov@gmail.com>
2025-07-09 19:04:21 +01:00
Arbion Halili
74d56e52ff fix: Improve naming 2025-07-07 20:22:27 +01:00
Arbion Halili
fe17aaf9f8 fix: Make master hold a queue of task data 2025-07-07 20:22:00 +01:00
Arbion Halili
e1894bc106 refactor: A Lot 2025-07-07 20:19:08 +01:00
Arbion Halili
81cf6bce64 refactor: Simplify networking 2025-07-07 19:33:14 +01:00
Andrei Cravtov
6c8b8b30ae added rust to flake 2025-07-07 18:11:40 +01:00
Matt Beton
0425422f55 Simple fix 2025-07-07 17:18:43 +01:00
Matt Beton
03a1cf59a6 Matt's interfaces
Added interfaces for chunks, worker, runner, supervisor, resourcemonitor, etc.
2025-07-07 16:42:52 +01:00
Arbion Halili
367e76c8fa fix: Fix validation over Task types 2025-07-04 17:25:14 +01:00
Arbion Halili
cda3de2a28 fix: Use state for tasks 2025-07-04 15:08:54 +01:00
Arbion Halili
10224d09de refactor: Distinguish the topology of the control plane from that of the data plane 2025-07-03 15:45:54 +01:00
Arbion Halili
c456934342 refactor: Remove timestamp from Wrapped Events 2025-07-03 13:05:35 +01:00
Arbion Halili
0b6aadf576 refactor: Add safe state mutation method .apply() 2025-07-03 12:33:29 +01:00
Arbion Halili
f8039e20e0 feature: Add pretty_name to ModelMetadata 2025-07-03 12:32:32 +01:00
Arbion Halili
4bb3a995a4 feature: Interfaces for graph interfaces 2025-07-02 22:44:55 +01:00
Arbion Halili
7dd8a979d2 feature: Simplest utilities for logging 2025-07-02 22:13:42 +01:00
Arbion Halili
40793f1d86 refactor: Refactor most things 2025-07-02 21:11:49 +01:00
Arbion Halili
8596d5c5b1 refactor: Fix UUID implementation 2025-07-02 11:04:52 +01:00
Arbion Halili
6de1f2883f feat: Update Interfaces 2025-07-01 18:41:37 +01:00
Arbion Halili
73ac8969bc feat: Add ResourceGraph, runner types, etc. 2025-07-01 13:14:26 +01:00
Arbion Halili
df824e2e87 fix: Ensure MasterState inherits from SharedState 2025-07-01 12:18:54 +01:00
Seth Howes
d5033e658c refactor: Replace Literal with Enum in sources.py 2025-07-01 12:15:28 +01:00
Arbion Halili
c0df8e5463 feat: Implement Many Interfaces 2025-07-01 01:37:00 +01:00
Arbion Halili
899d8820dd Merge Seth's Control Plane API Work into Alex's Events Branch
Co-authored-by: Seth Howes <sethshowes@gmail.com>
2025-06-30 23:54:41 +01:00
Arbion Halili
53d5d23898 refactor: Use enums 2025-06-30 23:45:27 +01:00
Arbion Halili
b758df83cf Chore: Tweak CI 2025-06-30 22:41:33 +01:00
Alex Cheema
133ab70d67 chore: Run formatter 2025-06-30 09:48:03 +01:00
Alex Cheema
aae3e4a82d refactor: Put type defs on one line 2025-06-30 09:46:44 +01:00
Alex Cheema
596b069f84 chore: Fail pipeline if working tree changes instead of committing them in CI 2025-06-30 09:40:47 +01:00
Alex Cheema
c0b8bb9c98 chore: Rename conditional-commit.yml to action.yml 2025-06-29 22:34:04 +01:00
Alex Cheema
0c46adc298 refactor: Use official OpenAI types 2025-06-29 22:30:18 +01:00
Alex Cheema
4b3e60f899 refactor: Add types for model downloading 2025-06-29 21:59:06 +01:00
Alex Cheema
784f0ec423 chore: Skip protobuf generation if no .proto files exist 2025-06-29 21:52:46 +01:00
Alex Cheema
38dcf698eb chore: Fix typecheck job in GitHub workflow 2025-06-29 21:47:23 +01:00
Alex Cheema
c9d44a1658 chore: Fix typecheck job in GitHub workflow 2025-06-29 21:45:41 +01:00
Alex Cheema
bbdfdac7be refactor: Remove redundant comment 2025-06-29 21:42:00 +01:00
Alex Cheema
5ba230ed16 refactor: Add all event types with Event implementations 2025-06-29 21:41:00 +01:00
Arbion Halili
5abf03e31b Scaffold Event Sourcing 2025-06-29 19:44:58 +01:00
Arbion Halili
d8459358cf Refactor CI 2025-06-28 14:42:53 +01:00
Arbion Halili
c977ce9419 Ensure exo-shared is a Dependency of exo-master and exo-worker 2025-06-28 14:34:49 +01:00
Arbion Halili
74adbc4280 Remove PoeThePoet 2025-06-28 14:33:01 +01:00
Arbion Halili
587a52a944 Remove Bad UUID Implementation 2025-06-28 14:08:18 +01:00
Arbion Halili
885c7d5cd8 Add RULES.md and .cursorrules 2025-06-28 14:03:01 +01:00
Arbion Halili
e4c4b3e95a Overhaul CI Design 2025-06-28 12:29:01 +01:00
Arbion Halili
f7f779da19 Fix Type Checker; Improve Protobuf Generation 2025-06-28 12:28:26 +01:00
Arbion Halili
38bc8ea7e4 Keep Protobuf Directories 2025-06-28 01:32:10 +01:00
Arbion Halili
b53c1ba999 Use Hatch Build System 2025-06-28 01:28:52 +01:00
Arbion Halili
423efe10b8 Add Protobuf Support 2025-06-28 01:27:25 +01:00
Arbion Halili
61b8b1cb18 Add Protobuf Support 2025-06-28 01:26:49 +01:00
Arbion Halili
7f0f71b9eb Add .gitignore 2025-06-28 01:25:51 +01:00
Arbion Halili
da50da2b43 Add Simple env.py 2025-06-27 11:57:03 +01:00
Arbion Halili
3564d77e58 Add Sync to Runner 2025-06-27 11:56:02 +01:00
Arbion Halili
77546b951e Update pyproject.toml 2025-06-17 22:28:48 +01:00
Arbion Halili
c15e402f3b Add Simple Groundwork 2025-06-17 22:23:01 +01:00
Arbion Halili
c57ed32fc5 Add Initial Contribution Rules 2025-06-17 16:11:15 +01:00
Arbion Halili
41085eef7b Prepare Environment Parser 2025-06-17 16:10:58 +01:00
Arbion Halili
685c8eff58 Configure Runner Tasks to Cover "engines/" 2025-06-17 07:37:08 +01:00
Arbion Halili
13b6043c09 Add Linter 2025-06-17 07:32:33 +01:00
Arbion Halili
180748ee83 Update Workspace Configuration, Configure Build Backend 2025-06-17 06:45:25 +01:00
Arbion Halili
043253a55d Add ML Engines (Backend) 2025-06-17 05:55:43 +01:00
Arbion Halili
090265a374 Add Formatter To CI 2025-06-17 05:46:33 +01:00
Arbion Halili
e2508f3419 Add Type Checker In CI 2025-06-17 05:46:08 +01:00
Arbion Halili
ac2dfa6565 Initial Structure 2025-06-17 03:55:41 +01:00
Alex Cheema
db1a5252a2 Add CODEOWNERS. 2025-06-14 23:32:30 -07:00
Alex Cheema
e4238f9ef3 Merge pull request #800 from exo-explore/grpcio1.71.0
downgrade grpcio, grpcio-tools to 1.70.0
2025-03-21 15:23:32 -07:00
Alex Cheema
ad3bc6ceaa downgrade grpcio, grpcio-tools to 1.70.0 2025-03-21 15:23:11 -07:00
Alex Cheema
04d5dca18f Merge pull request #778 from exo-explore/grpcio1.71.0
upgrade grpcio and grpcio-tools to 1.71.0
2025-03-12 06:24:57 +00:00
Alex Cheema
50b6800a61 m3 ultra flops estimates based on some quick profiling 2025-03-11 22:51:23 -07:00
Alex Cheema
2857975bf3 upgrade grpcio and grpcio-tools to 1.71.0 2025-03-11 17:23:37 -07:00
Alex Cheema
854f515cf5 Merge pull request #763 from deftdawg/amdgpu
AMD/ROCm: Changes required to detect and inference on AMD GPUs
2025-03-06 16:07:05 +00:00
DeftDawg
f98d9bac53 Changes required to detect AMD GPUs 2025-03-05 22:49:29 -05:00
Alex Cheema
017bf93cf5 Merge pull request #753 from mags0ft/patch-1
remove dead links in README
2025-03-03 23:01:34 +00:00
mags0ft
013d2573e7 remove dead links in README 2025-03-02 18:37:59 +01:00
Alex Cheema
2702975762 Merge pull request #746 from exo-explore/grpcio1.70.0
downgrade grpc to 1.67.0. waiting for fix
2025-02-28 21:26:11 +00:00
Alex Cheema
30c3f58a00 downgrade grpc to 1.67.0. waiting for fix bd8f8a86e0 2025-02-28 21:25:11 +00:00
Alex Cheema
1bbbb1e1d8 Merge pull request #745 from exo-explore/grpcio1.70.0
Grpcio1.70.0
2025-02-28 21:05:41 +00:00
Alex Cheema
4081305e60 adjust grpc settings, ensure connected before sending any grpc commands 2025-02-28 20:52:12 +00:00
Alex Cheema
52a21645c6 Merge pull request #742 from samiamjidkhan/main
build fix
2025-02-28 12:29:58 +00:00
Sami Khan
63570c7b8b Merge pull request #1 from samiamjidkhan/build-fix
build fix
2025-02-28 15:47:36 +05:00
Sami Khan
971f5240bf build fix 2025-02-28 15:45:57 +05:00
Alex Cheema
36a6389af0 bump grpcio and grpcio-tools to 1.70.0 2025-02-27 01:40:04 +00:00
688 changed files with 62860 additions and 22463 deletions


@@ -1,376 +0,0 @@
version: 2.1
orbs:
  python: circleci/python@2
commands:
  run_chatgpt_api_test:
    parameters:
      inference_engine:
        type: string
      model_id:
        type: string
      expected_output:
        type: string
      prompt:
        type: string
    steps:
      - run:
          name: Run chatgpt api integration test (<<parameters.inference_engine>>, <<parameters.model_id>>)
          command: |
            source env/bin/activate
            # Set CLANG=1 for tinygrad only
            if [ "<<parameters.inference_engine>>" = "tinygrad" ]; then
              pip install llvmlite
              export TOKENIZERS_PARALLELISM=true SUPPORT_BF16=0 CLANG=1
            fi
            # Start first instance
            EXO_HOME="$(pwd)/.exo_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> \
              --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \
              --chatgpt-api-response-timeout 900 --disable-tui > output1.log &
            PID1=$!
            tail -f output1.log &
            TAIL1=$!
            # Start second instance
            EXO_HOME="$(pwd)/.exo_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> \
              --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \
              --chatgpt-api-response-timeout 900 --disable-tui > output2.log &
            PID2=$!
            tail -f output2.log &
            TAIL2=$!
            # Remember to kill the tail processes at the end
            trap 'kill $TAIL1 $TAIL2' EXIT
            # Wait for discovery
            sleep 10
            # Function to check if processes are still running
            check_processes() {
              if ! kill -0 $PID1 2>/dev/null; then
                echo "First instance (PID $PID1) died unexpectedly. Log output:"
                cat output1.log
                exit 1
              fi
              if ! kill -0 $PID2 2>/dev/null; then
                echo "Second instance (PID $PID2) died unexpectedly. Log output:"
                cat output2.log
                exit 1
              fi
            }
            # Check processes before proceeding
            check_processes
            echo "Sending request to first instance..."
            response_1=$(curl -s http://localhost:8000/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "<<parameters.model_id>>",
                "messages": [{"role": "user", "content": "<<parameters.prompt>>"}],
                "temperature": 0.7
              }')
            echo "Response 1: $response_1"
            # Check processes after first response
            check_processes
            echo "Sending request to second instance..."
            response_2=$(curl -s http://localhost:8001/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "<<parameters.model_id>>",
                "messages": [{"role": "user", "content": "<<parameters.prompt>>"}],
                "temperature": 0.7
              }')
            echo "Response 2: $response_2"
            # Check processes after second response
            check_processes
            # Stop both instances
            kill $PID1 $PID2
            echo ""
            # Extract content using jq and check if it contains expected output
            content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
            content2=$(echo "$response_2" | jq -r '.choices[0].message.content')
            if [[ "$content1" != *"<<parameters.expected_output>>"* ]] || [[ "$content2" != *"<<parameters.expected_output>>"* ]]; then
              echo "Test failed: Response does not match '<<parameters.expected_output>>'"
              echo "Response 1 content: $content1"
              echo ""
              echo "Response 2 content: $content2"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            else
              echo "Test passed: Response from both nodes matches '<<parameters.expected_output>>'"
            fi
jobs:
  unit_test:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run tests
          command: |
            source env/bin/activate
            # set TEMPERATURE to 0 for deterministic sampling
            echo "Running inference engine tests..."
            METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine
            echo "Running tokenizer tests..."
            python3 ./test/test_tokenizers.py
            python3 ./test/test_model_helpers.py
  discovery_integration_test:
    macos:
      xcode: "16.0.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run discovery integration test
          command: |
            source env/bin/activate
            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
            PID1=$!
            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
            PID2=$!
            sleep 10
            kill $PID1 $PID2
            if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then
              echo "Test passed: Both instances discovered each other"
              exit 0
            else
              echo "Test failed: Devices did not discover each other"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            fi
  chatgpt_api_integration_test_mlx:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: mlx
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"
  chatgpt_api_integration_test_dummy:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: dummy
          model_id: dummy
          prompt: "Dummy prompt."
          expected_output: "dummy"
  chatgpt_api_integration_test_tinygrad:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: tinygrad
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"
  chatgpt_api_integration_test_tinygrad_linux:
    machine:
      image: ubuntu-2204:current
    resource_class: xlarge
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            export DEBIAN_FRONTEND=noninteractive
            export DEBCONF_NONINTERACTIVE_SEEN=true
            sudo apt-get update
            sudo add-apt-repository -y ppa:deadsnakes/ppa
            sudo apt-get update
            sudo apt-get install -y python3.12 python3.12-venv clang
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: tinygrad
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"
  measure_pip_sizes:
    macos:
      xcode: "16.0.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies and measure sizes
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
            python ./extra/pipsize.py --json ./pipsize.json
      - store_artifacts:
          path: ./pipsize.json
          destination: pip-sizes.json
  check_line_count:
    docker:
      - image: cimg/python:3.10
    steps:
      - checkout
      - run:
          name: Setup git for PR comparison
          command: |
            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
              PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev)
              BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \
                "https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \
                | jq -r '.target_branch')
              git clone -b $BASE_BRANCH --single-branch \
                https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \
                base_branch
            fi
      - run:
          name: Install dependencies
          command: |
            python -m pip install --upgrade pip
            pip install tabulate
      - run:
          name: Run line count check
          command: |
            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
              python extra/line_counter.py base_branch .
            else
              python extra/line_counter.py .
            fi
      - store_artifacts:
          path: line-count-snapshot.json
          destination: line-count-snapshot.json
      - store_artifacts:
          path: line-count-diff.json
          destination: line-count-diff.json
      - run:
          name: Create test results directory
          command: |
            mkdir -p test-results/line-count
            cp line-count-*.json test-results/line-count/
      - store_test_results:
          path: test-results
workflows:
  version: 2
  build_and_test:
    jobs:
      - check_line_count:
          filters:
            branches:
              only: /.*/
            tags:
              only: /.*/
      - unit_test
      - discovery_integration_test
      - chatgpt_api_integration_test_mlx
      - chatgpt_api_integration_test_tinygrad
      - chatgpt_api_integration_test_tinygrad_linux
      - chatgpt_api_integration_test_dummy
      - measure_pip_sizes

.clauderules Normal file

@@ -0,0 +1,63 @@
# Claude Code Rules - Follow Every Rule Exactly
You must prioritize straightforward code semantics, well-named types, clear function signatures, and robust, carefully-chosen abstractions. Think about how your decisions might impact these aspects of code quality before proposing any changes.
You have access to all modern Python features from Python 3.13, 3.12, 3.11...
**When you're done making changes, remove any redundant comments; remaining comments should only apply to complex code segments, adding relevant context.**
## 1. Code Discipline
* Eliminate superfluous `try`/`catch` and `if` branches through strict typing and static analysis.
* Use pure functions unless you must mutate fixed state—then wrap that state in a class.
* Every function is **referentially transparent**: same inputs ⇒ same outputs, no hidden state, no unintended I/O.
* Put side-effects in injectable "effect handlers"; keep core logic pure.
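As one way of reading the rule above — a minimal sketch, not code from this repository (the names `plan_layers`, `run_placement`, and `EventSink` are hypothetical): the decision logic is a pure, referentially transparent function, and the only side effect goes through an injected handler.

```python
from typing import Protocol

class EventSink(Protocol):
    """Injectable effect handler; implementations may log, publish, etc."""
    def emit(self, event: str) -> None: ...

def plan_layers(total_layers: int, node_count: int) -> list[int]:
    # Pure core: same inputs always produce the same partition.
    base, extra = divmod(total_layers, node_count)
    return [base + (1 if i < extra else 0) for i in range(node_count)]

def run_placement(total_layers: int, node_count: int, sink: EventSink) -> list[int]:
    # The side effect is confined to the injected sink; the logic stays pure
    # and can be tested without any I/O.
    plan = plan_layers(total_layers, node_count)
    sink.emit(f"planned {plan}")
    return plan
```

In tests, `sink` can be a trivial in-memory recorder, so the pure core is exercised with no hidden state or I/O.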
## 2. Naming
* Choose descriptive, non-abbreviated names—no 3-letter acronyms or non-standard contractions.
* Anyone reading a function's type signature alone should grasp its purpose without extra context.
## 3. Typing
* Maintain **strict, exhaustive** typing; never bypass the type-checker.
* Default to `Literal[...]` when an enum-like set is needed.
* Prefer built-in types; when two values share structure but differ in meaning, enforce separation:
* Use `typing.NewType` for primitives (zero runtime cost).
* For serializable objects, add a `type: str` field that states the object's identity.
## 4. Pydantic
* Read, respect, and rely on Pydantic documentation.
* Centralize a common `ConfigDict` with `frozen=True` and `strict=True` (or stricter) and reuse it everywhere.
* For hierarchies of `BaseModel` variants, declare a discriminated union with `typing.Annotated[Base, Field(discriminator='variant')]`; publish a single `TypeAdapter[Base]` so all variants share one strict validator.
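The discriminated-union rule above can be sketched as follows (the `Circle`/`Square` models are hypothetical stand-ins for a real `BaseModel` hierarchy):

```python
from typing import Annotated, Literal, Union
from pydantic import BaseModel, ConfigDict, Field, TypeAdapter

# Centralized strict, frozen config, reused by every model.
STRICT = ConfigDict(frozen=True, strict=True)

class Circle(BaseModel):
    model_config = STRICT
    variant: Literal["circle"] = "circle"
    radius: float

class Square(BaseModel):
    model_config = STRICT
    variant: Literal["square"] = "square"
    side: float

# Single strict validator shared by all variants.
Shape = Annotated[Union[Circle, Square], Field(discriminator="variant")]
shapes = TypeAdapter(Shape)

print(shapes.validate_python({"variant": "circle", "radius": 2.0}))
```

The `variant` field doubles as the serialized `type`-style identity, and `TypeAdapter` dispatches on it without any `isinstance` branching in calling code.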
## 5. IDs & UUIDs
* Subclass Pydantic's `UUID4` for custom ID types.
* Generate fresh IDs with `uuid.uuid4()`.
* Create idempotency keys by hashing *persisted* state plus a **function-specific salt** to avoid collisions after crashes.
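A sketch of the idempotency-key rule, assuming the persisted state has already been serialized to bytes; the salt value is illustrative:

```python
import hashlib
import uuid

# Unique per function, so two functions hashing identical state
# still produce distinct keys after a crash-and-retry.
FUNCTION_SALT = b"create-invoice-v1"

def idempotency_key(persisted_state: bytes) -> str:
    # Deterministic: same persisted state => same key on retry.
    return hashlib.sha256(FUNCTION_SALT + persisted_state).hexdigest()

def fresh_id() -> uuid.UUID:
    # Fresh identifiers always come from uuid4(), per the rule above.
    return uuid.uuid4()

print(idempotency_key(b'{"order": 7}'))
```

Because the key is derived from persisted state rather than a random value, a retried call after a crash reproduces the same key and the operation deduplicates cleanly.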
## 6. Error Handling
* Catch an exception **only** where you can handle or transform it meaningfully.
* State in the docstring **where** each exception is expected to be handled and **why**.
## 7. Dependencies
* Introduce new external dependencies only after approval.
* Request only libraries common in production environments.
## 8. Use of `@final` & Freezing
* Mark classes, methods, and variables as `@final` or otherwise immutable wherever applicable.
## 9. Repository Workflow
If you spot a rule violation within code that you've not been asked to work on directly, inform the user rather than patching it ad-hoc.
---
### One-Sentence Summary
Write strictly-typed, pure, self-describing Python that uses Pydantic, well-scoped side-effects, immutable state, approved dependencies, and explicit error handling.

64
.cursorrules Normal file

@@ -0,0 +1,64 @@
# follow **every** rule exactly; report any violation instead of silently fixing it.
You must prioritize straightforward code semantics, well-named types, clear function signatures, and robust, carefully-chosen abstractions. Think about how your decisions might impact these aspects of code quality before proposing any changes.
You can use the advanced features of `typing`. You have access to all of the new features from Python 3.13, 3.12, 3.11...
**When you're done making your changes, remove any redundant comments that you may have left; the comments that remain should only apply to complex segments of code, adding relevant context.**
## 1. Code Discipline
* Eliminate superfluous `try` / `catch` and `if` branches through strict typing and static analysis.
* Use pure functions unless you must mutate fixed state—then wrap that state in a class.
* Every function is **referentially transparent**: same inputs ⇒ same outputs, no hidden state, no unintended I/O.
* Put side-effects in injectable “effect handlers”; keep core logic pure.
## 2. Naming
* Choose descriptive, non-abbreviated names—no 3-letter acronyms or non-standard contractions.
* Anyone reading a function's type signature alone should grasp its purpose without extra context.
## 3. Typing
* Maintain **strict, exhaustive** typing; never bypass the type-checker.
* Default to `Literal[...]` when an enum-like set is needed.
* Prefer built-in types; when two values share structure but differ in meaning, enforce separation:
* Use `typing.NewType` for primitives (zero runtime cost).
* For serialisable objects, add a `type: str` field that states the object's identity.
## 4. Pydantic
* Read, respect, and rely on Pydantic docs.
* Centralise a common `ConfigDict` with `frozen=True` and `strict=True` (or stricter) and reuse it everywhere.
* For hierarchies of `BaseModel` variants, declare a discriminated union with `typing.Annotated[Base, Field(discriminator='variant')]`; publish a single `TypeAdapter[Base]` so all variants share one strict validator.
## 5. IDs & UUIDs
* Subclass Pydantic's `UUID4` for custom ID types.
* Generate fresh IDs with `uuid.uuid4()`.
* Create idempotency keys by hashing *persisted* state plus a **function-specific salt** to avoid collisions after crashes.
## 6. Error Handling
* Catch an exception **only** where you can handle or transform it meaningfully.
* State in the docstring **where** each exception is expected to be handled and **why**.
## 7. Dependencies
* Introduce new external dependencies only after approval.
* Request only libraries common in production environments.
## 8. Use of `@final` & Freezing
* Mark classes, methods, and variables as `@final` or otherwise immutable wherever applicable.
## 9. Repository Workflow
If you spot a rule violation within code that you've not been asked to work on directly, inform the user rather than patching it ad-hoc.
---
### One-Sentence Summary
Write strictly-typed, pure, self-describing Python that uses Pydantic, well-scoped side-effects, immutable state, approved dependencies, and explicit error handling.

1
.envrc Normal file

@@ -0,0 +1 @@
use flake

2
.gitattributes vendored

@@ -1,2 +0,0 @@
*.mp3 filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text

3
.githooks/post-checkout Executable file

@@ -0,0 +1,3 @@
#!/bin/sh
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'post-checkout' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs post-checkout "$@"

3
.githooks/post-commit Executable file

@@ -0,0 +1,3 @@
#!/bin/sh
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'post-commit' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs post-commit "$@"

3
.githooks/post-merge Executable file

@@ -0,0 +1,3 @@
#!/bin/sh
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'post-merge' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs post-merge "$@"

3
.githooks/pre-push Executable file

@@ -0,0 +1,3 @@
#!/bin/sh
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'pre-push' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs pre-push "$@"

3
.github/CODEOWNERS vendored Normal file

@@ -0,0 +1,3 @@
* @ToxicPine
* @AlexCheema
* @GeluVrabie

43
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file

@@ -0,0 +1,43 @@
---
name: Bug Report
about: Create a report to help us improve
title: '[BUG] '
labels: bug
assignees: ''
---
## Describe the bug
A clear and concise description of what the bug is.
## To Reproduce
Steps to reproduce the behavior:
1.
2.
3.
## Expected behavior
A clear and concise description of what you expected to happen.
## Actual behavior
A clear and concise description of what actually happened.
## Environment
- macOS Version:
- EXO Version:
- Hardware:
- Device 1: (e.g., MacBook Pro M1 Max, 32GB RAM)
- Device 2: (e.g., Mac Mini M2, 16GB RAM)
- Additional devices:
- Interconnection:
- (e.g., Thunderbolt 4 cable between Device 1 and 2)
- (e.g., WiFi 6 for Device 3)
- (e.g., 10GbE Ethernet between all devices)
## Additional context
Add any other context about the problem here.


@@ -0,0 +1,11 @@
---
name: Feature Request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''
---
<!-- Please use a clear, descriptive title above -->
Describe what you'd like to see added to EXO.


@@ -0,0 +1,16 @@
name: Commit if changed
description: "Create a commit when the working tree is dirty"
inputs:
message:
description: "Commit message"
required: true
runs:
using: composite
steps:
- name: Commit changed files
shell: bash
run: |
git diff --quiet && exit 0
git commit -am "${{ inputs.message }}"

10
.github/actions/format/action.yml vendored Normal file

@@ -0,0 +1,10 @@
name: Format Code
description: "Run code formatter"
runs:
using: "composite"
steps:
- name: Format code
run: nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just fmt
shell: bash

10
.github/actions/lint-check/action.yml vendored Normal file

@@ -0,0 +1,10 @@
name: Lint Check
description: "Check for lint errors"
runs:
using: "composite"
steps:
- name: Lint check
run: nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just lint-check
shell: bash

10
.github/actions/lint/action.yml vendored Normal file

@@ -0,0 +1,10 @@
name: Lint Code
description: "Run code linter"
runs:
using: "composite"
steps:
- name: Lint code
run: nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just lint
shell: bash


@@ -0,0 +1,10 @@
name: Regenerate Protobufs
description: "Regenerate protobuf files"
runs:
using: "composite"
steps:
- name: Regenerate protobufs
run: nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just regenerate-protobufs
shell: bash


@@ -0,0 +1,20 @@
name: Setup Python & uv
description: "Regenerate Python environment from uv.lock"
runs:
using: "composite"
steps:
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
cache-dependency-glob: uv.lock
- name: Install Python
run: uv python install
shell: bash
- name: Sync
run: uv sync --locked --all-extras --dev
shell: bash

12
.github/actions/typecheck/action.yml vendored Normal file

@@ -0,0 +1,12 @@
name: Type Check
description: "Run type checker"
runs:
using: "composite"
steps:
- name: Run type checker
run: |
nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just sync
nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just check
shell: bash

12
.github/actions/unit-test/action.yml vendored Normal file

@@ -0,0 +1,12 @@
name: Unit Test
description: "Run unit tests"
runs:
using: "composite"
steps:
- name: Run unit tests
run: |
nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just sync-clean
nix --extra-experimental-features nix-command --extra-experimental-features flakes develop -c just test-fast
shell: bash

20
.github/actions/verify-clean/action.yml vendored Normal file

@@ -0,0 +1,20 @@
name: Verify Clean Working Tree
description: "Fail the job if the previous step left the working tree dirty"
inputs:
step:
description: "The name of the step that just executed"
required: true
runs:
using: composite
steps:
- name: Check git diff
shell: bash
run: |
if ! git diff --quiet; then
echo "Error: ${{ inputs.step }} left working tree dirty." >&2
git --no-pager diff >&2
exit 1
fi

401
.github/bench.py vendored

@@ -1,401 +0,0 @@
import aiohttp
import asyncio
import time
import json
import os
import boto3
from typing import Dict, Any
from datetime import datetime
import subprocess
import psutil
import platform
from pathlib import Path
def check_system_state():
print("\n=== System State Check ===", flush=True)
# Add macOS-specific checks
try:
# Check powermetrics with sudo
try:
power_metrics = subprocess.run(
['sudo', 'powermetrics', '-n', '1', '-i', '1000', '--samplers', 'cpu_power'],
capture_output=True, text=True
)
print("\nPower Metrics:", power_metrics.stdout, flush=True)
except Exception as e:
print(f"Error getting power metrics: {e}", flush=True)
# Check thermal state
thermal_state = subprocess.run(['pmset', '-g', 'therm'], capture_output=True, text=True)
print("\nThermal State:", thermal_state.stdout, flush=True)
# Check if running under Rosetta
arch = subprocess.run(['arch'], capture_output=True, text=True)
print("\nArchitecture:", arch.stdout, flush=True)
# Check MLX compilation mode - only if mlx is available
try:
import mlx.core as mx
if hasattr(mx, 'build_info'):
print("\nMLX Build Info:", mx.build_info(), flush=True)
else:
print("\nMLX Build Info: Not available in this version", flush=True)
except ImportError:
print("\nMLX: Not installed", flush=True)
except Exception as e:
print(f"\nError checking MLX: {e}", flush=True)
except Exception as e:
print(f"Error in macOS checks: {e}", flush=True)
# CPU Info
print("\nCPU Information:", flush=True)
try:
if platform.system() == 'Darwin' and platform.processor() == 'arm':
# Use sysctl for Apple Silicon Macs
cpu_info = subprocess.run(['sysctl', 'machdep.cpu'], capture_output=True, text=True)
if cpu_info.returncode == 0:
print(f"CPU Info (Apple Silicon):", cpu_info.stdout, flush=True)
# Parse powermetrics output for clearer CPU frequency display
try:
power_metrics = subprocess.run(
['sudo', 'powermetrics', '-n', '1', '-i', '100', '--samplers', 'cpu_power'],
capture_output=True, text=True
)
if power_metrics.returncode == 0:
output = power_metrics.stdout
print("\nDetailed CPU Frequency Information:")
# Extract cluster frequencies and max frequencies
current_cluster = None
max_freqs = {'E': 0, 'P0': 0, 'P1': 0}
for line in output.split('\n'):
# Track which cluster we're processing
if "E-Cluster" in line:
current_cluster = 'E'
elif "P0-Cluster" in line:
current_cluster = 'P0'
elif "P1-Cluster" in line:
current_cluster = 'P1'
# Get current frequencies
if "HW active frequency:" in line:
freq = line.split(':')[1].strip()
if freq != "0 MHz":
print(f"Current {current_cluster}-Cluster Frequency: {freq}")
# Get max frequencies from residency lines
if current_cluster and "active residency:" in line and "MHz:" in line:
try:
# Extract all frequency values
freqs = []
parts = line.split('MHz:')[:-1] # Skip last part as it's not a frequency
for part in parts:
freq_str = part.split()[-1]
try:
freq = float(freq_str)
freqs.append(freq)
except ValueError:
continue
if freqs:
max_freqs[current_cluster] = max(max_freqs[current_cluster], max(freqs))
except Exception:
continue
# Print max frequencies
print("\nMaximum Available Frequencies:")
for cluster, max_freq in max_freqs.items():
if max_freq > 0:
print(f"{cluster}-Cluster Max: {max_freq:.0f} MHz")
except Exception as e:
print(f"Error parsing powermetrics: {e}", flush=True)
else:
# Use psutil for other systems
cpu_freq = psutil.cpu_freq()
print(f"CPU Frequency - Current: {cpu_freq.current:.2f}MHz, Min: {cpu_freq.min:.2f}MHz, Max: {cpu_freq.max:.2f}MHz", flush=True)
print(f"\nCPU Usage per Core: {psutil.cpu_percent(percpu=True)}%", flush=True)
# Check if running in low power mode
power_mode = subprocess.run(['pmset', '-g'], capture_output=True, text=True)
print("\nPower Settings:", power_mode.stdout, flush=True)
except Exception as e:
print(f"Error getting CPU info: {e}", flush=True)
# Memory Info
print("\nMemory Information:", flush=True)
try:
mem = psutil.virtual_memory()
print(f"Total: {mem.total/1024/1024/1024:.2f}GB", flush=True)
print(f"Available: {mem.available/1024/1024/1024:.2f}GB", flush=True)
print(f"Used: {mem.used/1024/1024/1024:.2f}GB ({mem.percent}%)", flush=True)
# Check swap
swap = psutil.swap_memory()
print(f"Swap Used: {swap.used/1024/1024/1024:.2f}GB of {swap.total/1024/1024/1024:.2f}GB", flush=True)
except Exception as e:
print(f"Error getting memory info: {e}", flush=True)
# GPU Info
print("\nGPU Information:", flush=True)
try:
# Check MLX GPU settings
print("MLX Environment Variables:", flush=True)
mlx_vars = {k: v for k, v in os.environ.items() if k.startswith('MLX')}
print(json.dumps(mlx_vars, indent=2), flush=True)
# Check Metal GPU memory allocation
gpu_mem = subprocess.run(['sysctl', 'iogpu'], capture_output=True, text=True)
print("GPU Memory Settings:", gpu_mem.stdout, flush=True)
except Exception as e:
print(f"Error getting GPU info: {e}", flush=True)
# Process Priority
print("\nProcess Priority Information:", flush=True)
try:
current_process = psutil.Process()
print(f"Process Nice Value: {current_process.nice()}", flush=True)
# Only try to get ionice if the platform supports it
if hasattr(current_process, 'ionice'):
print(f"Process IO Nice Value: {current_process.ionice()}", flush=True)
except Exception as e:
print(f"Error getting process priority info: {e}", flush=True)
# System Load
print("\nSystem Load:", flush=True)
try:
load_avg = psutil.getloadavg()
print(f"Load Average: {load_avg}", flush=True)
# Get top processes by CPU and Memory
print("\nTop Processes by CPU Usage:", flush=True)
processes = []
for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent']):
try:
pinfo = proc.info
if pinfo['cpu_percent'] is not None and pinfo['memory_percent'] is not None:
processes.append(pinfo)
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
# Sort and display top 5 CPU-consuming processes
sorted_by_cpu = sorted(processes, key=lambda x: x['cpu_percent'] or 0, reverse=True)[:5]
for proc in sorted_by_cpu:
print(f"PID: {proc['pid']}, Name: {proc['name']}, CPU: {proc['cpu_percent']}%, Memory: {proc['memory_percent']:.1f}%")
except Exception as e:
print(f"Error getting system load info: {e}", flush=True)
print("\n=== End System State Check ===\n", flush=True)
def check_gpu_access():
try:
# Check if MLX can see the GPU
import mlx.core as mx
print("MLX device info:", mx.default_device())
# Check Metal device availability
result = subprocess.run(['system_profiler', 'SPDisplaysDataType'], capture_output=True, text=True)
print("GPU Info:", result.stdout)
except Exception as e:
print(f"Failed to check GPU access: {e}")
async def measure_performance(api_endpoint: str, prompt: str, model: str) -> Dict[str, Any]:
"""
Measures the performance of an API endpoint by sending a prompt and recording metrics.
Args:
api_endpoint (str): The API endpoint URL.
prompt (str): The prompt to send to the API.
Returns:
Dict[str, Any]: A dictionary containing performance metrics or error information.
"""
results = {
'model': model,
'run_id': os.environ.get('GITHUB_RUN_ID', 'unknown'),
'branch': os.environ.get('GITHUB_REF_NAME', 'unknown'),
'commit': os.environ.get('GITHUB_SHA', 'unknown'),
'configuration': json.loads(os.environ.get('HARDWARE_CONFIG', '{}'))
}
# Get token count
session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=600, connect=10, sock_read=600, sock_connect=10))
try:
response = await session.post(
"http://localhost:52415/v1/chat/token/encode",
json={
"model": model,
"messages": [{"role": "user", "content": prompt}]
}
)
response.raise_for_status()
token_data = await response.json()
results['prompt_len'] = token_data['num_tokens']
except Exception as e:
await session.close()
raise RuntimeError(f"Failed to get token count: {str(e)}")
# Measure completion performance
try:
start_time = time.time()
response = await session.post(
api_endpoint,
json={
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0,
"stream": True
}
)
response.raise_for_status()
first_token_time = None
total_tokens = 0
async for line in response.content.iter_chunks():
line = line[0].decode('utf-8').strip()
if not line.startswith('data: '):
continue
data = json.loads(line[6:]) # Skip 'data: ' prefix
if content := data.get('choices', [{}])[0].get('delta', {}).get('content'):
print(f"Received content: {content}", flush=True)
if first_token_time is None:
first_token_time = time.time()
ttft = first_token_time - start_time
results.update({
'ttft': ttft,
'prompt_tps': results['prompt_len'] / ttft
})
total_tokens += 1
total_time = time.time() - start_time
results.update({
'generation_tps': total_tokens / total_time,
'response_len': total_tokens,
'total_time': total_time
})
except Exception as e:
raise RuntimeError(f"Performance measurement failed: {str(e)}")
finally:
await session.close()
return results
async def main() -> None:
api_endpoint = "http://localhost:52415/v1/chat/completions"
# Define prompts
prompt_warmup = "what is the capital of France?"
prompt_essay = "write an essay about cats"
model = os.environ.get('model', 'llama-3.2-1b')
# Warmup request
print("\nPerforming warmup request...", flush=True)
try:
warmup_results = await measure_performance(api_endpoint, prompt_warmup, model)
print("Warmup completed successfully", flush=True)
except Exception as e:
print(f"Warmup request failed: {e}", flush=True)
# Measure performance for the essay prompt
print("\nMeasuring performance for the essay prompt...", flush=True)
results = await measure_performance(api_endpoint, prompt_essay, model)
try:
s3_client = boto3.client(
's3',
aws_access_key_id=os.environ.get('aws_access_key_id'),
aws_secret_access_key=os.environ.get('aws_secret_key')
)
job_name = os.environ.get('GITHUB_JOB')
# Create S3 key with timestamp and commit info
now = datetime.utcnow()
timestamp = now.strftime('%H-%M-%S')
commit_sha = os.environ.get('GITHUB_SHA', 'unknown')[:7]
s3_key = f"{job_name}/{model}/{now.year}/{now.month}/{now.day}/{timestamp}_{commit_sha}.json"
# Upload to S3
s3_client.put_object(
Bucket='exo-benchmarks',
Key=s3_key,
Body=json.dumps(results),
ContentType='application/json'
)
print(f"Performance metrics uploaded to S3: s3://exo-benchmarks/{s3_key}", flush=True)
except Exception as e:
print(f"Failed to upload metrics to S3: {e}", flush=True)
# Optionally print the metrics for visibility
print("Performance metrics:", flush=True)
print(json.dumps(results, indent=4), flush=True)
def optimize_system_performance():
"""Set optimal system performance settings before running benchmark."""
try:
# Try to set high performance power mode
subprocess.run(['sudo', 'pmset', '-a', 'powermode', '2'], check=False)
# Ensure MLX uses performance cores and GPU
os.environ['MLX_FORCE_P_CORES'] = '1'
os.environ['MLX_METAL_PREWARM'] = '1'
os.environ['MLX_USE_GPU'] = '1'
# Set process priority
current_process = psutil.Process()
try:
# Set highest priority
subprocess.run(['sudo', 'renice', '-n', '-20', '-p', str(current_process.pid)], check=False)
# Print current process state
print("\nProcess State Before Benchmark:", flush=True)
proc_info = subprocess.run(
['ps', '-o', 'pid,ppid,user,%cpu,%mem,nice,stat,pri,command', '-p', str(current_process.pid)],
capture_output=True, text=True
)
print(proc_info.stdout, flush=True)
# Verify power mode
power_info = subprocess.run(['pmset', '-g'], capture_output=True, text=True)
if 'powermode 0' in power_info.stdout:
print("\nWarning: System still in normal power mode. Trying to set high performance mode again...", flush=True)
subprocess.run(['sudo', 'pmset', '-a', 'powermode', '2'], check=False)
except Exception as e:
print(f"Warning: Could not set process priority: {e}", flush=True)
except Exception as e:
print(f"Warning: Could not optimize system performance: {e}", flush=True)
# Print optimization status
print("\nOptimization Settings:", flush=True)
print("MLX Environment Variables:", flush=True)
for var in ['MLX_FORCE_P_CORES', 'MLX_METAL_PREWARM', 'MLX_USE_GPU']:
print(f"{var}: {os.environ.get(var, 'Not set')}", flush=True)
try:
nice_value = psutil.Process().nice()
print(f"Process Nice Value: {nice_value}", flush=True)
if nice_value != -20:
print("Warning: Process not running at highest priority", flush=True)
except Exception:
pass
if __name__ == "__main__":
check_system_state()
check_gpu_access()
optimize_system_performance()
asyncio.run(main())

330
.github/bootstrap.sh vendored

@@ -1,330 +0,0 @@
#!/bin/bash
set -e
command_exists() {
command -v "$1" >/dev/null 2>&1
}
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
if [ "$EUID" -eq 0 ]; then
log "Please do not run as root. Run as regular user with sudo access."
exit 1
fi
# Check for required arguments
if [ -z "$1" ]; then
log "Error: Runner token is required"
log "Usage: $0 <runner-token> [tailscale-auth-key]"
exit 1
fi
RUNNER_TOKEN=$1
TAILSCALE_AUTH_KEY=$2
REPO="exo-explore/exo"
# Add sudoers configuration
log "Configuring sudo access..."
SUDOERS_CONTENT="$(whoami) ALL=(ALL) NOPASSWD: ALL"
echo "$SUDOERS_CONTENT" | sudo tee /etc/sudoers.d/github-runner > /dev/null
sudo chmod 440 /etc/sudoers.d/github-runner
log "Configuring privacy permissions..."
sudo tccutil reset All
sudo tccutil reset SystemPolicyAllFiles
sudo tccutil reset SystemPolicyNetworkVolumes
# Configure power management for maximum performance
log "Configuring power management..."
sudo pmset -a powermode 2 # Force highest performance mode
sudo pmset -a gpuswitch 2 # Force discrete/high-performance GPU
sudo pmset -a lowpowermode 0
sudo pmset -a lessbright 0
sudo pmset -a disablesleep 1
sudo pmset -a sleep 0
sudo pmset -a hibernatemode 0
sudo pmset -a autopoweroff 0
sudo pmset -a standby 0
sudo pmset -a powernap 0
# For Python specifically
PYTHON_PATH="/opt/homebrew/bin/python3.12"
sudo chmod 755 "$PYTHON_PATH"
# Add to firewall
log "Configuring firewall access..."
sudo /usr/libexec/ApplicationFirewall/socketfilterfw --add "$PYTHON_PATH"
sudo /usr/libexec/ApplicationFirewall/socketfilterfw --unblock "$PYTHON_PATH"
# Set Homebrew paths based on architecture
if [ "$(uname -p)" = "arm" ]; then
BREW_PREFIX="/opt/homebrew"
else
BREW_PREFIX="/usr/local"
fi
# Install Homebrew if not present
if ! command_exists brew; then
log "Installing Homebrew..."
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> ~/.zshrc
eval "$(/opt/homebrew/bin/brew shellenv)"
fi
# Install required packages
log "Installing required packages..."
export HOMEBREW_NO_AUTO_UPDATE=1
brew install python@3.12 coreutils
# Optional Tailscale setup if auth key is provided
if [ -n "$TAILSCALE_AUTH_KEY" ]; then
log "Installing and configuring Tailscale..."
brew install --quiet tailscale
sudo brew services stop tailscale 2>/dev/null || true
sudo rm -f /var/db/tailscale/tailscaled.state 2>/dev/null || true
sudo brew services start tailscale
sleep 2
sudo tailscale up --authkey=$TAILSCALE_AUTH_KEY
# Enable SSH and Screen Sharing
log "Enabling remote access services..."
sudo launchctl load -w /System/Library/LaunchDaemons/ssh.plist
sudo /System/Library/CoreServices/RemoteManagement/ARDAgent.app/Contents/Resources/kickstart \
-activate \
-configure -access -on \
-configure -allowAccessFor -allUsers \
-configure -restart -agent -privs -all
# Create launch daemon for remote access
sudo bash -c 'cat > /Library/LaunchDaemons/com.remote.access.setup.plist' << 'EOL'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>com.remote.access.setup</string>
<key>ProgramArguments</key>
<array>
<string>/bin/bash</string>
<string>-c</string>
<string>
launchctl load -w /System/Library/LaunchDaemons/ssh.plist;
/System/Library/CoreServices/RemoteManagement/ARDAgent.app/Contents/Resources/kickstart -activate -configure -access -on
</string>
</array>
<key>RunAtLoad</key>
<true/>
</dict>
</plist>
EOL
sudo chmod 644 /Library/LaunchDaemons/com.remote.access.setup.plist
sudo launchctl load -w /Library/LaunchDaemons/com.remote.access.setup.plist
fi
# Configure GitHub Actions Runner
log "Gathering system metadata..."
MACHINE_NAME=$(scutil --get ComputerName)
MACHINE_NAME="runner-$(echo -n "$MACHINE_NAME" | tr '[:upper:]' '[:lower:]' | tr -cd '[:alnum:]-')"
# Enhanced Apple Silicon detection
MACHINE_INFO=$(system_profiler SPHardwareDataType)
CHIP_FULL=$(echo "$MACHINE_INFO" | grep "Chip" | cut -d: -f2 | xargs)
if [[ $CHIP_FULL =~ "Apple" ]]; then
CHIP_MODEL=$(echo "$CHIP_FULL" | sed 's/^Apple //' | tr -d ' ' | tr '[:lower:]' '[:upper:]')
GPU_CORES=$(ioreg -l | grep "gpu-core-count" | awk -F'= ' '{print $2}')
if [ -z "$GPU_CORES" ]; then
GPU_CORES="N/A"
fi
else
CHIP_MODEL="Intel"
GPU_CORES="N/A"
fi
MEMORY=$(($(sysctl -n hw.memsize) / 1024 / 1024 / 1024))
# Set up GitHub Runner
RUNNER_DIR="$HOME/actions-runner"
# Check if runner is already configured
if [ -f "$RUNNER_DIR/.runner" ]; then
log "Runner already configured. Stopping existing service..."
sudo launchctl unload /Library/LaunchDaemons/com.github.runner.plist 2>/dev/null || true
fi
# Create runner directory if it doesn't exist
mkdir -p "$RUNNER_DIR"
cd "$RUNNER_DIR"
CUSTOM_LABELS="self-hosted,macos,arm64,${CHIP_MODEL}_GPU${GPU_CORES}_${MEMORY}GB"
# Only download and extract if not already present or if forced
if [ ! -f "$RUNNER_DIR/run.sh" ] || [ "${FORCE_SETUP:-false}" = "true" ]; then
log "Downloading GitHub Actions runner..."
RUNNER_VERSION=$(curl -s https://api.github.com/repos/actions/runner/releases/latest | grep '"tag_name":' | cut -d'"' -f4)
curl -o actions-runner.tar.gz -L "https://github.com/actions/runner/releases/download/${RUNNER_VERSION}/actions-runner-osx-arm64-${RUNNER_VERSION#v}.tar.gz"
tar xzf actions-runner.tar.gz
rm actions-runner.tar.gz
else
log "Runner already downloaded, skipping download step"
fi
log "Configuring runner with labels: $CUSTOM_LABELS"
./config.sh --unattended \
--url "https://github.com/${REPO}" \
--token "${RUNNER_TOKEN}" \
--name "${MACHINE_NAME}" \
--labels "${CUSTOM_LABELS}" \
--work "_work"
# Set optimal performance settings
log "Configuring system for optimal performance..."
# Configure CPU performance
log "Setting CPU performance controls..."
# Disable timer coalescing
sudo sysctl -w kern.timer.coalescing_enabled=0
sudo sysctl -w kern.timer_coalesce_bg_scale=-5
sudo sysctl -w kern.timer_resort_threshold_ns=0
# Set minimum timer intervals
sudo sysctl -w kern.wq_max_timer_interval_usecs=1000
sudo sysctl -w kern.timer_coalesce_bg_ns_max=1000
# Set minimum timer coalescing for all tiers
sudo sysctl -w kern.timer_coalesce_tier0_scale=-5
sudo sysctl -w kern.timer_coalesce_tier0_ns_max=1000
sudo sysctl -w kern.timer_coalesce_tier1_scale=-5
sudo sysctl -w kern.timer_coalesce_tier1_ns_max=1000
sudo sysctl -w kern.timer_coalesce_tier2_scale=-5
sudo sysctl -w kern.timer_coalesce_tier2_ns_max=1000
sudo sysctl -w kern.timer_coalesce_tier3_scale=-5
sudo sysctl -w kern.timer_coalesce_tier3_ns_max=1000
sudo sysctl -w kern.timer_coalesce_tier4_scale=-5
sudo sysctl -w kern.timer_coalesce_tier4_ns_max=1000
# Disable QoS restrictions
sudo sysctl -w net.qos.policy.restricted=0
sudo sysctl -w net.qos.policy.restrict_avapps=0
sudo sysctl -w net.qos.policy.wifi_enabled=0
sudo sysctl -w net.qos.policy.capable_enabled=0
# Set scheduler parameters
sudo sysctl -w kern.sched_rt_avoid_cpu0=0
sudo sysctl -w debug.sched=2
sudo sysctl -w net.pktsched.netem.sched_output_ival_ms=1
# Clean up any existing runner services
log "Cleaning up existing runner services..."
for service in com.github.runner com.github.runner.monitor com.github.runner.cpuaffinity com.github.runner.affinity; do
sudo launchctl bootout system/$service 2>/dev/null || true
sudo rm -f /Library/LaunchDaemons/$service.plist
done
# Create a simple runner service configuration
sudo tee /Library/LaunchDaemons/com.github.runner.plist > /dev/null << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>com.github.runner</string>
<key>UserName</key>
<string>$(whoami)</string>
<key>GroupName</key>
<string>staff</string>
<key>WorkingDirectory</key>
<string>$RUNNER_DIR</string>
<key>ProgramArguments</key>
<array>
<string>$RUNNER_DIR/run.sh</string>
</array>
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<dict>
<key>SuccessfulExit</key>
<false/>
<key>Crashed</key>
<true/>
</dict>
<key>ProcessType</key>
<string>Interactive</string>
<key>LowPriorityIO</key>
<false/>
<key>AbandonProcessGroup</key>
<false/>
<key>EnableTransactions</key>
<true/>
<key>ThrottleInterval</key>
<integer>0</integer>
<key>HardResourceLimits</key>
<dict>
<key>NumberOfFiles</key>
<integer>524288</integer>
<key>MemoryLock</key>
<integer>-1</integer>
</dict>
<key>SoftResourceLimits</key>
<dict>
<key>NumberOfFiles</key>
<integer>524288</integer>
<key>MemoryLock</key>
<integer>-1</integer>
</dict>
<key>QOSClass</key>
<string>User-Interactive</string>
<key>StandardOutPath</key>
<string>$RUNNER_DIR/_diag/runner.log</string>
<key>StandardErrorPath</key>
<string>$RUNNER_DIR/_diag/runner.err</string>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
</dict>
<key>Nice</key>
<integer>-20</integer>
</dict>
</plist>
EOF
# Set proper permissions for the LaunchDaemon
sudo chown root:wheel /Library/LaunchDaemons/com.github.runner.plist
sudo chmod 644 /Library/LaunchDaemons/com.github.runner.plist
# Remove any existing service
sudo launchctl bootout system/com.github.runner 2>/dev/null || true
# Load the new service using bootstrap
sudo launchctl bootstrap system /Library/LaunchDaemons/com.github.runner.plist
# Add Runner.Listener permissions (after runner installation)
RUNNER_PATH="$RUNNER_DIR/bin/Runner.Listener"
sudo chmod 755 "$RUNNER_PATH"
sudo /usr/libexec/ApplicationFirewall/socketfilterfw --add "$RUNNER_PATH"
sudo /usr/libexec/ApplicationFirewall/socketfilterfw --unblock "$RUNNER_PATH"
# Create connection info file if Tailscale is configured
if [ -n "$TAILSCALE_AUTH_KEY" ]; then
TAILSCALE_IP=$(tailscale ip)
cat > "$HOME/remote_access_info.txt" << EOL
Mac Remote Access Information
============================
Computer Name: $MACHINE_NAME
Username: $USER
Tailscale IP: $TAILSCALE_IP
SSH Command: ssh $USER@$TAILSCALE_IP
Screen Sharing: vnc://$TAILSCALE_IP
EOL
chmod 600 "$HOME/remote_access_info.txt"
fi
log "Verifying runner service status..."
if sudo launchctl list | grep com.github.runner > /dev/null; then
log "GitHub Actions runner service is running successfully!"
log "Runner labels: $CUSTOM_LABELS"
[ -n "$TAILSCALE_AUTH_KEY" ] && log "Remote access details saved to: $HOME/remote_access_info.txt"
else
log "Error: Failed to start GitHub Actions runner service"
exit 1
fi


@@ -1,95 +0,0 @@
#!/bin/bash
set -e
# Function to log with timestamp
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
log "Applying comprehensive performance optimizations..."
# System-wide power management
log "Configuring power management..."
sudo pmset -a lessbright 0
sudo pmset -a disablesleep 1
sudo pmset -a sleep 0
sudo pmset -a hibernatemode 0
sudo pmset -a autopoweroff 0
sudo pmset -a standby 0
sudo pmset -a powernap 0
sudo pmset -a proximitywake 0
sudo pmset -a tcpkeepalive 1
sudo pmset -a powermode 2
sudo pmset -a gpuswitch 2
sudo pmset -a displaysleep 0
sudo pmset -a disksleep 0
# Memory and kernel optimizations
log "Configuring memory and kernel settings..."
sudo sysctl -w kern.memorystatus_purge_on_warning=0
sudo sysctl -w kern.memorystatus_purge_on_critical=0
sudo sysctl -w kern.timer.coalescing_enabled=0
# Metal and GPU optimizations
log "Configuring Metal and GPU settings..."
defaults write com.apple.CoreML MPSEnableGPUValidation -bool false
defaults write com.apple.CoreML MPSEnableMetalValidation -bool false
defaults write com.apple.CoreML MPSEnableGPUDebug -bool false
defaults write com.apple.Metal GPUDebug -bool false
defaults write com.apple.Metal GPUValidation -bool false
defaults write com.apple.Metal MetalValidation -bool false
defaults write com.apple.Metal MetalCaptureEnabled -bool false
defaults write com.apple.Metal MTLValidationBehavior -string "Disabled"
defaults write com.apple.Metal EnableMTLDebugLayer -bool false
defaults write com.apple.Metal MTLDebugLevel -int 0
defaults write com.apple.Metal PreferIntegratedGPU -bool false
defaults write com.apple.Metal ForceMaximumPerformance -bool true
defaults write com.apple.Metal MTLPreferredDeviceGPUFrame -bool true
# Create MPS cache directory with proper permissions
sudo mkdir -p /tmp/mps_cache
sudo chmod 777 /tmp/mps_cache
# Process and resource limits
log "Configuring process limits..."
sudo launchctl limit maxfiles 524288 524288
ulimit -n 524288 || log "Warning: Could not set file descriptor limit"
ulimit -c 0
ulimit -l unlimited || log "Warning: Could not set memory lock limit"
# Export performance-related environment variables
cat << 'EOF' > /tmp/performance_env.sh
# Metal optimizations
export MTL_DEBUG_LAYER=0
export METAL_DEVICE_WRAPPER_TYPE=1
export METAL_DEBUG_ERROR_MODE=0
export METAL_FORCE_PERFORMANCE_MODE=1
export METAL_DEVICE_PRIORITY=high
export METAL_MAX_COMMAND_QUEUES=1024
export METAL_LOAD_LIMIT=0
export METAL_VALIDATION_ENABLED=0
export METAL_ENABLE_VALIDATION_LAYER=0
export OBJC_DEBUG_MISSING_POOLS=NO
export MPS_CACHEDIR=/tmp/mps_cache
# MLX optimizations
export MLX_USE_GPU=1
export MLX_METAL_COMPILE_ASYNC=1
export MLX_METAL_PREALLOCATE=1
export MLX_METAL_MEMORY_GUARD=0
export MLX_METAL_CACHE_KERNELS=1
export MLX_PLACEMENT_POLICY=metal
export MLX_METAL_VALIDATION=0
export MLX_METAL_DEBUG=0
export MLX_FORCE_P_CORES=1
export MLX_METAL_MEMORY_BUDGET=0
export MLX_METAL_PREWARM=1
# Python optimizations
export PYTHONUNBUFFERED=1
export PYTHONOPTIMIZE=2
export PYTHONHASHSEED=0
export PYTHONDONTWRITEBYTECODE=1
EOF
log "Performance optimizations completed. Environment variables written to /tmp/performance_env.sh"

.github/pull_request_template.md

@@ -0,0 +1,23 @@
## Motivation
<!-- Why is this change needed? What problem does it solve? -->
<!-- If it fixes an open issue, please link to the issue here -->
## Changes
<!-- Describe what you changed in detail -->
## Why It Works
<!-- Explain why your approach solves the problem -->
## Test Plan
### Manual Testing
<!-- Hardware: (e.g., MacBook Pro M1 Max 32GB, Mac Mini M2 16GB, connected via Thunderbolt 4) -->
<!-- What you did: -->
<!-- - -->
### Automated Testing
<!-- Describe changes to automated tests, or how existing tests cover this change -->
<!-- - -->


@@ -1,207 +0,0 @@
# This is the reusable workflow file
name: Distributed Job Runner
on:
workflow_call:
inputs:
config:
required: true
type: string
model:
required: true
type: string
calling_job_name:
required: true
type: string
network_interface:
required: true
type: string
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- id: set-matrix
env:
CONFIG: ${{ inputs.config }}
run: |
MATRIX=$(echo $CONFIG | jq -c '{cpu: [to_entries | .[] | .key as $k | range(.value) | $k]}')
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
run-distributed-job:
needs: generate-matrix
strategy:
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
runs-on: ['self-hosted', 'macOS', '${{ matrix.cpu }}']
env:
HARDWARE_CONFIG: ${{ inputs.config }}
model: ${{ inputs.model }}
# Add performance-related environment variables
MTL_DEBUG_LAYER: 0
METAL_VALIDATION_ENABLED: 0
MLX_METAL_VALIDATION: 0
MLX_METAL_DEBUG: 0
MLX_FORCE_P_CORES: 1
MLX_METAL_PREWARM: 1
PYTHONOPTIMIZE: 2
steps:
- name: Cleanup workspace
run: |
sudo rm -rf "$GITHUB_WORKSPACE"
sudo mkdir -p "$GITHUB_WORKSPACE"
sudo chown -R $(whoami):$(id -g) "$GITHUB_WORKSPACE"
- uses: actions/checkout@v4
- name: Install dependencies
run: |
export PATH="/usr/local/bin:/opt/homebrew/bin:$PATH"
python3.12 -m venv .venv || {
echo "Failed to find python3.12. Checking installation locations:"
ls -l /usr/local/bin/python* /opt/homebrew/bin/python* 2>/dev/null || true
exit 1
}
source .venv/bin/activate
pip install --upgrade pip
pip install -e .
pip install boto3==1.35.76
- name: Apply Performance Optimizations
run: |
# Export performance-related environment variables
cat << 'EOF' > /tmp/performance_env.sh
# MLX and Metal optimizations
export MTL_DEBUG_LAYER=0
export METAL_VALIDATION_ENABLED=0
export MLX_METAL_VALIDATION=0
export MLX_METAL_DEBUG=0
export MLX_FORCE_P_CORES=1
export MLX_METAL_PREWARM=1
export PYTHONOPTIMIZE=2
EOF
# Source the performance environment variables
source /tmp/performance_env.sh
# MLX Memory Settings
./configure_mlx.sh
# Verify optimizations
echo "Verifying performance settings..."
env | grep -E "MLX_|METAL_|MTL_"
- name: Run exo
env:
aws_access_key_id: ${{ secrets.S3_EXO_BENCHMARKS_AWS_ACCESS_KEY_ID }}
aws_secret_key: ${{ secrets.S3_EXO_BENCHMARKS_AWS_SECRET_ACCESS_KEY }}
run: |
# Source performance environment variables
source /tmp/performance_env.sh
# Debug information
echo "Current commit SHA: $GITHUB_SHA"
git rev-parse HEAD
git status
CALLING_JOB="${{ inputs.calling_job_name }}"
UNIQUE_JOB_ID="${CALLING_JOB}_${model}_${GITHUB_RUN_ID}"
ALL_NODE_IDS=$(for i in $(seq ${{ strategy.job-total }} -1 0); do echo -n "${UNIQUE_JOB_ID}_${i},"; done | sed 's/,$//')
MY_NODE_ID="${UNIQUE_JOB_ID}_${{ strategy.job-index }}"
source .venv/bin/activate
export PATH="/usr/local/bin:/opt/homebrew/bin:$PATH"
echo "=== Before starting exo ==="
ps -eo pid,ppid,user,%cpu,%mem,nice,state,pri,command | head -1
ps -eo pid,ppid,user,%cpu,%mem,nice,state,pri,command | grep -i python
echo "Starting exo daemon..."
echo "Power mode settings:"
sudo pmset -g
# Start exo with explicit process control
sudo taskpolicy -d default -g default -a -t 0 -l 0 .venv/bin/exo \
--node-id="${MY_NODE_ID}" \
--node-id-filter="${ALL_NODE_IDS}" \
--interface-type-filter="${{ inputs.network_interface }}" \
--disable-tui \
--max-generate-tokens 250 \
--chatgpt-api-response-timeout 900 \
--chatgpt-api-port 52415 > output1.log 2>&1 &
PID1=$!
echo "Exo process started with PID: $PID1"
tail -f output1.log &
TAIL1=$!
# Give process time to start
sleep 2
# Set additional process priorities
sudo renice -n -20 -p $PID1
sudo taskpolicy -t 4 -p $PID1
echo "=== After starting exo ==="
ps -eo pid,ppid,user,%cpu,%mem,nice,state,pri,command | head -1
ps -eo pid,ppid,user,%cpu,%mem,nice,state,pri,command | grep $PID1
echo "Additional process details:"
sudo powermetrics -n 1 -i 1000 --show-process-energy | grep -A 5 $PID1 || true
trap 'kill $TAIL1' EXIT
trap 'kill $PID1' EXIT
echo "Waiting for all nodes to connect..."
for i in {1..20}; do
echo "Attempt $i: Checking node count..."
nodes=$(curl -s http://localhost:52415/topology | jq ".nodes | length")
echo "Current node count: $nodes"
if [ "$nodes" -eq "${{ strategy.job-total }}" ]; then
echo "All nodes connected successfully!"
break
fi
if [ $i -eq 20 ]; then
echo "ERROR: Failed to connect all nodes after 20 attempts. Expected ${{ strategy.job-total }} nodes, but got $nodes"
exit 1
fi
sleep 5
done
if ! kill -0 $PID1 2>/dev/null; then
echo "ERROR: Instance (PID $PID1) died unexpectedly. Full log output:"
cat output1.log
exit 1
fi
if [ "${{ strategy.job-index }}" -eq "0" ]; then
sleep 10
echo "This is the primary node (index 0). Running benchmark..."
GITHUB_JOB=$CALLING_JOB python .github/bench.py
else
echo "This is a secondary node (index ${{ strategy.job-index }}). Waiting for completion..."
sleep 10
while true; do
echo "Checking if primary node is still running..."
nodes=$(curl -s http://localhost:52415/topology | jq ".nodes | length")
echo "Current node count: $nodes"
if [ "$nodes" -lt "${{ strategy.job-total }}" ]; then
echo "Primary node completed, exiting..."
break
fi
sleep 5
done
fi
- name: Check Final System State
if: always()
run: |
echo "=== Final System State ==="
sudo pmset -g
sudo powermetrics -n 1 -i 1000 --show-process-energy || true
system_profiler SPDisplaysDataType
sysctl iogpu
ps -eo pid,ppid,user,%cpu,%mem,nice,state,command | grep -i python
env | grep -E "MLX_|METAL_|MTL_"
echo "=== End Final System State ==="
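The `generate-matrix` step above expands a hardware config like `{"M4PRO_GPU16_24GB": 2}` into one matrix entry per requested node via jq (`{cpu: [to_entries | .[] | .key as $k | range(.value) | $k]}`). A minimal Python sketch of the same expansion (the helper name is illustrative, not part of the repo):

```python
import json

def expand_matrix(config_json: str) -> dict:
    """Mirror the jq expression: emit one 'cpu' entry per requested node.

    {"M4PRO_GPU16_24GB": 2} -> {"cpu": ["M4PRO_GPU16_24GB", "M4PRO_GPU16_24GB"]}
    """
    config = json.loads(config_json)
    return {"cpu": [label for label, count in config.items() for _ in range(count)]}

print(json.dumps(expand_matrix('{"M4PRO_GPU16_24GB": 2, "M3MAX_GPU40_128GB": 1}')))
```

Each entry then becomes one `runs-on: ['self-hosted', 'macOS', '<label>']` job, so a count of 2 schedules two jobs on runners with that label.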


@@ -1,71 +0,0 @@
name: Build and Test
on:
push:
branches: [ '*' ]
tags: [ '*' ]
pull_request:
branches: [ '*' ]
jobs:
single-m4-pro:
strategy:
matrix:
model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b']
uses: ./.github/workflows/bench_job.yml
with:
config: '{"M4PRO_GPU16_24GB": 1}'
model: ${{ matrix.model }}
calling_job_name: 'single-m4-pro'
network_interface: 'Ethernet'
secrets: inherit
two-m4-pro-cluster:
strategy:
matrix:
model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b']
uses: ./.github/workflows/bench_job.yml
with:
config: '{"M4PRO_GPU16_24GB": 2}'
model: ${{ matrix.model }}
calling_job_name: 'two-m4-pro-cluster'
network_interface: 'Ethernet'
secrets: inherit
# two-m4-pro-cluster-thunderbolt:
# strategy:
# matrix:
# model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b']
# uses: ./.github/workflows/bench_job.yml
# with:
# config: '{"M4PRO_GPU16_24GB": 2}'
# model: ${{ matrix.model }}
# calling_job_name: 'two-m4-pro-cluster-thunderbolt'
# network_interface: 'Thunderbolt'
# secrets: inherit
three-m4-pro-cluster:
strategy:
matrix:
model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b', 'llama-3.3-70b']
fail-fast: false
uses: ./.github/workflows/bench_job.yml
with:
config: '{"M4PRO_GPU16_24GB": 3}'
model: ${{ matrix.model }}
calling_job_name: 'three-m4-pro-cluster'
network_interface: 'Ethernet'
secrets: inherit
# test-m3-single-node:
# strategy:
# matrix:
# model: ['llama-3.2-1b']
# fail-fast: false
# uses: ./.github/workflows/bench_job.yml
# with:
# config: '{"M3MAX_GPU40_128GB": 1}'
# model: ${{ matrix.model }}
# calling_job_name: 'test-m3-cluster'
# network_interface: 'Ethernet'
# secrets: inherit

.github/workflows/build-app.yml

@@ -0,0 +1,442 @@
name: Build EXO macOS DMG
# Release workflow:
# 1. Create a draft GitHub Release with the tag name (e.g. v1.0.0) and write release notes in markdown
# 2. Push the tag: git tag v1.0.0 && git push origin v1.0.0
# 3. This workflow builds, signs, and notarizes the DMG
# 4. Release notes are embedded in appcast.xml for Sparkle (rendered as markdown)
# 5. DMG and appcast.xml are uploaded to S3
# 6. The draft GitHub Release is published with the DMG attached
#
# For alpha releases (e.g. v1.0.0-alpha.1): draft release and notes are optional.
# If no draft exists, a release is auto-created with generated notes.
on:
workflow_dispatch:
push:
tags:
- "v*"
branches:
- "test-app"
jobs:
build-macos-app:
runs-on: "macos-26"
permissions:
contents: write
env:
SPARKLE_VERSION: 2.9.0-beta.1
SPARKLE_DOWNLOAD_PREFIX: ${{ secrets.SPARKLE_DOWNLOAD_PREFIX }}
SPARKLE_FEED_URL: ${{ secrets.SPARKLE_FEED_URL }}
SPARKLE_ED25519_PUBLIC: ${{ secrets.SPARKLE_ED25519_PUBLIC }}
SPARKLE_ED25519_PRIVATE: ${{ secrets.SPARKLE_ED25519_PRIVATE }}
SPARKLE_S3_BUCKET: ${{ secrets.SPARKLE_S3_BUCKET }}
SPARKLE_S3_PREFIX: ${{ secrets.SPARKLE_S3_PREFIX }}
EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT: ${{ secrets.EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT }}
AWS_REGION: ${{ secrets.AWS_REGION }}
EXO_BUILD_NUMBER: ${{ github.run_number }}
EXO_LIBP2P_NAMESPACE: ${{ github.ref_name }}
steps:
# ============================================================
# Checkout and tag validation
# ============================================================
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Derive release version from tag
run: |
if [[ "$GITHUB_REF_NAME" == "test-app" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then
VERSION="0.0.0-alpha.0"
echo "IS_ALPHA=true" >> $GITHUB_ENV
else
VERSION="${GITHUB_REF_NAME#v}"
if [[ "$VERSION" == *-alpha* ]]; then
echo "IS_ALPHA=true" >> $GITHUB_ENV
else
echo "IS_ALPHA=false" >> $GITHUB_ENV
fi
fi
echo "RELEASE_VERSION=$VERSION" >> $GITHUB_ENV
- name: Compute build version from semver
run: |
VERSION="$RELEASE_VERSION"
# Extract major.minor.patch (strip prerelease suffix)
BASE_VERSION="${VERSION%%-*}"
MAJOR=$(echo "$BASE_VERSION" | cut -d. -f1)
MINOR=$(echo "$BASE_VERSION" | cut -d. -f2)
PATCH=$(echo "$BASE_VERSION" | cut -d. -f3)
# Extract prerelease number (e.g., "alpha.2" -> 2, or 999 for releases)
if [[ "$VERSION" == *-* ]]; then
PRERELEASE_PART="${VERSION#*-}"
PRERELEASE_NUM="${PRERELEASE_PART##*.}"
# Default to 0 if not a number
if ! [[ "$PRERELEASE_NUM" =~ ^[0-9]+$ ]]; then
PRERELEASE_NUM=0
fi
else
PRERELEASE_NUM=999
fi
# Compute: PRERELEASE + (1000 * PATCH) + (1_000_000 * MINOR) + (1_000_000_000 * MAJOR)
BUILD_VERSION=$((PRERELEASE_NUM + 1000 * PATCH + 1000000 * MINOR + 1000000000 * MAJOR))
echo "EXO_BUILD_VERSION=$BUILD_VERSION" >> $GITHUB_ENV
echo "Computed build version: $BUILD_VERSION from $VERSION"
- name: Ensure tag commit is on main
if: github.ref_type == 'tag'
run: |
git fetch origin main
# Alpha tags can be on any branch, production tags must be on main
if [[ "$IS_ALPHA" == "true" ]]; then
echo "Alpha tag detected, skipping main branch check"
elif ! git merge-base --is-ancestor origin/main HEAD; then
echo "Production tag must point to a commit on main"
exit 1
fi
- name: Fetch and validate release notes
if: github.ref_type == 'tag'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Find draft release by name using gh release list (more reliable with default token)
echo "Looking for draft release named '$GITHUB_REF_NAME'..."
DRAFT_EXISTS=$(gh release list --json name,isDraft --jq ".[] | select(.isDraft == true) | select(.name == \"$GITHUB_REF_NAME\") | .name" 2>/dev/null || echo "")
if [[ -z "$DRAFT_EXISTS" ]]; then
if [[ "$IS_ALPHA" == "true" ]]; then
echo "No draft release found for alpha tag $GITHUB_REF_NAME (optional for alphas)"
echo "HAS_RELEASE_NOTES=false" >> $GITHUB_ENV
exit 0
fi
echo "ERROR: No draft release found for tag $GITHUB_REF_NAME"
echo "Please create a draft release with release notes before pushing the tag."
exit 1
fi
# Fetch full release details via API to get body and ID
echo "Found draft release, fetching details..."
RELEASE_JSON=$(gh api repos/${{ github.repository }}/releases --jq ".[] | select(.draft == true) | select(.name == \"$GITHUB_REF_NAME\")" 2>/dev/null || echo "")
# Extract release notes
NOTES=$(echo "$RELEASE_JSON" | jq -r '.body // ""')
if [[ -z "$NOTES" || "$NOTES" == "null" ]]; then
if [[ "$IS_ALPHA" == "true" ]]; then
echo "Draft release has no notes (optional for alphas)"
echo "HAS_RELEASE_NOTES=false" >> $GITHUB_ENV
exit 0
fi
echo "ERROR: Draft release exists but has no release notes"
echo "Please add release notes to the draft release before pushing the tag."
exit 1
fi
# Save release ID for later publishing
RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id')
echo "DRAFT_RELEASE_ID=$RELEASE_ID" >> $GITHUB_ENV
echo "HAS_RELEASE_NOTES=true" >> $GITHUB_ENV
echo "Found draft release (ID: $RELEASE_ID), saving release notes..."
echo "$NOTES" > /tmp/release_notes.md
echo "RELEASE_NOTES_FILE=/tmp/release_notes.md" >> $GITHUB_ENV
# ============================================================
# Install dependencies
# ============================================================
- name: Select Xcode 26.2
run: |
sudo xcode-select -s /Applications/Xcode_26.2.app
if ! xcrun -f metal >/dev/null 2>&1; then
echo "Metal toolchain is not installed."
exit 1
fi
- name: Install Homebrew packages
run: brew install just awscli macmon
- name: Install UV
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
cache-dependency-glob: uv.lock
- name: Setup Python
run: |
uv python install
uv sync --locked
- name: Install Nix
uses: cachix/install-nix-action@v31
with:
nix_path: nixpkgs=channel:nixos-unstable
- name: Configure Cachix
uses: cachix/cachix-action@v14
with:
name: exo
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
- name: Build dashboard
run: |
DASHBOARD_OUT=$(nix build .#dashboard --print-build-logs --no-link --print-out-paths)
mkdir -p dashboard/build
cp -r "$DASHBOARD_OUT"/* dashboard/build/
- name: Install Sparkle CLI
run: |
CLI_URL="${SPARKLE_CLI_URL:-https://github.com/sparkle-project/Sparkle/releases/download/${SPARKLE_VERSION}/Sparkle-${SPARKLE_VERSION}.tar.xz}"
echo "Downloading Sparkle CLI from: $CLI_URL"
mkdir -p /tmp/sparkle
curl --fail --location --output /tmp/sparkle.tar.xz "$CLI_URL"
tar -xJf /tmp/sparkle.tar.xz -C /tmp/sparkle --strip-components=1
echo "SPARKLE_BIN=/tmp/sparkle/bin" >> $GITHUB_ENV
- name: Prepare code-signing keychain
env:
MACOS_CERTIFICATE: ${{ secrets.MACOS_CERTIFICATE }}
MACOS_CERTIFICATE_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }}
PROVISIONING_PROFILE: ${{ secrets.PROVISIONING_PROFILE }}
run: |
KEYCHAIN_PATH="$HOME/Library/Keychains/build.keychain-db"
# Create fresh keychain
security create-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$KEYCHAIN_PATH"
# Disable auto-lock (no timeout, no lock-on-sleep)
security set-keychain-settings "$KEYCHAIN_PATH"
# Add to search list while preserving existing keychains
security list-keychains -d user -s "$KEYCHAIN_PATH" $(security list-keychains -d user | tr -d '"')
# Set as default and unlock
security default-keychain -s "$KEYCHAIN_PATH"
security unlock-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$KEYCHAIN_PATH"
# Import certificate with full access for codesign
echo "$MACOS_CERTIFICATE" | base64 --decode > /tmp/cert.p12
security import /tmp/cert.p12 -k "$KEYCHAIN_PATH" -P "$MACOS_CERTIFICATE_PASSWORD" \
-T /usr/bin/codesign -T /usr/bin/security -T /usr/bin/productbuild
rm /tmp/cert.p12
# Allow codesign to access the key without prompting
security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$MACOS_CERTIFICATE_PASSWORD" "$KEYCHAIN_PATH"
# Verify keychain is unlocked and identity is available
echo "Verifying signing identity..."
security find-identity -v -p codesigning "$KEYCHAIN_PATH"
# Setup provisioning profile
mkdir -p "$HOME/Library/Developer/Xcode/UserData/Provisioning Profiles"
echo "$PROVISIONING_PROFILE" | base64 --decode > "$HOME/Library/Developer/Xcode/UserData/Provisioning Profiles/EXO.provisionprofile"
# Export keychain path for other steps
echo "BUILD_KEYCHAIN_PATH=$KEYCHAIN_PATH" >> $GITHUB_ENV
# ============================================================
# Build the bundle
# ============================================================
- name: Build PyInstaller bundle
run: uv run pyinstaller packaging/pyinstaller/exo.spec
- name: Build Swift app
env:
MACOS_CERTIFICATE_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }}
SPARKLE_FEED_URL: ${{ secrets.SPARKLE_FEED_URL }}
SPARKLE_ED25519_PUBLIC: ${{ secrets.SPARKLE_ED25519_PUBLIC }}
run: |
cd app/EXO
security unlock-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$BUILD_KEYCHAIN_PATH"
SIGNING_IDENTITY=$(security find-identity -v -p codesigning "$BUILD_KEYCHAIN_PATH" | awk -F '"' '{print $2}')
xcodebuild clean build \
-scheme EXO \
-configuration Release \
-derivedDataPath build \
MARKETING_VERSION="$RELEASE_VERSION" \
CURRENT_PROJECT_VERSION="$EXO_BUILD_VERSION" \
EXO_BUILD_TAG="$RELEASE_VERSION" \
EXO_BUILD_COMMIT="$GITHUB_SHA" \
SPARKLE_FEED_URL="$SPARKLE_FEED_URL" \
SPARKLE_ED25519_PUBLIC="$SPARKLE_ED25519_PUBLIC" \
EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT="$EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT" \
CODE_SIGNING_IDENTITY="$SIGNING_IDENTITY" \
CODE_SIGN_INJECT_BASE_ENTITLEMENTS=YES
mkdir -p ../../output
cp -R build/Build/Products/Release/EXO.app ../../output/EXO.app
- name: Inject PyInstaller runtime
run: |
rm -rf output/EXO.app/Contents/Resources/exo
mkdir -p output/EXO.app/Contents/Resources
cp -R dist/exo output/EXO.app/Contents/Resources/exo
- name: Codesign PyInstaller runtime
env:
MACOS_CERTIFICATE_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }}
run: |
cd output
security unlock-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$BUILD_KEYCHAIN_PATH"
SIGNING_IDENTITY=$(security find-identity -v -p codesigning "$BUILD_KEYCHAIN_PATH" | awk -F '"' '{print $2}')
RUNTIME_DIR="EXO.app/Contents/Resources/exo"
find "$RUNTIME_DIR" -type f \( -perm -111 -o -name "*.dylib" -o -name "*.so" \) -print0 |
while IFS= read -r -d '' file; do
/usr/bin/codesign --force --timestamp --options runtime \
--sign "$SIGNING_IDENTITY" "$file"
done
- name: Sign, notarize, and create DMG
env:
MACOS_CERTIFICATE_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }}
APPLE_NOTARIZATION_USERNAME: ${{ secrets.APPLE_NOTARIZATION_USERNAME }}
APPLE_NOTARIZATION_PASSWORD: ${{ secrets.APPLE_NOTARIZATION_PASSWORD }}
APPLE_NOTARIZATION_TEAM: ${{ secrets.APPLE_NOTARIZATION_TEAM }}
run: |
cd output
security unlock-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$BUILD_KEYCHAIN_PATH"
SIGNING_IDENTITY=$(security find-identity -v -p codesigning "$BUILD_KEYCHAIN_PATH" | awk -F '"' '{print $2}')
/usr/bin/codesign --deep --force --timestamp --options runtime \
--sign "$SIGNING_IDENTITY" EXO.app
mkdir -p dmg-root
cp -R EXO.app dmg-root/
ln -s /Applications dmg-root/Applications
DMG_NAME="EXO-${RELEASE_VERSION}.dmg"
hdiutil create -volname "EXO" -srcfolder dmg-root -ov -format UDZO "$DMG_NAME"
/usr/bin/codesign --force --timestamp --options runtime \
--sign "$SIGNING_IDENTITY" "$DMG_NAME"
if [[ -n "$APPLE_NOTARIZATION_USERNAME" ]]; then
SUBMISSION_OUTPUT=$(xcrun notarytool submit "$DMG_NAME" \
--apple-id "$APPLE_NOTARIZATION_USERNAME" \
--password "$APPLE_NOTARIZATION_PASSWORD" \
--team-id "$APPLE_NOTARIZATION_TEAM" \
--wait --timeout 15m 2>&1)
echo "$SUBMISSION_OUTPUT"
SUBMISSION_ID=$(echo "$SUBMISSION_OUTPUT" | awk 'tolower($1)=="id:" && $2 ~ /^[0-9a-fA-F-]+$/ {print $2; exit}')
STATUS=$(echo "$SUBMISSION_OUTPUT" | awk 'tolower($1)=="status:" {print $2; exit}')
if [[ -n "$SUBMISSION_ID" ]]; then
xcrun notarytool log "$SUBMISSION_ID" \
--apple-id "$APPLE_NOTARIZATION_USERNAME" \
--password "$APPLE_NOTARIZATION_PASSWORD" \
--team-id "$APPLE_NOTARIZATION_TEAM" > notarization-log.txt || true
echo "===== Notarization Log ====="
cat notarization-log.txt
echo "============================"
fi
if [[ "$STATUS" != "Accepted" ]]; then
echo "Notarization failed with status: ${STATUS:-Unknown}"
exit 1
fi
xcrun stapler staple "$DMG_NAME"
fi
- name: Generate Sparkle appcast
env:
SPARKLE_DOWNLOAD_PREFIX: ${{ env.SPARKLE_DOWNLOAD_PREFIX }}
SPARKLE_ED25519_PRIVATE: ${{ secrets.SPARKLE_ED25519_PRIVATE }}
IS_ALPHA: ${{ env.IS_ALPHA }}
run: |
set -euo pipefail
cd output
DOWNLOAD_PREFIX="${SPARKLE_DOWNLOAD_PREFIX:-https://assets.exolabs.net}"
echo "$SPARKLE_ED25519_PRIVATE" > sparkle_ed25519.key
chmod 600 sparkle_ed25519.key
CHANNEL_FLAG=""
if [[ "$IS_ALPHA" == "true" ]]; then
CHANNEL_FLAG="--channel alpha"
echo "Generating appcast for alpha channel"
fi
$SPARKLE_BIN/generate_appcast \
--ed-key-file sparkle_ed25519.key \
--download-url-prefix "$DOWNLOAD_PREFIX" \
$CHANNEL_FLAG \
.
- name: Inject release notes into appcast
if: github.ref_type == 'tag' && env.HAS_RELEASE_NOTES == 'true'
env:
RELEASE_VERSION: ${{ env.RELEASE_VERSION }}
run: |
# Inject markdown release notes with sparkle:format="markdown" (Sparkle 2.9+)
export NOTES=$(cat "$RELEASE_NOTES_FILE")
# Insert description after the enclosure tag for this version
awk '
/<enclosure[^>]*>/ && index($0, ENVIRON["RELEASE_VERSION"]) {
print
print " <description sparkle:format=\"markdown\"><![CDATA["
print ENVIRON["NOTES"]
print " ]]></description>"
next
}
{ print }
' output/appcast.xml > output/appcast.xml.tmp && mv output/appcast.xml.tmp output/appcast.xml
echo "Injected markdown release notes for version $RELEASE_VERSION"
# ============================================================
# Upload artifacts
# ============================================================
- name: Upload DMG
uses: actions/upload-artifact@v4
with:
name: EXO-dmg-${{ env.RELEASE_VERSION }}
path: output/EXO-${{ env.RELEASE_VERSION }}.dmg
- name: Upload to S3
if: env.SPARKLE_S3_BUCKET != '' && github.ref_type == 'tag'
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ env.AWS_REGION }}
SPARKLE_S3_BUCKET: ${{ env.SPARKLE_S3_BUCKET }}
SPARKLE_S3_PREFIX: ${{ env.SPARKLE_S3_PREFIX }}
IS_ALPHA: ${{ env.IS_ALPHA }}
run: |
set -euo pipefail
cd output
PREFIX="${SPARKLE_S3_PREFIX:-}"
if [[ -n "$PREFIX" && "${PREFIX: -1}" != "/" ]]; then
PREFIX="${PREFIX}/"
fi
DMG_NAME="EXO-${RELEASE_VERSION}.dmg"
aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}${DMG_NAME}"
if [[ "$IS_ALPHA" != "true" ]]; then
aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-latest.dmg"
aws s3 cp appcast.xml "s3://${SPARKLE_S3_BUCKET}/${PREFIX}appcast.xml" --content-type application/xml --cache-control no-cache
fi
- name: Publish GitHub Release
if: github.ref_type == 'tag'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
DMG_PATH="output/EXO-${RELEASE_VERSION}.dmg"
if [[ "$HAS_RELEASE_NOTES" == "true" ]]; then
# Update the draft release with the tag and upload DMG
gh api --method PATCH "repos/${{ github.repository }}/releases/$DRAFT_RELEASE_ID" \
-f tag_name="$GITHUB_REF_NAME" \
-F draft=false
gh release upload "$GITHUB_REF_NAME" "$DMG_PATH" --clobber
echo "Published release $GITHUB_REF_NAME with DMG attached"
else
# Alpha without draft release - create one with auto-generated notes
gh release create "$GITHUB_REF_NAME" "$DMG_PATH" \
--title "$GITHUB_REF_NAME" \
--generate-notes \
--prerelease
echo "Created alpha release $GITHUB_REF_NAME with auto-generated notes"
fi
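The "Compute build version from semver" step above packs a tag into a single monotonic integer (prerelease number, then patch × 1 000, minor × 1 000 000, major × 1 000 000 000, with final releases taking prerelease slot 999 so they sort above their alphas). A sketch of the same mapping in Python, assuming the same numbering scheme (this helper is illustrative, not part of the repo):

```python
def build_version(version: str) -> int:
    """Map a semver tag (without the leading 'v') to a monotonic build number.

    Prerelease builds (e.g. "1.0.0-alpha.2") sort below the final release
    of the same patch, which gets the reserved prerelease slot 999.
    """
    base, _, prerelease = version.partition("-")
    major, minor, patch = (int(p) for p in base.split("."))
    if prerelease:
        # "alpha.2" -> 2; non-numeric suffixes default to 0, as in the workflow
        num = prerelease.rsplit(".", 1)[-1]
        pre = int(num) if num.isdigit() else 0
    else:
        pre = 999
    return pre + 1000 * patch + 1_000_000 * minor + 1_000_000_000 * major

print(build_version("1.0.0"))          # 1000000999
print(build_version("1.0.0-alpha.2"))  # 1000000002
```

This gives roughly 1 000 prerelease slots per patch and 1 000 patches per minor before adjacent versions could collide.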

.github/workflows/pipeline.yml

@@ -0,0 +1,136 @@
name: ci-pipeline
on:
push:
pull_request:
branches:
- staging
- main
jobs:
typecheck:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
lfs: false
- uses: cachix/install-nix-action@v31
with:
nix_path: nixpkgs=channel:nixos-unstable
- uses: cachix/cachix-action@v14
name: Configure Cachix
with:
name: exo
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
- name: Configure git user
run: |
git config --local user.email "github-actions@users.noreply.github.com"
git config --local user.name "github-actions bot"
shell: bash
- name: Pull LFS files
run: |
echo "Pulling Git LFS files..."
git lfs pull
shell: bash
- name: Setup Nix Environment
run: |
echo "Checking for nix installation..."
# Check if nix binary exists directly
if [ -f /nix/var/nix/profiles/default/bin/nix ]; then
echo "Found nix binary at /nix/var/nix/profiles/default/bin/nix"
export PATH="/nix/var/nix/profiles/default/bin:$PATH"
echo "PATH=$PATH" >> $GITHUB_ENV
nix --version
elif [ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]; then
echo "Found nix profile script, sourcing..."
source /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
nix --version
elif command -v nix >/dev/null 2>&1; then
echo "Nix already in PATH"
nix --version
else
echo "Nix not found. Debugging info:"
echo "Contents of /nix/var/nix/profiles/default/:"
ls -la /nix/var/nix/profiles/default/ 2>/dev/null || echo "Directory not found"
echo "Contents of /nix/var/nix/profiles/default/bin/:"
ls -la /nix/var/nix/profiles/default/bin/ 2>/dev/null || echo "Directory not found"
exit 1
fi
shell: bash
- name: Configure basedpyright include for local MLX
run: |
RUNNER_LABELS='${{ toJSON(runner.labels) }}'
if echo "$RUNNER_LABELS" | grep -q "local_mlx"; then
if [ -d "/Users/Shared/mlx" ]; then
echo "Updating [tool.basedpyright].include to use /Users/Shared/mlx"
awk '
BEGIN { insec=0 }
/^\[tool\.basedpyright\]/ { insec=1; print; next }
insec && /^\[/ { insec=0 }  # next section
insec && /^[ \t]*include[ \t]*=/ {
print "include = [\"/Users/Shared/mlx\"]"
next
}
{ print }
' pyproject.toml > pyproject.toml.tmp && mv pyproject.toml.tmp pyproject.toml
echo "New [tool.basedpyright] section:"
sed -n '/^\[tool\.basedpyright\]/,/^\[/p' pyproject.toml | sed '$d' || true
else
echo "local_mlx tag present but /Users/Shared/mlx not found; leaving pyproject unchanged."
fi
else
echo "Runner does not have 'local_mlx' tag; leaving pyproject unchanged."
fi
shell: bash
- uses: ./.github/actions/typecheck
nix:
name: Build and check (${{ matrix.system }})
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
include:
- runner: macos-26
system: aarch64-darwin
- runner: ubuntu-latest
system: x86_64-linux
- runner: ubuntu-24.04-arm
system: aarch64-linux
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
lfs: false
- uses: cachix/install-nix-action@v31
with:
nix_path: nixpkgs=channel:nixos-unstable
- uses: cachix/cachix-action@v14
name: Configure Cachix
with:
name: exo
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
- name: Build all Nix outputs
run: |
nix flake show --json | jq -r '
[
(.packages."${{ matrix.system }}" // {} | keys[] | ".#packages.${{ matrix.system }}.\(.)"),
(.devShells."${{ matrix.system }}" // {} | keys[] | ".#devShells.${{ matrix.system }}.\(.)")
] | .[]
' | xargs nix build
- name: Run nix flake check
run: nix flake check
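The "Build all Nix outputs" step above turns `nix flake show --json` into a list of installables by enumerating every package and devShell attribute for the matrix system. A Python sketch of that jq filter, assuming the standard `nix flake show --json` layout of `{"packages": {<system>: {<name>: ...}}}` (the helper name is illustrative):

```python
def flake_targets(flake_show: dict, system: str) -> list:
    """Mirror the jq filter: list every package and devShell attribute
    for one system as a `nix build` installable reference."""
    targets = []
    for kind in ("packages", "devShells"):
        # `// {}` in jq: tolerate a flake with no outputs of this kind
        for name in flake_show.get(kind, {}).get(system, {}):
            targets.append(".#{}.{}.{}".format(kind, system, name))
    return targets

show = {"packages": {"aarch64-darwin": {"dashboard": {}}},
        "devShells": {"aarch64-darwin": {"default": {}}}}
print(flake_targets(show, "aarch64-darwin"))
```

The resulting list is piped to `xargs nix build`, so every flake output for the current system is built in one invocation.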

.gitignore

@@ -1,175 +1,30 @@
__pycache__/
.venv*
test_weights.npz
.exo_used_ports
.exo_node_id
# gitingest
digest.txt
# python
**/__pycache__
# nix
.direnv/
# IDEA (PyCharm)
.idea
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# xcode / macos
*.xcuserstate
*.xcuserdata
*.xcuserdatad/
**/.DS_Store
app/EXO/build/
dist/
# C extensions
*.so
# Distribution / packaging
/.Python
/develop-eggs/
/dist/
/downloads/
/eggs/
/.eggs/
/lib/
/lib64/
/parts/
/sdist/
/var/
/wheels/
/share/python-wheels/
/*.egg-info/
/.installed.cfg
/*.egg
/MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
# rust
target/
**/*.rs.bk
*.pdb
# Jupyter Notebook
.ipynb_checkpoints
Untitled.ipynb
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
**/*.xcodeproj/*
.aider*
exo/tinychat/images/*.png
# svelte
dashboard/build/
dashboard/node_modules/
dashboard/.svelte-kit/

.idea/.gitignore generated vendored Normal file

@@ -0,0 +1,9 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
workspace.xml

.idea/LanguageServersSettings.xml generated Normal file

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="LanguageServerSettingsState">
<state>
<map>
<entry key="com.insyncwithfoo.pyright">
<value>
<LanguageServerDefinitionSettings>
<option name="errorReportingKind" value="in_log" />
</LanguageServerDefinitionSettings>
</value>
</entry>
</map>
</state>
</component>
</project>

.idea/exo-v2.iml generated Normal file

@@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="EMPTY_MODULE" version="4">
<component name="FacetManager">
<facet type="Python" name="Python facet">
<configuration sdkName="Python 3.13 virtualenv at ~/Desktop/exo/.venv" />
</facet>
</component>
<component name="Go" enabled="true" />
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/scripts/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/rust/exo_pyo3_bindings/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/rust/exo_pyo3_bindings/tests" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/rust/util/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/rust/networking/examples" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/rust/networking/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/rust/networking/tests" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/rust/system_custodian/src" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/.venv" />
<excludeFolder url="file://$MODULE_DIR$/.direnv" />
<excludeFolder url="file://$MODULE_DIR$/build" />
<excludeFolder url="file://$MODULE_DIR$/dist" />
<excludeFolder url="file://$MODULE_DIR$/.go_cache" />
<excludeFolder url="file://$MODULE_DIR$/rust/target" />
</content>
<orderEntry type="jdk" jdkName="Python 3.13 (exo)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Python 3.13 virtualenv at ~/Desktop/exo/.venv interpreter library" level="application" />
</component>
</module>

.idea/externalDependencies.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalDependencies">
<plugin id="systems.fehn.intellijdirenv" />
</component>
</project>


@@ -0,0 +1,14 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyCompatibilityInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ourVersions">
<value>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="3.14" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>

.idea/misc.xml generated Normal file

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.13 (exo)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (exo)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>
</project>

.idea/modules.xml generated Normal file

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/exo.iml" filepath="$PROJECT_DIR$/.idea/exo.iml" />
</modules>
</component>
</project>

.idea/pyright-overrides.xml generated Normal file

@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="com.insyncwithfoo.pyright.configurations.Override">
<option name="names">
<map>
<entry key="configurationFile" value="true" />
<entry key="diagnosticMode" value="true" />
<entry key="inlayHintsGenericTypes" value="true" />
<entry key="prefixTooltipMessages" value="true" />
<entry key="runningMode" value="true" />
<entry key="smartExecutableResolution" value="true" />
<entry key="smartLanguageServerExecutableResolution" value="true" />
<entry key="useEditorFontForTooltips" value="true" />
<entry key="useTypingExtensions" value="true" />
</map>
</option>
</component>
</project>

.idea/pyright.xml generated Normal file

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="com.insyncwithfoo.pyright.configurations.Local">
<option name="diagnosticMode" value="WORKSPACE" />
<option name="inlayHintsGenericTypes" value="true" />
<option name="prefixTooltipMessages" value="true" />
<option name="useEditorFontForTooltips" value="true" />
</component>
</project>

.idea/vcs.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>


@@ -0,0 +1,7 @@
"""
This type stub file was generated by pyright.
"""
import os
if "TOKENIZERS_PARALLELISM" not in os.environ: ...


@@ -0,0 +1,3 @@
"""
This type stub file was generated by pyright.
"""


@@ -0,0 +1,47 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import PIL.Image
import tqdm
from typing import Protocol
from mflux.models.common.config.config import Config
class BeforeLoopCallback(Protocol):
def call_before_loop(
self,
seed: int,
prompt: str,
latents: mx.array,
config: Config,
canny_image: PIL.Image.Image | None = ...,
depth_image: PIL.Image.Image | None = ...,
) -> None: ...
class InLoopCallback(Protocol):
def call_in_loop(
self,
t: int,
seed: int,
prompt: str,
latents: mx.array,
config: Config,
time_steps: tqdm.tqdm,
) -> None: ...
class AfterLoopCallback(Protocol):
def call_after_loop(
self, seed: int, prompt: str, latents: mx.array, config: Config
) -> None: ...
class InterruptCallback(Protocol):
def call_interrupt(
self,
t: int,
seed: int,
prompt: str,
latents: mx.array,
config: Config,
time_steps: tqdm.tqdm,
) -> None: ...
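The callback classes above are `typing.Protocol`s, so implementers conform structurally rather than by inheritance. A minimal sketch of the pattern with simplified stand-in names (not the real mflux signatures):

```python
from typing import Protocol, runtime_checkable

@runtime_checkable
class AfterLoop(Protocol):
    # Simplified stand-in for AfterLoopCallback above.
    def call_after_loop(self, seed: int, prompt: str) -> None: ...

class PrintingCallback:
    # No inheritance needed: matching the method shape is enough.
    def call_after_loop(self, seed: int, prompt: str) -> None:
        print(f"seed={seed} prompt={prompt!r}")

def finish(cb: AfterLoop) -> None:
    cb.call_after_loop(7, "a cat")

finish(PrintingCallback())  # prints: seed=7 prompt='a cat'
```

With `@runtime_checkable`, `isinstance(PrintingCallback(), AfterLoop)` also holds at runtime, which is how a registry can sort mixed callbacks into per-hook lists.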


@@ -0,0 +1,24 @@
"""
This type stub file was generated by pyright.
"""
from typing import TYPE_CHECKING
from mflux.callbacks.callback import (
AfterLoopCallback,
BeforeLoopCallback,
InLoopCallback,
InterruptCallback,
)
from mflux.callbacks.generation_context import GenerationContext
from mflux.models.common.config.config import Config
if TYPE_CHECKING: ...
class CallbackRegistry:
def __init__(self) -> None: ...
def register(self, callback) -> None: ...
def start(self, seed: int, prompt: str, config: Config) -> GenerationContext: ...
def before_loop_callbacks(self) -> list[BeforeLoopCallback]: ...
def in_loop_callbacks(self) -> list[InLoopCallback]: ...
def after_loop_callbacks(self) -> list[AfterLoopCallback]: ...
def interrupt_callbacks(self) -> list[InterruptCallback]: ...


@@ -0,0 +1,29 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import PIL.Image
import tqdm
from typing import TYPE_CHECKING
from mflux.callbacks.callback_registry import CallbackRegistry
from mflux.models.common.config.config import Config
if TYPE_CHECKING: ...
class GenerationContext:
def __init__(
self, registry: CallbackRegistry, seed: int, prompt: str, config: Config
) -> None: ...
def before_loop(
self,
latents: mx.array,
*,
canny_image: PIL.Image.Image | None = ...,
depth_image: PIL.Image.Image | None = ...,
) -> None: ...
def in_loop(self, t: int, latents: mx.array, time_steps: tqdm.tqdm = ...) -> None: ...
def after_loop(self, latents: mx.array) -> None: ...
def interruption(
self, t: int, latents: mx.array, time_steps: tqdm.tqdm = ...
) -> None: ...


@@ -0,0 +1,3 @@
"""
This type stub file was generated by pyright.
"""


@@ -0,0 +1,22 @@
"""
This type stub file was generated by pyright.
"""
import os
BATTERY_PERCENTAGE_STOP_LIMIT = ...
CONTROLNET_STRENGTH = ...
DEFAULT_DEV_FILL_GUIDANCE = ...
DEFAULT_DEPTH_GUIDANCE = ...
DIMENSION_STEP_PIXELS = ...
GUIDANCE_SCALE = ...
GUIDANCE_SCALE_KONTEXT = ...
IMAGE_STRENGTH = ...
MODEL_CHOICES = ...
MODEL_INFERENCE_STEPS = ...
QUANTIZE_CHOICES = ...
if os.environ.get("MFLUX_CACHE_DIR"):
MFLUX_CACHE_DIR = ...
else:
MFLUX_CACHE_DIR = ...
MFLUX_LORA_CACHE_DIR = ...


@@ -0,0 +1,3 @@
"""
This type stub file was generated by pyright.
"""


@@ -0,0 +1,3 @@
"""
This type stub file was generated by pyright.
"""


@@ -0,0 +1,3 @@
"""
This type stub file was generated by pyright.
"""


@@ -0,0 +1,8 @@
"""
This type stub file was generated by pyright.
"""
from mflux.models.common.config.config import Config
from mflux.models.common.config.model_config import ModelConfig
__all__ = ["Config", "ModelConfig"]


@@ -0,0 +1,66 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from pathlib import Path
from typing import Any
from tqdm import tqdm
from mflux.models.common.config.model_config import ModelConfig
logger = ...
class Config:
def __init__(
self,
model_config: ModelConfig,
num_inference_steps: int = ...,
height: int = ...,
width: int = ...,
guidance: float = ...,
image_path: Path | str | None = ...,
image_strength: float | None = ...,
depth_image_path: Path | str | None = ...,
redux_image_paths: list[Path | str] | None = ...,
redux_image_strengths: list[float] | None = ...,
masked_image_path: Path | str | None = ...,
controlnet_strength: float | None = ...,
scheduler: str = ...,
) -> None: ...
@property
def height(self) -> int: ...
@property
def width(self) -> int: ...
@width.setter
def width(self, value): # -> None:
...
@property
def image_seq_len(self) -> int: ...
@property
def guidance(self) -> float: ...
@property
def num_inference_steps(self) -> int: ...
@property
def precision(self) -> mx.Dtype: ...
@property
def num_train_steps(self) -> int: ...
@property
def image_path(self) -> Path | None: ...
@property
def image_strength(self) -> float | None: ...
@property
def depth_image_path(self) -> Path | None: ...
@property
def redux_image_paths(self) -> list[Path] | None: ...
@property
def redux_image_strengths(self) -> list[float] | None: ...
@property
def masked_image_path(self) -> Path | None: ...
@property
def init_time_step(self) -> int: ...
@property
def time_steps(self) -> tqdm: ...
@property
def controlnet_strength(self) -> float | None: ...
@property
def scheduler(self) -> Any: ...


@@ -0,0 +1,86 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from functools import lru_cache
from typing import Literal
class ModelConfig:
precision: mx.Dtype = ...
def __init__(
self,
priority: int,
aliases: list[str],
model_name: str,
base_model: str | None,
controlnet_model: str | None,
custom_transformer_model: str | None,
num_train_steps: int | None,
max_sequence_length: int | None,
supports_guidance: bool | None,
requires_sigma_shift: bool | None,
transformer_overrides: dict | None = ...,
) -> None: ...
@staticmethod
@lru_cache
def dev() -> ModelConfig: ...
@staticmethod
@lru_cache
def schnell() -> ModelConfig: ...
@staticmethod
@lru_cache
def dev_kontext() -> ModelConfig: ...
@staticmethod
@lru_cache
def dev_fill() -> ModelConfig: ...
@staticmethod
@lru_cache
def dev_redux() -> ModelConfig: ...
@staticmethod
@lru_cache
def dev_depth() -> ModelConfig: ...
@staticmethod
@lru_cache
def dev_controlnet_canny() -> ModelConfig: ...
@staticmethod
@lru_cache
def schnell_controlnet_canny() -> ModelConfig: ...
@staticmethod
@lru_cache
def dev_controlnet_upscaler() -> ModelConfig: ...
@staticmethod
@lru_cache
def dev_fill_catvton() -> ModelConfig: ...
@staticmethod
@lru_cache
def krea_dev() -> ModelConfig: ...
@staticmethod
@lru_cache
def flux2_klein_4b() -> ModelConfig: ...
@staticmethod
@lru_cache
def flux2_klein_9b() -> ModelConfig: ...
@staticmethod
@lru_cache
def qwen_image() -> ModelConfig: ...
@staticmethod
@lru_cache
def qwen_image_edit() -> ModelConfig: ...
@staticmethod
@lru_cache
def fibo() -> ModelConfig: ...
@staticmethod
@lru_cache
def z_image_turbo() -> ModelConfig: ...
@staticmethod
@lru_cache
def seedvr2_3b() -> ModelConfig: ...
def x_embedder_input_dim(self) -> int: ...
def is_canny(self) -> bool: ...
@staticmethod
def from_name(
model_name: str, base_model: Literal["dev", "schnell", "krea-dev"] | None = ...
) -> ModelConfig: ...
AVAILABLE_MODELS = ...
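The `@staticmethod` plus `@lru_cache` combination on the factories above gives memoized zero-argument constructors: each named configuration is built once and every later call returns the same instance. A generic sketch of the pattern (hypothetical class, not mflux's real fields):

```python
from functools import lru_cache

class Cfg:
    def __init__(self, name: str) -> None:
        self.name = name

    @staticmethod
    @lru_cache
    def dev() -> "Cfg":
        # Construction runs once; later Cfg.dev() calls return the
        # cached instance from lru_cache.
        return Cfg("dev")

a, b = Cfg.dev(), Cfg.dev()
print(a is b)  # True
```

Note the decorator order: `lru_cache` wraps the plain function, and `staticmethod` sits outermost so the cached callable is reachable as `Cfg.dev` without an instance.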


@@ -0,0 +1,7 @@
"""
This type stub file was generated by pyright.
"""
"""
This type stub file was generated by pyright.
"""


@@ -0,0 +1,49 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from pathlib import Path
from typing import TYPE_CHECKING, TypeAlias
from mlx import nn
from mflux.models.common.vae.tiling_config import TilingConfig
from mflux.models.fibo.latent_creator.fibo_latent_creator import FiboLatentCreator
from mflux.models.flux.latent_creator.flux_latent_creator import FluxLatentCreator
from mflux.models.qwen.latent_creator.qwen_latent_creator import QwenLatentCreator
from mflux.models.z_image.latent_creator.z_image_latent_creator import (
ZImageLatentCreator,
)
if TYPE_CHECKING:
LatentCreatorType: TypeAlias = type[
FiboLatentCreator | FluxLatentCreator | QwenLatentCreator | ZImageLatentCreator
]
class Img2Img:
def __init__(
self,
vae: nn.Module,
latent_creator: LatentCreatorType,
sigmas: mx.array,
init_time_step: int,
image_path: str | Path | None,
tiling_config: TilingConfig | None = ...,
) -> None: ...
class LatentCreator:
@staticmethod
def create_for_txt2img_or_img2img(
seed: int, height: int, width: int, img2img: Img2Img
) -> mx.array: ...
@staticmethod
def encode_image(
vae: nn.Module,
image_path: str | Path,
height: int,
width: int,
tiling_config: TilingConfig | None = ...,
) -> mx.array: ...
@staticmethod
def add_noise_by_interpolation(
clean: mx.array, noise: mx.array, sigma: float
) -> mx.array: ...
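`add_noise_by_interpolation` above presumably follows the usual flow-matching noising convention, a linear blend of clean latents and noise controlled by sigma. A numpy sketch under that assumption (not mflux's exact code):

```python
import numpy as np

def add_noise_by_interpolation(clean, noise, sigma):
    # Assumed convention: sigma=0 -> pure clean latents, sigma=1 -> pure noise.
    return (1.0 - sigma) * clean + sigma * noise

clean = np.full((2, 2), 4.0)
noise = np.zeros((2, 2))
mixed = add_noise_by_interpolation(clean, noise, 0.25)
print(float(mixed[0, 0]))  # 0.75 * 4.0 = 3.0
```

This is the img2img entry point: the encoded input image is pushed part-way toward noise, and `init_time_step` then resumes sampling from the matching sigma.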


@@ -0,0 +1,3 @@
"""
This type stub file was generated by pyright.
"""


@@ -0,0 +1,13 @@
"""
This type stub file was generated by pyright.
"""
from mlx import nn
from mflux.models.common.lora.layer.linear_lora_layer import LoRALinear
class FusedLoRALinear(nn.Module):
def __init__(
self, base_linear: nn.Linear | nn.QuantizedLinear, loras: list[LoRALinear]
) -> None: ...
def __call__(self, x): # -> array:
...


@@ -0,0 +1,22 @@
"""
This type stub file was generated by pyright.
"""
from mlx import nn
class LoRALinear(nn.Module):
@staticmethod
def from_linear(
linear: nn.Linear | nn.QuantizedLinear, r: int = ..., scale: float = ...
): # -> LoRALinear:
...
def __init__(
self,
input_dims: int,
output_dims: int,
r: int = ...,
scale: float = ...,
bias: bool = ...,
) -> None: ...
def __call__(self, x): # -> array:
...
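`LoRALinear.from_linear` above presumably follows the standard LoRA construction: a frozen base weight augmented by a rank-`r` product scaled by `scale`, with the up-projection zero-initialized so the adapter starts as a no-op. A numpy sketch of that forward pass (generic LoRA, not mflux's exact implementation):

```python
import numpy as np

rng = np.random.default_rng(0)
d_in, d_out, r, scale = 8, 4, 2, 0.5

W = rng.normal(size=(d_out, d_in))   # frozen base weight
A = rng.normal(size=(r, d_in))       # trainable down-projection
B = np.zeros((d_out, r))             # trainable up-projection, zero-init
x = rng.normal(size=(3, d_in))

# y = x W^T + scale * (x A^T) B^T
y = x @ W.T + scale * (x @ A.T) @ B.T
print(np.allclose(y, x @ W.T))  # True: zero-init B means no change at start
```

The zero-initialized `B` is why freshly attached LoRA layers leave the model's output unchanged until training (or loaded adapter weights) populate them.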


@@ -0,0 +1,26 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import mlx.nn as nn
from collections.abc import Callable
from dataclasses import dataclass
from mflux.models.common.lora.mapping.lora_mapping import LoRATarget
@dataclass
class PatternMatch:
source_pattern: str
target_path: str
matrix_name: str
transpose: bool
transform: Callable[[mx.array], mx.array] | None = ...
class LoRALoader:
@staticmethod
def load_and_apply_lora(
lora_mapping: list[LoRATarget],
transformer: nn.Module,
lora_paths: list[str] | None = ...,
lora_scales: list[float] | None = ...,
) -> tuple[list[str], list[float]]: ...


@@ -0,0 +1,21 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from collections.abc import Callable
from dataclasses import dataclass
from typing import List, Protocol
@dataclass
class LoRATarget:
model_path: str
possible_up_patterns: List[str]
possible_down_patterns: List[str]
possible_alpha_patterns: List[str] = ...
up_transform: Callable[[mx.array], mx.array] | None = ...
down_transform: Callable[[mx.array], mx.array] | None = ...
class LoRAMapping(Protocol):
@staticmethod
def get_mapping() -> List[LoRATarget]: ...


@@ -0,0 +1,9 @@
"""
This type stub file was generated by pyright.
"""
import mlx.nn as nn
class LoRASaver:
@staticmethod
def bake_and_strip_lora(module: nn.Module) -> nn.Module: ...


@@ -0,0 +1,35 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
class LoraTransforms:
@staticmethod
def split_q_up(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_k_up(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_v_up(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_q_down(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_k_down(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_v_down(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_q_up(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_k_up(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_v_up(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_mlp_up(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_q_down(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_k_down(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_v_down(tensor: mx.array) -> mx.array: ...
@staticmethod
def split_single_mlp_down(tensor: mx.array) -> mx.array: ...


@@ -0,0 +1,17 @@
"""
This type stub file was generated by pyright.
"""
from mflux.models.common.resolution.config_resolution import ConfigResolution
from mflux.models.common.resolution.lora_resolution import LoraResolution
from mflux.models.common.resolution.path_resolution import PathResolution
from mflux.models.common.resolution.quantization_resolution import (
QuantizationResolution,
)
__all__ = [
"ConfigResolution",
"LoraResolution",
"PathResolution",
"QuantizationResolution",
]


@@ -0,0 +1,39 @@
"""
This type stub file was generated by pyright.
"""
from enum import Enum
from typing import NamedTuple
class QuantizationAction(Enum):
NONE = ...
STORED = ...
REQUESTED = ...
class PathAction(Enum):
LOCAL = ...
HUGGINGFACE_CACHED = ...
HUGGINGFACE = ...
ERROR = ...
class LoraAction(Enum):
LOCAL = ...
REGISTRY = ...
HUGGINGFACE_COLLECTION_CACHED = ...
HUGGINGFACE_COLLECTION = ...
HUGGINGFACE_REPO_CACHED = ...
HUGGINGFACE_REPO = ...
ERROR = ...
class ConfigAction(Enum):
EXACT_MATCH = ...
EXPLICIT_BASE = ...
INFER_SUBSTRING = ...
ERROR = ...
class Rule(NamedTuple):
priority: int
name: str
check: str
action: QuantizationAction | PathAction | LoraAction | ConfigAction
...


@@ -0,0 +1,14 @@
"""
This type stub file was generated by pyright.
"""
from typing import TYPE_CHECKING
from mflux.models.common.config.model_config import ModelConfig
if TYPE_CHECKING: ...
logger = ...
class ConfigResolution:
RULES = ...
@staticmethod
def resolve(model_name: str, base_model: str | None = ...) -> ModelConfig: ...


@@ -0,0 +1,21 @@
"""
This type stub file was generated by pyright.
"""
from pathlib import Path
logger = ...
class LoraResolution:
RULES = ...
_registry: dict[str, Path] = ...
@staticmethod
def resolve(path: str) -> str: ...
@staticmethod
def resolve_paths(paths: list[str] | None) -> list[str]: ...
@staticmethod
def resolve_scales(scales: list[float] | None, num_paths: int) -> list[float]: ...
@staticmethod
def get_registry() -> dict[str, Path]: ...
@staticmethod
def discover_files(library_paths: list[Path]) -> dict[str, Path]: ...


@@ -0,0 +1,12 @@
"""
This type stub file was generated by pyright.
"""
from pathlib import Path
logger = ...
class PathResolution:
RULES = ...
@staticmethod
def resolve(path: str | None, patterns: list[str] | None = ...) -> Path | None: ...


@@ -0,0 +1,12 @@
"""
This type stub file was generated by pyright.
"""
logger = ...
class QuantizationResolution:
RULES = ...
@staticmethod
def resolve(
stored: int | None, requested: int | None
) -> tuple[int | None, str | None]: ...


@@ -0,0 +1,26 @@
"""
This type stub file was generated by pyright.
"""
from .flow_match_euler_discrete_scheduler import FlowMatchEulerDiscreteScheduler
from .linear_scheduler import LinearScheduler
from .seedvr2_euler_scheduler import SeedVR2EulerScheduler
__all__ = [
"LinearScheduler",
"FlowMatchEulerDiscreteScheduler",
"SeedVR2EulerScheduler",
]
class SchedulerModuleNotFound(ValueError): ...
class SchedulerClassNotFound(ValueError): ...
class InvalidSchedulerType(TypeError): ...
SCHEDULER_REGISTRY = ...
def register_contrib(scheduler_object, scheduler_name=...): # -> None:
...
def try_import_external_scheduler(
scheduler_object_path: str,
): # -> type[BaseScheduler]:
...


@@ -0,0 +1,16 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from abc import ABC, abstractmethod
class BaseScheduler(ABC):
@property
@abstractmethod
def sigmas(self) -> mx.array: ...
@abstractmethod
def step(
self, noise: mx.array, timestep: int, latents: mx.array, **kwargs
) -> mx.array: ...
def scale_model_input(self, latents: mx.array, t: int) -> mx.array: ...


@@ -0,0 +1,26 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from typing import TYPE_CHECKING
from mflux.models.common.config.config import Config
from mflux.models.common.schedulers.base_scheduler import BaseScheduler
if TYPE_CHECKING: ...
class FlowMatchEulerDiscreteScheduler(BaseScheduler):
def __init__(self, config: Config) -> None: ...
@property
def sigmas(self) -> mx.array: ...
@property
def timesteps(self) -> mx.array: ...
def set_image_seq_len(self, image_seq_len: int) -> None: ...
@staticmethod
def get_timesteps_and_sigmas(
image_seq_len: int, num_inference_steps: int, num_train_timesteps: int = ...
) -> tuple[mx.array, mx.array]: ...
def step(
self, noise: mx.array, timestep: int, latents: mx.array, **kwargs
) -> mx.array: ...
def scale_model_input(self, latents: mx.array, t: int) -> mx.array: ...
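A flow-match Euler `step` typically moves the latents along the model's predicted velocity by the gap between adjacent sigmas. A generic sketch of that update (assumed form, not mflux's exact scheduler):

```python
import numpy as np

def euler_step(latents, model_out, sigmas, t):
    # x_{t+1} = x_t + (sigma_{t+1} - sigma_t) * v_t  (generic flow-match Euler)
    return latents + (sigmas[t + 1] - sigmas[t]) * model_out

sigmas = np.linspace(1.0, 0.0, 5)   # [1.0, 0.75, 0.5, 0.25, 0.0]
x = np.ones((1, 4))
v = np.full((1, 4), 2.0)
x = euler_step(x, v, sigmas, 0)
print(float(x[0, 0]))  # 1 + (0.75 - 1.0) * 2 = 0.5
```

Because the sigma schedule decreases toward zero, each step subtracts a shrinking multiple of the predicted velocity until the latents land at sigma = 0.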


@@ -0,0 +1,20 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from typing import TYPE_CHECKING
from mflux.models.common.config.config import Config
from mflux.models.common.schedulers.base_scheduler import BaseScheduler
if TYPE_CHECKING: ...
class LinearScheduler(BaseScheduler):
def __init__(self, config: Config) -> None: ...
@property
def sigmas(self) -> mx.array: ...
@property
def timesteps(self) -> mx.array: ...
def step(
self, noise: mx.array, timestep: int, latents: mx.array, **kwargs
) -> mx.array: ...


@@ -0,0 +1,20 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from typing import TYPE_CHECKING
from mflux.models.common.config.config import Config
from mflux.models.common.schedulers.base_scheduler import BaseScheduler
if TYPE_CHECKING: ...
class SeedVR2EulerScheduler(BaseScheduler):
def __init__(self, config: Config) -> None: ...
@property
def timesteps(self) -> mx.array: ...
@property
def sigmas(self) -> mx.array: ...
def step(
self, noise: mx.array, timestep: int, latents: mx.array, **kwargs
) -> mx.array: ...


@@ -0,0 +1,24 @@
"""
This type stub file was generated by pyright.
"""
from mflux.models.common.tokenizer.tokenizer import (
BaseTokenizer,
LanguageTokenizer,
Tokenizer,
VisionLanguageTokenizer,
)
from mflux.models.common.tokenizer.tokenizer_loader import TokenizerLoader
from mflux.models.common.tokenizer.tokenizer_output import TokenizerOutput
"""
This type stub file was generated by pyright.
"""
__all__ = [
"Tokenizer",
"BaseTokenizer",
"LanguageTokenizer",
"VisionLanguageTokenizer",
"TokenizerLoader",
"TokenizerOutput",
]


@@ -0,0 +1,74 @@
"""
This type stub file was generated by pyright.
"""
from abc import ABC, abstractmethod
from typing import Protocol, runtime_checkable
from PIL import Image
from transformers import PreTrainedTokenizer
from mflux.models.common.tokenizer.tokenizer_output import TokenizerOutput
"""
This type stub file was generated by pyright.
"""
@runtime_checkable
class Tokenizer(Protocol):
tokenizer: PreTrainedTokenizer
def tokenize(
self,
prompt: str | list[str],
images: list[Image.Image] | None = ...,
max_length: int | None = ...,
**kwargs,
) -> TokenizerOutput: ...
class BaseTokenizer(ABC):
def __init__(
self, tokenizer: PreTrainedTokenizer, max_length: int = ...
) -> None: ...
@abstractmethod
def tokenize(
self,
prompt: str | list[str],
images: list[Image.Image] | None = ...,
max_length: int | None = ...,
**kwargs,
) -> TokenizerOutput: ...
class LanguageTokenizer(BaseTokenizer):
def __init__(
self,
tokenizer: PreTrainedTokenizer,
max_length: int = ...,
padding: str = ...,
return_attention_mask: bool = ...,
template: str | None = ...,
use_chat_template: bool = ...,
chat_template_kwargs: dict | None = ...,
add_special_tokens: bool = ...,
) -> None: ...
def tokenize(
self,
prompt: str | list[str],
images: list[Image.Image] | None = ...,
max_length: int | None = ...,
**kwargs,
) -> TokenizerOutput: ...
class VisionLanguageTokenizer(BaseTokenizer):
def __init__(
self,
tokenizer: PreTrainedTokenizer,
processor,
max_length: int = ...,
template: str | None = ...,
image_token: str = ...,
) -> None: ...
def tokenize(
self,
prompt: str | list[str],
images: list[Image.Image] | None = ...,
max_length: int | None = ...,
**kwargs,
) -> TokenizerOutput: ...


@@ -0,0 +1,22 @@
"""
This type stub file was generated by pyright.
"""
from typing import TYPE_CHECKING
from mflux.models.common.tokenizer.tokenizer import BaseTokenizer
from mflux.models.common.weights.loading.weight_definition import TokenizerDefinition
"""
This type stub file was generated by pyright.
"""
if TYPE_CHECKING: ...
class TokenizerLoader:
@staticmethod
def load(definition: TokenizerDefinition, model_path: str) -> BaseTokenizer: ...
@staticmethod
def load_all(
definitions: list[TokenizerDefinition],
model_path: str,
max_length_overrides: dict[str, int] | None = ...,
) -> dict[str, BaseTokenizer]: ...


@@ -0,0 +1,17 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from dataclasses import dataclass
"""
This type stub file was generated by pyright.
"""
@dataclass
class TokenizerOutput:
input_ids: mx.array
attention_mask: mx.array
pixel_values: mx.array | None = ...
image_grid_thw: mx.array | None = ...


@@ -0,0 +1,8 @@
"""
This type stub file was generated by pyright.
"""
from mflux.models.common.vae.tiling_config import TilingConfig
from mflux.models.common.vae.vae_tiler import VAETiler
__all__ = ["TilingConfig", "VAETiler"]


@@ -0,0 +1,13 @@
"""
This type stub file was generated by pyright.
"""
from dataclasses import dataclass
@dataclass(frozen=True, slots=True)
class TilingConfig:
vae_decode_tiles_per_dim: int | None = ...
vae_decode_overlap: int = ...
vae_encode_tiled: bool = ...
vae_encode_tile_size: int = ...
vae_encode_tile_overlap: int = ...


@@ -0,0 +1,27 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from typing import Callable
class VAETiler:
@staticmethod
def encode_image_tiled(
*,
image: mx.array,
encode_fn: Callable[[mx.array], mx.array],
latent_channels: int,
tile_size: tuple[int, int] = ...,
tile_overlap: tuple[int, int] = ...,
spatial_scale: int = ...,
) -> mx.array: ...
@staticmethod
def decode_image_tiled(
*,
latent: mx.array,
decode_fn: Callable[[mx.array], mx.array],
tile_size: tuple[int, int] = ...,
tile_overlap: tuple[int, int] = ...,
spatial_scale: int = ...,
) -> mx.array: ...
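Tiled VAE encode/decode as in `VAETiler` above rests on covering each spatial axis with overlapping tiles so seams can be blended away. A 1-D sketch of computing tile start offsets (hypothetical helper, not the mflux implementation):

```python
def tile_starts(length: int, tile: int, overlap: int) -> list[int]:
    # Slide a window of `tile` pixels with a `tile - overlap` stride, then
    # add a final tile flush with the end if anything would be missed.
    step = tile - overlap
    starts = list(range(0, max(length - tile, 0) + 1, step))
    if starts[-1] + tile < length:
        starts.append(length - tile)
    return starts

print(tile_starts(10, 4, 2))  # [0, 2, 4, 6] -- covers 0..10 with 2 px overlap
```

The 2-D tiler would take the cross product of the start lists for each axis, run `encode_fn`/`decode_fn` per tile, and average the overlapping regions back together.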


@@ -0,0 +1,17 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from mlx import nn
from mflux.models.common.vae.tiling_config import TilingConfig
class VAEUtil:
@staticmethod
def encode(
vae: nn.Module, image: mx.array, tiling_config: TilingConfig | None = ...
) -> mx.array: ...
@staticmethod
def decode(
vae: nn.Module, latent: mx.array, tiling_config: TilingConfig | None = ...
) -> mx.array: ...


@@ -0,0 +1,18 @@
"""
This type stub file was generated by pyright.
"""
from mflux.models.common.weights.loading.loaded_weights import LoadedWeights, MetaData
from mflux.models.common.weights.loading.weight_applier import WeightApplier
from mflux.models.common.weights.loading.weight_definition import ComponentDefinition
from mflux.models.common.weights.loading.weight_loader import WeightLoader
from mflux.models.common.weights.saving.model_saver import ModelSaver
__all__ = [
"ComponentDefinition",
"LoadedWeights",
"MetaData",
"ModelSaver",
"WeightApplier",
"WeightLoader",
]


@@ -0,0 +1,18 @@
"""
This type stub file was generated by pyright.
"""
from dataclasses import dataclass
@dataclass
class MetaData:
quantization_level: int | None = ...
mflux_version: str | None = ...
@dataclass
class LoadedWeights:
components: dict[str, dict]
meta_data: MetaData
def __getattr__(self, name: str) -> dict | None: ...
def num_transformer_blocks(self, component_name: str = ...) -> int: ...
def num_single_transformer_blocks(self, component_name: str = ...) -> int: ...


@@ -0,0 +1,30 @@
"""
This type stub file was generated by pyright.
"""
import mlx.nn as nn
from typing import TYPE_CHECKING
from mflux.models.common.weights.loading.loaded_weights import LoadedWeights
from mflux.models.common.weights.loading.weight_definition import (
ComponentDefinition,
WeightDefinitionType,
)
if TYPE_CHECKING: ...
class WeightApplier:
@staticmethod
def apply_and_quantize_single(
weights: LoadedWeights,
model: nn.Module,
component: ComponentDefinition,
quantize_arg: int | None,
quantization_predicate=...,
) -> int | None: ...
@staticmethod
def apply_and_quantize(
weights: LoadedWeights,
models: dict[str, nn.Module],
quantize_arg: int | None,
weight_definition: WeightDefinitionType,
) -> int | None: ...


@@ -0,0 +1,73 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from dataclasses import dataclass
from typing import Callable, List, TYPE_CHECKING, TypeAlias
from mflux.models.common.weights.mapping.weight_mapping import WeightTarget
from mflux.models.common.tokenizer.tokenizer import BaseTokenizer
from mflux.models.depth_pro.weights.depth_pro_weight_definition import (
DepthProWeightDefinition,
)
from mflux.models.fibo.weights.fibo_weight_definition import FIBOWeightDefinition
from mflux.models.fibo_vlm.weights.fibo_vlm_weight_definition import (
FIBOVLMWeightDefinition,
)
from mflux.models.flux.weights.flux_weight_definition import FluxWeightDefinition
from mflux.models.qwen.weights.qwen_weight_definition import QwenWeightDefinition
from mflux.models.seedvr2.weights.seedvr2_weight_definition import (
SeedVR2WeightDefinition,
)
from mflux.models.z_image.weights.z_image_weight_definition import (
ZImageWeightDefinition,
)
"""
This type stub file was generated by pyright.
"""
if TYPE_CHECKING:
WeightDefinitionType: TypeAlias = type[
FluxWeightDefinition
| FIBOWeightDefinition
| FIBOVLMWeightDefinition
| QwenWeightDefinition
| ZImageWeightDefinition
| SeedVR2WeightDefinition
| DepthProWeightDefinition
]
@dataclass
class ComponentDefinition:
name: str
hf_subdir: str
mapping_getter: Callable[[], List[WeightTarget]] | None = ...
model_attr: str | None = ...
num_blocks: int | None = ...
num_layers: int | None = ...
loading_mode: str = ...
precision: mx.Dtype | None = ...
skip_quantization: bool = ...
bulk_transform: Callable[[mx.array], mx.array] | None = ...
weight_subkey: str | None = ...
download_url: str | None = ...
weight_prefix_filters: List[str] | None = ...
weight_files: List[str] | None = ...
@dataclass
class TokenizerDefinition:
name: str
hf_subdir: str
tokenizer_class: str = ...
fallback_subdirs: List[str] | None = ...
download_patterns: List[str] | None = ...
encoder_class: type[BaseTokenizer] | None = ...
max_length: int = ...
padding: str = ...
template: str | None = ...
use_chat_template: bool = ...
chat_template_kwargs: dict | None = ...
add_special_tokens: bool = ...
processor_class: type | None = ...
image_token: str = ...
chat_template: str | None = ...


@@ -0,0 +1,23 @@
"""
This type stub file was generated by pyright.
"""
from typing import TYPE_CHECKING
from mflux.models.common.weights.loading.loaded_weights import LoadedWeights
from mflux.models.common.weights.loading.weight_definition import (
ComponentDefinition,
WeightDefinitionType,
)
if TYPE_CHECKING: ...
logger = ...
class WeightLoader:
@staticmethod
def load_single(
component: ComponentDefinition, repo_id: str, file_pattern: str = ...
) -> LoadedWeights: ...
@staticmethod
def load(
weight_definition: WeightDefinitionType, model_path: str | None = ...
) -> LoadedWeights: ...


@@ -0,0 +1,16 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from typing import Dict, List, Optional
from mflux.models.common.weights.mapping.weight_mapping import WeightTarget
class WeightMapper:
@staticmethod
def apply_mapping(
hf_weights: Dict[str, mx.array],
mapping: List[WeightTarget],
num_blocks: Optional[int] = ...,
num_layers: Optional[int] = ...,
) -> Dict: ...
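`WeightMapper.apply_mapping` evidently renames checkpoint keys to model keys using pattern pairs, optionally expanded per transformer block. A minimal hypothetical sketch of such block-indexed renaming (the `{i}` placeholder syntax and triple layout are assumptions, not mflux's actual `WeightTarget` semantics):

```python
def apply_mapping_sketch(hf_weights: dict, mapping: list, num_blocks: int = 1) -> dict:
    """Rename flat checkpoint keys via (from_pattern, to_pattern,
    transform) triples with an optional {i} block index."""
    out = {}
    for from_pat, to_pat, transform in mapping:
        for i in range(num_blocks):
            src = from_pat.format(i=i)
            if src in hf_weights:
                value = hf_weights[src]
                # Apply an optional per-tensor transform (e.g. a transpose).
                out[to_pat.format(i=i)] = transform(value) if transform else value
    return out
```

Patterns without `{i}` are unaffected by the block loop, so global tensors and per-block tensors can share one mapping list.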


@@ -0,0 +1,23 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from dataclasses import dataclass
from typing import Callable, List, Optional, Protocol
"""
This type stub file was generated by pyright.
"""
@dataclass
class WeightTarget:
to_pattern: str
from_pattern: List[str]
transform: Optional[Callable[[mx.array], mx.array]] = ...
required: bool = ...
max_blocks: Optional[int] = ...
class WeightMapping(Protocol):
@staticmethod
def get_mapping() -> List[WeightTarget]: ...


@@ -0,0 +1,17 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
class WeightTransforms:
@staticmethod
def reshape_gamma_to_1d(tensor: mx.array) -> mx.array: ...
@staticmethod
def transpose_patch_embed(tensor: mx.array) -> mx.array: ...
@staticmethod
def transpose_conv3d_weight(tensor: mx.array) -> mx.array: ...
@staticmethod
def transpose_conv2d_weight(tensor: mx.array) -> mx.array: ...
@staticmethod
def transpose_conv_transpose2d_weight(tensor: mx.array) -> mx.array: ...
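These transforms suggest layout conversion between checkpoint and MLX conventions: PyTorch stores conv2d weights as (O, I, H, W) while MLX convolutions take channels-last weights, roughly (O, H, W, I). A pure-Python, shapes-only sketch of that permutation (illustrative, not mflux's implementation):

```python
def transpose_conv2d_weight_sketch(w: list) -> list:
    """Permute a nested-list tensor from an (O, I, H, W) layout to
    (O, H, W, I) by re-indexing the four axes."""
    o_n, i_n = len(w), len(w[0])
    h_n, w_n = len(w[0][0]), len(w[0][0][0])
    # out[o][h][x][i] == w[o][i][h][x]
    return [[[[w[o][i][h][x] for i in range(i_n)]
              for x in range(w_n)]
             for h in range(h_n)]
            for o in range(o_n)]
```

In practice this is a single `mx.transpose(w, (0, 2, 3, 1))`; the nested-list version just makes the index shuffle explicit.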


@@ -0,0 +1,14 @@
"""
This type stub file was generated by pyright.
"""
from typing import Any, TYPE_CHECKING
from mflux.models.common.weights.loading.weight_definition import WeightDefinitionType
if TYPE_CHECKING: ...
class ModelSaver:
@staticmethod
def save_model(
model: Any, bits: int, base_path: str, weight_definition: WeightDefinitionType
) -> None: ...


@@ -0,0 +1,9 @@
"""
This type stub file was generated by pyright.
"""
from mflux.models.depth_pro.model.depth_pro_model import DepthProModel
class DepthProInitializer:
@staticmethod
def init(model: DepthProModel, quantize: int | None = ...) -> None: ...


@@ -0,0 +1,10 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import mlx.nn as nn
class FeatureFusionBlock2d(nn.Module):
def __init__(self, num_features: int, deconv: bool = ...) -> None: ...
def __call__(self, x0: mx.array, x1: mx.array | None = ...) -> mx.array: ...


@@ -0,0 +1,17 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import mlx.nn as nn
class MultiresConvDecoder(nn.Module):
def __init__(self) -> None: ...
def __call__(
self,
x0_latent: mx.array,
x1_latent: mx.array,
x0_features: mx.array,
x1_features: mx.array,
x_global_features: mx.array,
) -> mx.array: ...


@@ -0,0 +1,10 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import mlx.nn as nn
class ResidualBlock(nn.Module):
def __init__(self, num_features: int) -> None: ...
def __call__(self, x: mx.array) -> mx.array: ...


@@ -0,0 +1,20 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from dataclasses import dataclass
from pathlib import Path
from PIL import Image
@dataclass
class DepthResult:
depth_image: Image.Image
depth_array: mx.array
min_depth: float
max_depth: float
...
class DepthPro:
def __init__(self, quantize: int | None = ...) -> None: ...
def create_depth_map(self, image_path: str | Path) -> DepthResult: ...


@@ -0,0 +1,12 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import mlx.nn as nn
class DepthProModel(nn.Module):
def __init__(self) -> None: ...
def __call__(
self, x0: mx.array, x1: mx.array, x2: mx.array
) -> tuple[mx.array, mx.array]: ...


@@ -0,0 +1,15 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import mlx.nn as nn
class DepthProUtil:
@staticmethod
def split(x: mx.array, overlap_ratio: float = ...) -> mx.array: ...
@staticmethod
def interpolate(x: mx.array, size=..., scale_factor=...): # -> array:
...
@staticmethod
def apply_conv(x: mx.array, conv_module: nn.Module) -> mx.array: ...
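`DepthProUtil.split` takes an `overlap_ratio`, implying the input is cut into overlapping patches. A hypothetical sketch of such window offsets along one axis (not Depth Pro's exact scheme):

```python
def split_starts(length: int, patch: int, overlap_ratio: float = 0.25) -> list[int]:
    """Start offsets of patch-sized windows along one axis with a
    fractional overlap between neighbours; always covers the tail."""
    stride = max(int(patch * (1 - overlap_ratio)), 1)
    starts = list(range(0, max(length - patch, 0) + 1, stride))
    if starts[-1] != length - patch and length > patch:
        starts.append(length - patch)  # extra window so the end is covered
    return starts
```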


@@ -0,0 +1,12 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
from mlx import nn
class Attention(nn.Module):
def __init__(
self, dim: int = ..., head_dim: int = ..., num_heads: int = ...
) -> None: ...
def __call__(self, x: mx.array) -> mx.array: ...


@@ -0,0 +1,10 @@
"""
This type stub file was generated by pyright.
"""
import mlx.core as mx
import mlx.nn as nn
class DinoVisionTransformer(nn.Module):
def __init__(self) -> None: ...
def __call__(self, x: mx.array) -> tuple[mx.array, mx.array, mx.array]: ...

Some files were not shown because too many files have changed in this diff.