diff --git a/.github/workflows/build-app.yml b/.github/workflows/build-app.yml
index c0cb0e9b9..e88744224 100644
--- a/.github/workflows/build-app.yml
+++ b/.github/workflows/build-app.yml
@@ -32,7 +32,6 @@ jobs:
       SPARKLE_ED25519_PRIVATE: ${{ secrets.SPARKLE_ED25519_PRIVATE }}
       SPARKLE_S3_BUCKET: ${{ secrets.SPARKLE_S3_BUCKET }}
       SPARKLE_S3_PREFIX: ${{ secrets.SPARKLE_S3_PREFIX }}
-      EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT: ${{ secrets.EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT }}
       AWS_REGION: ${{ secrets.AWS_REGION }}
       EXO_BUILD_NUMBER: ${{ github.run_number }}
       EXO_LIBP2P_NAMESPACE: ${{ github.ref_name }}
@@ -239,6 +238,80 @@ jobs:
           # Export keychain path for other steps
           echo "BUILD_KEYCHAIN_PATH=$KEYCHAIN_PATH" >> $GITHUB_ENV
 
+      # ============================================================
+      # Pre-flight credential / profile validation
+      # Runs BEFORE the ~16 min build so auth/expiry failures surface in <1 min.
+      # ============================================================
+
+      - name: Validate Apple notarization credentials
+        env:
+          APPLE_NOTARIZATION_USERNAME: ${{ secrets.APPLE_NOTARIZATION_USERNAME }}
+          APPLE_NOTARIZATION_PASSWORD: ${{ secrets.APPLE_NOTARIZATION_PASSWORD }}
+          APPLE_NOTARIZATION_TEAM: ${{ secrets.APPLE_NOTARIZATION_TEAM }}
+        run: |
+          # Count the non-empty notarization secrets: 0 means "skip notarization", 3 means "verify", else abort.
+          present=0
+          for secret in "$APPLE_NOTARIZATION_USERNAME" "$APPLE_NOTARIZATION_PASSWORD" "$APPLE_NOTARIZATION_TEAM"; do
+            if [[ -n "$secret" ]]; then present=$((present + 1)); fi
+          done
+          if [[ "$present" -eq 0 ]]; then
+            echo "No notarization credentials configured — skipping notarization for this build."
+            exit 0
+          elif [[ "$present" -ne 3 ]]; then
+            echo "ERROR: partial notarization credentials set ($present/3). Aborting before build."
+            exit 1
+          fi
+          # `notarytool history` is used purely as an authentication probe; its listing is discarded.
+          # A non-zero exit from it is reported as a credential problem and stops the job before the build.
+          echo "Verifying Apple notarization credentials via notarytool history..."
+          if xcrun notarytool history \
+            --apple-id "$APPLE_NOTARIZATION_USERNAME" \
+            --password "$APPLE_NOTARIZATION_PASSWORD" \
+            --team-id "$APPLE_NOTARIZATION_TEAM" >/dev/null; then
+            echo "Apple notarization credentials OK."
+          else
+            echo "ERROR: notarytool rejected the provided credentials. Fix before rerunning."
+            echo "Common causes: app-specific password expired/revoked, wrong team-id,"
+            echo "Apple ID not on the team, or 2FA not configured for this Apple ID."
+            exit 1
+          fi
+
+      - name: Validate provisioning profile expiry
+        run: |
+          PROFILE="$HOME/Library/Developer/Xcode/UserData/Provisioning Profiles/EXO.provisionprofile"
+          if [[ ! -f "$PROFILE" ]]; then
+            echo "ERROR: provisioning profile not found at $PROFILE"
+            exit 1
+          fi
+          EXPIRY=$(security cms -D -i "$PROFILE" | plutil -extract ExpirationDate raw -o - - 2>/dev/null || true)
+          if [[ -z "$EXPIRY" ]]; then
+            echo "WARNING: could not read ExpirationDate from provisioning profile; skipping expiry check."
+            exit 0
+          fi
+          # Try a few date layouts; if none parse, skip the check rather than block the build on an unknown format.
+          # -u is required: "Z" / "+0000" are matched as literal text, so without it the time is read as runner-local.
+          EXPIRY_EPOCH=""
+          for fmt in "%Y-%m-%dT%H:%M:%SZ" "%Y-%m-%d %H:%M:%S %z" "%Y-%m-%d %H:%M:%S +0000"; do
+            if parsed=$(date -u -j -f "$fmt" "$EXPIRY" +%s 2>/dev/null); then
+              EXPIRY_EPOCH="$parsed"
+              break
+            fi
+          done
+          if [[ -z "$EXPIRY_EPOCH" ]]; then
+            echo "WARNING: could not parse ExpirationDate '$EXPIRY'; skipping expiry check."
+            exit 0
+          fi
+          NOW_EPOCH=$(date +%s)
+          if [[ "$EXPIRY_EPOCH" -le "$NOW_EPOCH" ]]; then
+            echo "ERROR: provisioning profile expired on $EXPIRY. Regenerate it before rerunning."
+            exit 1
+          fi
+          DAYS_LEFT=$(( (EXPIRY_EPOCH - NOW_EPOCH) / 86400 ))
+          echo "Provisioning profile valid until $EXPIRY ($DAYS_LEFT days remaining)."
+          if [[ "$DAYS_LEFT" -lt 14 ]]; then
+            echo "WARNING: profile expires in under 14 days — regenerate soon."
+          fi
+
       # ============================================================
       # Build the bundle
       # ============================================================
@@ -273,7 +346,6 @@ jobs:
             EXO_BUILD_COMMIT="$GITHUB_SHA" \
             SPARKLE_FEED_URL="$SPARKLE_FEED_URL" \
             SPARKLE_ED25519_PUBLIC="$SPARKLE_ED25519_PUBLIC" \
-            EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT="$EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT" \
             CODE_SIGNING_IDENTITY="$SIGNING_IDENTITY" \
             CODE_SIGN_INJECT_BASE_ENTITLEMENTS=YES
           mkdir -p ../../output
@@ -306,11 +378,41 @@ jobs:
           APPLE_NOTARIZATION_PASSWORD: ${{ secrets.APPLE_NOTARIZATION_PASSWORD }}
           APPLE_NOTARIZATION_TEAM: ${{ secrets.APPLE_NOTARIZATION_TEAM }}
         run: |
+          set -o pipefail
           cd output
           security unlock-keychain -p "$MACOS_CERTIFICATE_PASSWORD" "$BUILD_KEYCHAIN_PATH"
           SIGNING_IDENTITY=$(security find-identity -v -p codesigning "$BUILD_KEYCHAIN_PATH" | awk -F '"' '{print $2}')
+
+          # Fail fast if notarization creds are partial. All-or-nothing.
+          CRED_COUNT=0
+          for v in "$APPLE_NOTARIZATION_USERNAME" "$APPLE_NOTARIZATION_PASSWORD" "$APPLE_NOTARIZATION_TEAM"; do
+            [[ -n "$v" ]] && CRED_COUNT=$((CRED_COUNT + 1))
+          done
+          if [[ "$CRED_COUNT" -ne 0 && "$CRED_COUNT" -ne 3 ]]; then
+            echo "ERROR: partial Apple notarization credentials set ($CRED_COUNT/3). Aborting."
+            exit 1
+          fi
+
           /usr/bin/codesign --deep --force --timestamp --options runtime \
             --sign "$SIGNING_IDENTITY" EXO.app
+
+          # Pre-flight: verify the signed app BEFORE building the DMG and submitting to Apple, so failures show early.
+          echo "===== codesign --verify EXO.app ====="
+          if ! /usr/bin/codesign --verify --deep --strict --verbose=2 EXO.app; then
+            echo "ERROR: EXO.app failed codesign verification. Dumping signing status of every executable:"
+            # Frameworks are directories, so they need -type d; executables and shared libraries are regular files.
+            find EXO.app \( -type f \( -perm -111 -o -name "*.dylib" -o -name "*.so" \) -o -type d -name "*.framework" \) -print0 |
+              while IFS= read -r -d '' f; do
+                printf -- '--- %s\n' "$f"
+                /usr/bin/codesign -dv --verbose=2 "$f" 2>&1 | sed 's/^/    /' || true
+              done
+            exit 1
+          fi
+
+          # Gatekeeper assessment (non-fatal). Type "exec" assesses code; "install" is only for installer packages.
+          echo "===== spctl assessment (predicts notarization outcome) ====="
+          /usr/bin/spctl -a -vvv -t exec EXO.app || echo "WARNING: spctl assessment failed — notarization is likely to fail too."
+
           mkdir -p dmg-root
           cp -R EXO.app dmg-root/
           ln -s /Applications dmg-root/Applications
@@ -318,12 +420,22 @@ jobs:
           hdiutil create -volname "EXO" -srcfolder dmg-root -ov -format UDZO "$DMG_NAME"
           /usr/bin/codesign --force --timestamp --options runtime \
             --sign "$SIGNING_IDENTITY" "$DMG_NAME"
+
+          echo "===== codesign --verify DMG ====="
+          if ! /usr/bin/codesign --verify --verbose=2 "$DMG_NAME"; then
+            echo "ERROR: DMG failed codesign verification."
+            exit 1
+          fi
+
           if [[ -n "$APPLE_NOTARIZATION_USERNAME" ]]; then
+            echo "===== notarytool submit ====="
+            # `|| true` so set -e doesn't abort before we can echo output / fetch the log.
+            # We rely on the parsed STATUS below to decide pass/fail.
             SUBMISSION_OUTPUT=$(xcrun notarytool submit "$DMG_NAME" \
               --apple-id "$APPLE_NOTARIZATION_USERNAME" \
               --password "$APPLE_NOTARIZATION_PASSWORD" \
               --team-id "$APPLE_NOTARIZATION_TEAM" \
-              --wait --timeout 15m 2>&1)
+              --wait --timeout 15m 2>&1) || true
             echo "$SUBMISSION_OUTPUT"
 
             SUBMISSION_ID=$(echo "$SUBMISSION_OUTPUT" | awk 'tolower($1)=="id:" && $2 ~ /^[0-9a-fA-F-]+$/ {print $2; exit}')
diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml
index 8483b1309..c1a4674ff 100644
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -91,9 +91,6 @@ jobs:
             nix build .#metal-toolchain
           fi
 
-          # Build mlx (depends on metal-toolchain)
-          nix build .#mlx
-
       - name: Build all Nix outputs
         run: |
           nix flake show --json | jq -r '
diff --git a/.gitignore b/.gitignore
index b162de342..fa09fb01d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,7 +18,6 @@ digest.txt
 app/EXO/build/
 dist/
 
-
 # rust
 target/
 **/*.rs.bk
@@ -40,3 +39,5 @@ bench/**/*.json
 tmp/models
 /build/exo
 /.claude/skills
+/.claude
+/.codex
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 124c79a18..86011b9b1 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -4,4 +4,7 @@
     <option name="sdkName" value="Python 3.13 (exo)" />
   </component>
   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (exo)" project-jdk-type="Python SDK" />
+  <component name="RuffConfiguration">
+    <option name="enabled" value="true" />
+  </component>
 </project>
\ No newline at end of file
diff --git a/.mlx_typings/.gitkeep b/.typings/.gitkeep
similarity index 100%
rename from .mlx_typings/.gitkeep
rename to .typings/.gitkeep
diff --git a/.mlx_typings/mflux/__init__.pyi b/.typings/mflux/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/__init__.pyi
rename to .typings/mflux/__init__.pyi
diff --git a/.mlx_typings/mflux/callbacks/__init__.pyi b/.typings/mflux/callbacks/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/callbacks/__init__.pyi
rename to .typings/mflux/callbacks/__init__.pyi
diff --git a/.mlx_typings/mflux/callbacks/callback.pyi b/.typings/mflux/callbacks/callback.pyi
similarity index 100%
rename from .mlx_typings/mflux/callbacks/callback.pyi
rename to .typings/mflux/callbacks/callback.pyi
diff --git a/.mlx_typings/mflux/callbacks/callback_registry.pyi b/.typings/mflux/callbacks/callback_registry.pyi
similarity index 100%
rename from .mlx_typings/mflux/callbacks/callback_registry.pyi
rename to .typings/mflux/callbacks/callback_registry.pyi
diff --git a/.mlx_typings/mflux/callbacks/generation_context.pyi b/.typings/mflux/callbacks/generation_context.pyi
similarity index 100%
rename from .mlx_typings/mflux/callbacks/generation_context.pyi
rename to .typings/mflux/callbacks/generation_context.pyi
diff --git a/.mlx_typings/mflux/cli/__init__.pyi b/.typings/mflux/cli/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/cli/__init__.pyi
rename to .typings/mflux/cli/__init__.pyi
diff --git a/.mlx_typings/mflux/cli/defaults/defaults.pyi b/.typings/mflux/cli/defaults/defaults.pyi
similarity index 100%
rename from .mlx_typings/mflux/cli/defaults/defaults.pyi
rename to .typings/mflux/cli/defaults/defaults.pyi
diff --git a/.mlx_typings/mflux/models/__init__.pyi b/.typings/mflux/models/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/__init__.pyi
rename to .typings/mflux/models/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/__init__.pyi b/.typings/mflux/models/common/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/__init__.pyi
rename to .typings/mflux/models/common/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/cli/__init__.pyi b/.typings/mflux/models/common/cli/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/cli/__init__.pyi
rename to .typings/mflux/models/common/cli/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/config/__init__.pyi b/.typings/mflux/models/common/config/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/config/__init__.pyi
rename to .typings/mflux/models/common/config/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/config/config.pyi b/.typings/mflux/models/common/config/config.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/config/config.pyi
rename to .typings/mflux/models/common/config/config.pyi
diff --git a/.mlx_typings/mflux/models/common/config/model_config.pyi b/.typings/mflux/models/common/config/model_config.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/config/model_config.pyi
rename to .typings/mflux/models/common/config/model_config.pyi
diff --git a/.mlx_typings/mflux/models/common/latent_creator/__init__.pyi b/.typings/mflux/models/common/latent_creator/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/latent_creator/__init__.pyi
rename to .typings/mflux/models/common/latent_creator/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/latent_creator/latent_creator.pyi b/.typings/mflux/models/common/latent_creator/latent_creator.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/latent_creator/latent_creator.pyi
rename to .typings/mflux/models/common/latent_creator/latent_creator.pyi
diff --git a/.mlx_typings/mflux/models/common/lora/__init__.pyi b/.typings/mflux/models/common/lora/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/lora/__init__.pyi
rename to .typings/mflux/models/common/lora/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/lora/layer/fused_linear_lora_layer.pyi b/.typings/mflux/models/common/lora/layer/fused_linear_lora_layer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/lora/layer/fused_linear_lora_layer.pyi
rename to .typings/mflux/models/common/lora/layer/fused_linear_lora_layer.pyi
diff --git a/.mlx_typings/mflux/models/common/lora/layer/linear_lora_layer.pyi b/.typings/mflux/models/common/lora/layer/linear_lora_layer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/lora/layer/linear_lora_layer.pyi
rename to .typings/mflux/models/common/lora/layer/linear_lora_layer.pyi
diff --git a/.mlx_typings/mflux/models/common/lora/mapping/lora_loader.pyi b/.typings/mflux/models/common/lora/mapping/lora_loader.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/lora/mapping/lora_loader.pyi
rename to .typings/mflux/models/common/lora/mapping/lora_loader.pyi
diff --git a/.mlx_typings/mflux/models/common/lora/mapping/lora_mapping.pyi b/.typings/mflux/models/common/lora/mapping/lora_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/lora/mapping/lora_mapping.pyi
rename to .typings/mflux/models/common/lora/mapping/lora_mapping.pyi
diff --git a/.mlx_typings/mflux/models/common/lora/mapping/lora_saver.pyi b/.typings/mflux/models/common/lora/mapping/lora_saver.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/lora/mapping/lora_saver.pyi
rename to .typings/mflux/models/common/lora/mapping/lora_saver.pyi
diff --git a/.mlx_typings/mflux/models/common/lora/mapping/lora_transforms.pyi b/.typings/mflux/models/common/lora/mapping/lora_transforms.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/lora/mapping/lora_transforms.pyi
rename to .typings/mflux/models/common/lora/mapping/lora_transforms.pyi
diff --git a/.mlx_typings/mflux/models/common/resolution/__init__.pyi b/.typings/mflux/models/common/resolution/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/resolution/__init__.pyi
rename to .typings/mflux/models/common/resolution/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/resolution/actions.pyi b/.typings/mflux/models/common/resolution/actions.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/resolution/actions.pyi
rename to .typings/mflux/models/common/resolution/actions.pyi
diff --git a/.mlx_typings/mflux/models/common/resolution/config_resolution.pyi b/.typings/mflux/models/common/resolution/config_resolution.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/resolution/config_resolution.pyi
rename to .typings/mflux/models/common/resolution/config_resolution.pyi
diff --git a/.mlx_typings/mflux/models/common/resolution/lora_resolution.pyi b/.typings/mflux/models/common/resolution/lora_resolution.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/resolution/lora_resolution.pyi
rename to .typings/mflux/models/common/resolution/lora_resolution.pyi
diff --git a/.mlx_typings/mflux/models/common/resolution/path_resolution.pyi b/.typings/mflux/models/common/resolution/path_resolution.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/resolution/path_resolution.pyi
rename to .typings/mflux/models/common/resolution/path_resolution.pyi
diff --git a/.mlx_typings/mflux/models/common/resolution/quantization_resolution.pyi b/.typings/mflux/models/common/resolution/quantization_resolution.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/resolution/quantization_resolution.pyi
rename to .typings/mflux/models/common/resolution/quantization_resolution.pyi
diff --git a/.mlx_typings/mflux/models/common/schedulers/__init__.pyi b/.typings/mflux/models/common/schedulers/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/schedulers/__init__.pyi
rename to .typings/mflux/models/common/schedulers/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/schedulers/base_scheduler.pyi b/.typings/mflux/models/common/schedulers/base_scheduler.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/schedulers/base_scheduler.pyi
rename to .typings/mflux/models/common/schedulers/base_scheduler.pyi
diff --git a/.mlx_typings/mflux/models/common/schedulers/flow_match_euler_discrete_scheduler.pyi b/.typings/mflux/models/common/schedulers/flow_match_euler_discrete_scheduler.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/schedulers/flow_match_euler_discrete_scheduler.pyi
rename to .typings/mflux/models/common/schedulers/flow_match_euler_discrete_scheduler.pyi
diff --git a/.mlx_typings/mflux/models/common/schedulers/linear_scheduler.pyi b/.typings/mflux/models/common/schedulers/linear_scheduler.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/schedulers/linear_scheduler.pyi
rename to .typings/mflux/models/common/schedulers/linear_scheduler.pyi
diff --git a/.mlx_typings/mflux/models/common/schedulers/seedvr2_euler_scheduler.pyi b/.typings/mflux/models/common/schedulers/seedvr2_euler_scheduler.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/schedulers/seedvr2_euler_scheduler.pyi
rename to .typings/mflux/models/common/schedulers/seedvr2_euler_scheduler.pyi
diff --git a/.mlx_typings/mflux/models/common/tokenizer/__init__.pyi b/.typings/mflux/models/common/tokenizer/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/tokenizer/__init__.pyi
rename to .typings/mflux/models/common/tokenizer/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/tokenizer/tokenizer.pyi b/.typings/mflux/models/common/tokenizer/tokenizer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/tokenizer/tokenizer.pyi
rename to .typings/mflux/models/common/tokenizer/tokenizer.pyi
diff --git a/.mlx_typings/mflux/models/common/tokenizer/tokenizer_loader.pyi b/.typings/mflux/models/common/tokenizer/tokenizer_loader.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/tokenizer/tokenizer_loader.pyi
rename to .typings/mflux/models/common/tokenizer/tokenizer_loader.pyi
diff --git a/.mlx_typings/mflux/models/common/tokenizer/tokenizer_output.pyi b/.typings/mflux/models/common/tokenizer/tokenizer_output.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/tokenizer/tokenizer_output.pyi
rename to .typings/mflux/models/common/tokenizer/tokenizer_output.pyi
diff --git a/.mlx_typings/mflux/models/common/vae/__init__.pyi b/.typings/mflux/models/common/vae/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/vae/__init__.pyi
rename to .typings/mflux/models/common/vae/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/vae/tiling_config.pyi b/.typings/mflux/models/common/vae/tiling_config.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/vae/tiling_config.pyi
rename to .typings/mflux/models/common/vae/tiling_config.pyi
diff --git a/.mlx_typings/mflux/models/common/vae/vae_tiler.pyi b/.typings/mflux/models/common/vae/vae_tiler.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/vae/vae_tiler.pyi
rename to .typings/mflux/models/common/vae/vae_tiler.pyi
diff --git a/.mlx_typings/mflux/models/common/vae/vae_util.pyi b/.typings/mflux/models/common/vae/vae_util.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/vae/vae_util.pyi
rename to .typings/mflux/models/common/vae/vae_util.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/__init__.pyi b/.typings/mflux/models/common/weights/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/__init__.pyi
rename to .typings/mflux/models/common/weights/__init__.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/loading/loaded_weights.pyi b/.typings/mflux/models/common/weights/loading/loaded_weights.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/loading/loaded_weights.pyi
rename to .typings/mflux/models/common/weights/loading/loaded_weights.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/loading/weight_applier.pyi b/.typings/mflux/models/common/weights/loading/weight_applier.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/loading/weight_applier.pyi
rename to .typings/mflux/models/common/weights/loading/weight_applier.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/loading/weight_definition.pyi b/.typings/mflux/models/common/weights/loading/weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/loading/weight_definition.pyi
rename to .typings/mflux/models/common/weights/loading/weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/loading/weight_loader.pyi b/.typings/mflux/models/common/weights/loading/weight_loader.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/loading/weight_loader.pyi
rename to .typings/mflux/models/common/weights/loading/weight_loader.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/mapping/weight_mapper.pyi b/.typings/mflux/models/common/weights/mapping/weight_mapper.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/mapping/weight_mapper.pyi
rename to .typings/mflux/models/common/weights/mapping/weight_mapper.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/mapping/weight_mapping.pyi b/.typings/mflux/models/common/weights/mapping/weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/mapping/weight_mapping.pyi
rename to .typings/mflux/models/common/weights/mapping/weight_mapping.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/mapping/weight_transforms.pyi b/.typings/mflux/models/common/weights/mapping/weight_transforms.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/mapping/weight_transforms.pyi
rename to .typings/mflux/models/common/weights/mapping/weight_transforms.pyi
diff --git a/.mlx_typings/mflux/models/common/weights/saving/model_saver.pyi b/.typings/mflux/models/common/weights/saving/model_saver.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/common/weights/saving/model_saver.pyi
rename to .typings/mflux/models/common/weights/saving/model_saver.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/depth_pro_initializer.pyi b/.typings/mflux/models/depth_pro/depth_pro_initializer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/depth_pro_initializer.pyi
rename to .typings/mflux/models/depth_pro/depth_pro_initializer.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/decoder/feature_fusion_block_2d.pyi b/.typings/mflux/models/depth_pro/model/decoder/feature_fusion_block_2d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/decoder/feature_fusion_block_2d.pyi
rename to .typings/mflux/models/depth_pro/model/decoder/feature_fusion_block_2d.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/decoder/multires_conv_decoder.pyi b/.typings/mflux/models/depth_pro/model/decoder/multires_conv_decoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/decoder/multires_conv_decoder.pyi
rename to .typings/mflux/models/depth_pro/model/decoder/multires_conv_decoder.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/decoder/residual_block.pyi b/.typings/mflux/models/depth_pro/model/decoder/residual_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/decoder/residual_block.pyi
rename to .typings/mflux/models/depth_pro/model/decoder/residual_block.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/depth_pro.pyi b/.typings/mflux/models/depth_pro/model/depth_pro.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/depth_pro.pyi
rename to .typings/mflux/models/depth_pro/model/depth_pro.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/depth_pro_model.pyi b/.typings/mflux/models/depth_pro/model/depth_pro_model.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/depth_pro_model.pyi
rename to .typings/mflux/models/depth_pro/model/depth_pro_model.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/depth_pro_util.pyi b/.typings/mflux/models/depth_pro/model/depth_pro_util.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/depth_pro_util.pyi
rename to .typings/mflux/models/depth_pro/model/depth_pro_util.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/dino_v2/attention.pyi b/.typings/mflux/models/depth_pro/model/dino_v2/attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/dino_v2/attention.pyi
rename to .typings/mflux/models/depth_pro/model/dino_v2/attention.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/dino_v2/dino_vision_transformer.pyi b/.typings/mflux/models/depth_pro/model/dino_v2/dino_vision_transformer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/dino_v2/dino_vision_transformer.pyi
rename to .typings/mflux/models/depth_pro/model/dino_v2/dino_vision_transformer.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/dino_v2/layer_scale.pyi b/.typings/mflux/models/depth_pro/model/dino_v2/layer_scale.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/dino_v2/layer_scale.pyi
rename to .typings/mflux/models/depth_pro/model/dino_v2/layer_scale.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/dino_v2/mlp.pyi b/.typings/mflux/models/depth_pro/model/dino_v2/mlp.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/dino_v2/mlp.pyi
rename to .typings/mflux/models/depth_pro/model/dino_v2/mlp.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/dino_v2/patch_embed.pyi b/.typings/mflux/models/depth_pro/model/dino_v2/patch_embed.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/dino_v2/patch_embed.pyi
rename to .typings/mflux/models/depth_pro/model/dino_v2/patch_embed.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/dino_v2/transformer_block.pyi b/.typings/mflux/models/depth_pro/model/dino_v2/transformer_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/dino_v2/transformer_block.pyi
rename to .typings/mflux/models/depth_pro/model/dino_v2/transformer_block.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/encoder/depth_pro_encoder.pyi b/.typings/mflux/models/depth_pro/model/encoder/depth_pro_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/encoder/depth_pro_encoder.pyi
rename to .typings/mflux/models/depth_pro/model/encoder/depth_pro_encoder.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/encoder/upsample_block.pyi b/.typings/mflux/models/depth_pro/model/encoder/upsample_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/encoder/upsample_block.pyi
rename to .typings/mflux/models/depth_pro/model/encoder/upsample_block.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/model/head/fov_head.pyi b/.typings/mflux/models/depth_pro/model/head/fov_head.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/model/head/fov_head.pyi
rename to .typings/mflux/models/depth_pro/model/head/fov_head.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/weights/depth_pro_weight_definition.pyi b/.typings/mflux/models/depth_pro/weights/depth_pro_weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/weights/depth_pro_weight_definition.pyi
rename to .typings/mflux/models/depth_pro/weights/depth_pro_weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/depth_pro/weights/depth_pro_weight_mapping.pyi b/.typings/mflux/models/depth_pro/weights/depth_pro_weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/depth_pro/weights/depth_pro_weight_mapping.pyi
rename to .typings/mflux/models/depth_pro/weights/depth_pro_weight_mapping.pyi
diff --git a/.mlx_typings/mflux/models/fibo/latent_creator/fibo_latent_creator.pyi b/.typings/mflux/models/fibo/latent_creator/fibo_latent_creator.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/fibo/latent_creator/fibo_latent_creator.pyi
rename to .typings/mflux/models/fibo/latent_creator/fibo_latent_creator.pyi
diff --git a/.mlx_typings/mflux/models/fibo/weights/fibo_weight_definition.pyi b/.typings/mflux/models/fibo/weights/fibo_weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/fibo/weights/fibo_weight_definition.pyi
rename to .typings/mflux/models/fibo/weights/fibo_weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/fibo/weights/fibo_weight_mapping.pyi b/.typings/mflux/models/fibo/weights/fibo_weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/fibo/weights/fibo_weight_mapping.pyi
rename to .typings/mflux/models/fibo/weights/fibo_weight_mapping.pyi
diff --git a/.mlx_typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_image_processor.pyi b/.typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_image_processor.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_image_processor.pyi
rename to .typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_image_processor.pyi
diff --git a/.mlx_typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_processor.pyi b/.typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_processor.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_processor.pyi
rename to .typings/mflux/models/fibo_vlm/tokenizer/qwen2vl_processor.pyi
diff --git a/.mlx_typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_definition.pyi b/.typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_definition.pyi
rename to .typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_mapping.pyi b/.typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_mapping.pyi
rename to .typings/mflux/models/fibo_vlm/weights/fibo_vlm_weight_mapping.pyi
diff --git a/.mlx_typings/mflux/models/flux/__init__.pyi b/.typings/mflux/models/flux/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/__init__.pyi
rename to .typings/mflux/models/flux/__init__.pyi
diff --git a/.mlx_typings/mflux/models/flux/cli/__init__.pyi b/.typings/mflux/models/flux/cli/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/cli/__init__.pyi
rename to .typings/mflux/models/flux/cli/__init__.pyi
diff --git a/.mlx_typings/mflux/models/flux/flux_initializer.pyi b/.typings/mflux/models/flux/flux_initializer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/flux_initializer.pyi
rename to .typings/mflux/models/flux/flux_initializer.pyi
diff --git a/.mlx_typings/mflux/models/flux/latent_creator/__init__.pyi b/.typings/mflux/models/flux/latent_creator/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/latent_creator/__init__.pyi
rename to .typings/mflux/models/flux/latent_creator/__init__.pyi
diff --git a/.mlx_typings/mflux/models/flux/latent_creator/flux_latent_creator.pyi b/.typings/mflux/models/flux/latent_creator/flux_latent_creator.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/latent_creator/flux_latent_creator.pyi
rename to .typings/mflux/models/flux/latent_creator/flux_latent_creator.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/__init__.pyi b/.typings/mflux/models/flux/model/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/__init__.pyi
rename to .typings/mflux/models/flux/model/__init__.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_embeddings.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_embeddings.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_embeddings.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_embeddings.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder_layer.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder_layer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder_layer.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_encoder_layer.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_mlp.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_mlp.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_mlp.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_mlp.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_sdpa_attention.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_sdpa_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_sdpa_attention.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_sdpa_attention.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_text_model.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_text_model.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_text_model.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/clip_text_model.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/encoder_clip.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/encoder_clip.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/encoder_clip.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/clip_encoder/encoder_clip.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/prompt_encoder.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/prompt_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/prompt_encoder.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/prompt_encoder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_attention.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_attention.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_attention.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_block.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_block.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_block.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_dense_relu_dense.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_dense_relu_dense.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_dense_relu_dense.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_dense_relu_dense.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_encoder.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_encoder.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_encoder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_feed_forward.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_feed_forward.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_feed_forward.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_feed_forward.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_layer_norm.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_layer_norm.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_layer_norm.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_layer_norm.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_self_attention.pyi b/.typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_self_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_self_attention.pyi
rename to .typings/mflux/models/flux/model/flux_text_encoder/t5_encoder/t5_self_attention.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_continuous.pyi b/.typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_continuous.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_continuous.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_continuous.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero.pyi b/.typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero_single.pyi b/.typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero_single.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero_single.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/ada_layer_norm_zero_single.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/common/attention_utils.pyi b/.typings/mflux/models/flux/model/flux_transformer/common/attention_utils.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/common/attention_utils.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/common/attention_utils.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/embed_nd.pyi b/.typings/mflux/models/flux/model/flux_transformer/embed_nd.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/embed_nd.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/embed_nd.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/feed_forward.pyi b/.typings/mflux/models/flux/model/flux_transformer/feed_forward.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/feed_forward.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/feed_forward.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/guidance_embedder.pyi b/.typings/mflux/models/flux/model/flux_transformer/guidance_embedder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/guidance_embedder.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/guidance_embedder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/joint_attention.pyi b/.typings/mflux/models/flux/model/flux_transformer/joint_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/joint_attention.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/joint_attention.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/joint_transformer_block.pyi b/.typings/mflux/models/flux/model/flux_transformer/joint_transformer_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/joint_transformer_block.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/joint_transformer_block.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/single_block_attention.pyi b/.typings/mflux/models/flux/model/flux_transformer/single_block_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/single_block_attention.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/single_block_attention.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/single_transformer_block.pyi b/.typings/mflux/models/flux/model/flux_transformer/single_transformer_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/single_transformer_block.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/single_transformer_block.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/text_embedder.pyi b/.typings/mflux/models/flux/model/flux_transformer/text_embedder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/text_embedder.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/text_embedder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/time_text_embed.pyi b/.typings/mflux/models/flux/model/flux_transformer/time_text_embed.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/time_text_embed.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/time_text_embed.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/timestep_embedder.pyi b/.typings/mflux/models/flux/model/flux_transformer/timestep_embedder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/timestep_embedder.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/timestep_embedder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_transformer/transformer.pyi b/.typings/mflux/models/flux/model/flux_transformer/transformer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_transformer/transformer.pyi
rename to .typings/mflux/models/flux/model/flux_transformer/transformer.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/common/attention.pyi b/.typings/mflux/models/flux/model/flux_vae/common/attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/common/attention.pyi
rename to .typings/mflux/models/flux/model/flux_vae/common/attention.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/common/resnet_block_2d.pyi b/.typings/mflux/models/flux/model/flux_vae/common/resnet_block_2d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/common/resnet_block_2d.pyi
rename to .typings/mflux/models/flux/model/flux_vae/common/resnet_block_2d.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/common/unet_mid_block.pyi b/.typings/mflux/models/flux/model/flux_vae/common/unet_mid_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/common/unet_mid_block.pyi
rename to .typings/mflux/models/flux/model/flux_vae/common/unet_mid_block.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/conv_in.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/conv_in.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/conv_in.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/conv_in.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/conv_norm_out.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/conv_norm_out.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/conv_norm_out.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/conv_norm_out.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/conv_out.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/conv_out.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/conv_out.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/conv_out.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/decoder.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/decoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/decoder.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/decoder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_block_1_or_2.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/up_block_1_or_2.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_block_1_or_2.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/up_block_1_or_2.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_block_3.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/up_block_3.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_block_3.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/up_block_3.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_block_4.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/up_block_4.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_block_4.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/up_block_4.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_sampler.pyi b/.typings/mflux/models/flux/model/flux_vae/decoder/up_sampler.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/decoder/up_sampler.pyi
rename to .typings/mflux/models/flux/model/flux_vae/decoder/up_sampler.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/conv_in.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/conv_in.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/conv_in.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/conv_in.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/conv_norm_out.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/conv_norm_out.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/conv_norm_out.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/conv_norm_out.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/conv_out.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/conv_out.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/conv_out.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/conv_out.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_1.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/down_block_1.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_1.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/down_block_1.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_2.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/down_block_2.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_2.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/down_block_2.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_3.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/down_block_3.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_3.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/down_block_3.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_4.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/down_block_4.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_block_4.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/down_block_4.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_sampler.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/down_sampler.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/down_sampler.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/down_sampler.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/encoder/encoder.pyi b/.typings/mflux/models/flux/model/flux_vae/encoder/encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/encoder/encoder.pyi
rename to .typings/mflux/models/flux/model/flux_vae/encoder/encoder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/flux_vae/vae.pyi b/.typings/mflux/models/flux/model/flux_vae/vae.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/flux_vae/vae.pyi
rename to .typings/mflux/models/flux/model/flux_vae/vae.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/redux_encoder/redux_encoder.pyi b/.typings/mflux/models/flux/model/redux_encoder/redux_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/redux_encoder/redux_encoder.pyi
rename to .typings/mflux/models/flux/model/redux_encoder/redux_encoder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder.pyi b/.typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder.pyi
rename to .typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder_layer.pyi b/.typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder_layer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder_layer.pyi
rename to .typings/mflux/models/flux/model/siglip_vision_transformer/siglip_encoder_layer.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_mlp.pyi b/.typings/mflux/models/flux/model/siglip_vision_transformer/siglip_mlp.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_mlp.pyi
rename to .typings/mflux/models/flux/model/siglip_vision_transformer/siglip_mlp.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_multi_head_attention_pooling_head.pyi b/.typings/mflux/models/flux/model/siglip_vision_transformer/siglip_multi_head_attention_pooling_head.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_multi_head_attention_pooling_head.pyi
rename to .typings/mflux/models/flux/model/siglip_vision_transformer/siglip_multi_head_attention_pooling_head.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_sdpa_attention.pyi b/.typings/mflux/models/flux/model/siglip_vision_transformer/siglip_sdpa_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_sdpa_attention.pyi
rename to .typings/mflux/models/flux/model/siglip_vision_transformer/siglip_sdpa_attention.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_embeddings.pyi b/.typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_embeddings.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_embeddings.pyi
rename to .typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_embeddings.pyi
diff --git a/.mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_transformer.pyi b/.typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_transformer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_transformer.pyi
rename to .typings/mflux/models/flux/model/siglip_vision_transformer/siglip_vision_transformer.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/__init__.pyi b/.typings/mflux/models/flux/variants/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/__init__.pyi
rename to .typings/mflux/models/flux/variants/__init__.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/concept_attention/attention_data.pyi b/.typings/mflux/models/flux/variants/concept_attention/attention_data.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/concept_attention/attention_data.pyi
rename to .typings/mflux/models/flux/variants/concept_attention/attention_data.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/concept_attention/joint_attention_concept.pyi b/.typings/mflux/models/flux/variants/concept_attention/joint_attention_concept.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/concept_attention/joint_attention_concept.pyi
rename to .typings/mflux/models/flux/variants/concept_attention/joint_attention_concept.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/concept_attention/joint_transformer_block_concept.pyi b/.typings/mflux/models/flux/variants/concept_attention/joint_transformer_block_concept.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/concept_attention/joint_transformer_block_concept.pyi
rename to .typings/mflux/models/flux/variants/concept_attention/joint_transformer_block_concept.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/concept_attention/transformer_concept.pyi b/.typings/mflux/models/flux/variants/concept_attention/transformer_concept.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/concept_attention/transformer_concept.pyi
rename to .typings/mflux/models/flux/variants/concept_attention/transformer_concept.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/controlnet/transformer_controlnet.pyi b/.typings/mflux/models/flux/variants/controlnet/transformer_controlnet.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/controlnet/transformer_controlnet.pyi
rename to .typings/mflux/models/flux/variants/controlnet/transformer_controlnet.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/kontext/__init__.pyi b/.typings/mflux/models/flux/variants/kontext/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/kontext/__init__.pyi
rename to .typings/mflux/models/flux/variants/kontext/__init__.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/kontext/flux_kontext.pyi b/.typings/mflux/models/flux/variants/kontext/flux_kontext.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/kontext/flux_kontext.pyi
rename to .typings/mflux/models/flux/variants/kontext/flux_kontext.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/kontext/kontext_util.pyi b/.typings/mflux/models/flux/variants/kontext/kontext_util.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/kontext/kontext_util.pyi
rename to .typings/mflux/models/flux/variants/kontext/kontext_util.pyi
diff --git a/.mlx_typings/mflux/models/flux/variants/txt2img/flux.pyi b/.typings/mflux/models/flux/variants/txt2img/flux.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/variants/txt2img/flux.pyi
rename to .typings/mflux/models/flux/variants/txt2img/flux.pyi
diff --git a/.mlx_typings/mflux/models/flux/weights/__init__.pyi b/.typings/mflux/models/flux/weights/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/weights/__init__.pyi
rename to .typings/mflux/models/flux/weights/__init__.pyi
diff --git a/.mlx_typings/mflux/models/flux/weights/flux_lora_mapping.pyi b/.typings/mflux/models/flux/weights/flux_lora_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/weights/flux_lora_mapping.pyi
rename to .typings/mflux/models/flux/weights/flux_lora_mapping.pyi
diff --git a/.mlx_typings/mflux/models/flux/weights/flux_weight_definition.pyi b/.typings/mflux/models/flux/weights/flux_weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/weights/flux_weight_definition.pyi
rename to .typings/mflux/models/flux/weights/flux_weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/flux/weights/flux_weight_mapping.pyi b/.typings/mflux/models/flux/weights/flux_weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/flux/weights/flux_weight_mapping.pyi
rename to .typings/mflux/models/flux/weights/flux_weight_mapping.pyi
diff --git a/.mlx_typings/mflux/models/qwen/__init__.pyi b/.typings/mflux/models/qwen/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/__init__.pyi
rename to .typings/mflux/models/qwen/__init__.pyi
diff --git a/.mlx_typings/mflux/models/qwen/cli/__init__.pyi b/.typings/mflux/models/qwen/cli/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/cli/__init__.pyi
rename to .typings/mflux/models/qwen/cli/__init__.pyi
diff --git a/.mlx_typings/mflux/models/qwen/latent_creator/__init__.pyi b/.typings/mflux/models/qwen/latent_creator/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/latent_creator/__init__.pyi
rename to .typings/mflux/models/qwen/latent_creator/__init__.pyi
diff --git a/.mlx_typings/mflux/models/qwen/latent_creator/qwen_latent_creator.pyi b/.typings/mflux/models/qwen/latent_creator/qwen_latent_creator.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/latent_creator/qwen_latent_creator.pyi
rename to .typings/mflux/models/qwen/latent_creator/qwen_latent_creator.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/__init__.pyi b/.typings/mflux/models/qwen/model/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/__init__.pyi
rename to .typings/mflux/models/qwen/model/__init__.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_attention.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_attention.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_attention.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder_layer.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder_layer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder_layer.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_encoder_layer.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_mlp.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_mlp.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_mlp.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_mlp.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_patch_merger.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_patch_merger.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_patch_merger.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_patch_merger.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_prompt_encoder.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_prompt_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_prompt_encoder.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_prompt_encoder.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rms_norm.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rms_norm.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rms_norm.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rms_norm.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rope.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rope.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rope.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_rope.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_text_encoder.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_text_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_text_encoder.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_text_encoder.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_attention.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_attention.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_attention.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_block.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_block.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_block.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_language_encoder.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_language_encoder.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_language_encoder.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_language_encoder.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_mlp.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_mlp.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_mlp.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_mlp.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_patch_embed.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_patch_embed.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_patch_embed.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_patch_embed.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_rotary_embedding.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_rotary_embedding.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_rotary_embedding.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_rotary_embedding.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_transformer.pyi b/.typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_transformer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_transformer.pyi
rename to .typings/mflux/models/qwen/model/qwen_text_encoder/qwen_vision_transformer.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_attention.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_attention.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_attention.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_attention.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_feed_forward.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_feed_forward.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_feed_forward.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_feed_forward.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_rope.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_rope.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_rope.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_rope.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_time_text_embed.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_time_text_embed.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_time_text_embed.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_time_text_embed.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_timestep_embedding.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_timestep_embedding.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_timestep_embedding.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_timestep_embedding.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_timesteps.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_timesteps.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_timesteps.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_timesteps.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_block.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_block.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_block.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_block.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_rms_norm.pyi b/.typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_rms_norm.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_rms_norm.pyi
rename to .typings/mflux/models/qwen/model/qwen_transformer/qwen_transformer_rms_norm.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_attention_block_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_attention_block_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_attention_block_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_attention_block_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_causal_conv_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_causal_conv_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_causal_conv_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_causal_conv_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_decoder_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_decoder_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_decoder_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_decoder_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_down_block_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_down_block_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_down_block_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_down_block_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_encoder_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_encoder_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_encoder_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_encoder_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_mid_block_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_mid_block_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_mid_block_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_mid_block_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_res_block_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_res_block_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_res_block_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_res_block_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_resample_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_resample_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_resample_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_resample_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_rms_norm.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_rms_norm.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_rms_norm.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_rms_norm.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_up_block_3d.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_image_up_block_3d.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_image_up_block_3d.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_image_up_block_3d.pyi
diff --git a/.mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_vae.pyi b/.typings/mflux/models/qwen/model/qwen_vae/qwen_vae.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/model/qwen_vae/qwen_vae.pyi
rename to .typings/mflux/models/qwen/model/qwen_vae/qwen_vae.pyi
diff --git a/.mlx_typings/mflux/models/qwen/qwen_initializer.pyi b/.typings/mflux/models/qwen/qwen_initializer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/qwen_initializer.pyi
rename to .typings/mflux/models/qwen/qwen_initializer.pyi
diff --git a/.mlx_typings/mflux/models/qwen/tokenizer/__init__.pyi b/.typings/mflux/models/qwen/tokenizer/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/tokenizer/__init__.pyi
rename to .typings/mflux/models/qwen/tokenizer/__init__.pyi
diff --git a/.mlx_typings/mflux/models/qwen/tokenizer/qwen_image_processor.pyi b/.typings/mflux/models/qwen/tokenizer/qwen_image_processor.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/tokenizer/qwen_image_processor.pyi
rename to .typings/mflux/models/qwen/tokenizer/qwen_image_processor.pyi
diff --git a/.mlx_typings/mflux/models/qwen/tokenizer/qwen_vision_language_processor.pyi b/.typings/mflux/models/qwen/tokenizer/qwen_vision_language_processor.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/tokenizer/qwen_vision_language_processor.pyi
rename to .typings/mflux/models/qwen/tokenizer/qwen_vision_language_processor.pyi
diff --git a/.mlx_typings/mflux/models/qwen/tokenizer/qwen_vision_language_tokenizer.pyi b/.typings/mflux/models/qwen/tokenizer/qwen_vision_language_tokenizer.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/tokenizer/qwen_vision_language_tokenizer.pyi
rename to .typings/mflux/models/qwen/tokenizer/qwen_vision_language_tokenizer.pyi
diff --git a/.mlx_typings/mflux/models/qwen/variants/__init__.pyi b/.typings/mflux/models/qwen/variants/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/variants/__init__.pyi
rename to .typings/mflux/models/qwen/variants/__init__.pyi
diff --git a/.mlx_typings/mflux/models/qwen/variants/edit/qwen_edit_util.pyi b/.typings/mflux/models/qwen/variants/edit/qwen_edit_util.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/variants/edit/qwen_edit_util.pyi
rename to .typings/mflux/models/qwen/variants/edit/qwen_edit_util.pyi
diff --git a/.mlx_typings/mflux/models/qwen/variants/edit/qwen_image_edit.pyi b/.typings/mflux/models/qwen/variants/edit/qwen_image_edit.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/variants/edit/qwen_image_edit.pyi
rename to .typings/mflux/models/qwen/variants/edit/qwen_image_edit.pyi
diff --git a/.mlx_typings/mflux/models/qwen/variants/txt2img/qwen_image.pyi b/.typings/mflux/models/qwen/variants/txt2img/qwen_image.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/variants/txt2img/qwen_image.pyi
rename to .typings/mflux/models/qwen/variants/txt2img/qwen_image.pyi
diff --git a/.mlx_typings/mflux/models/qwen/weights/__init__.pyi b/.typings/mflux/models/qwen/weights/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/weights/__init__.pyi
rename to .typings/mflux/models/qwen/weights/__init__.pyi
diff --git a/.mlx_typings/mflux/models/qwen/weights/qwen_lora_mapping.pyi b/.typings/mflux/models/qwen/weights/qwen_lora_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/weights/qwen_lora_mapping.pyi
rename to .typings/mflux/models/qwen/weights/qwen_lora_mapping.pyi
diff --git a/.mlx_typings/mflux/models/qwen/weights/qwen_weight_definition.pyi b/.typings/mflux/models/qwen/weights/qwen_weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/weights/qwen_weight_definition.pyi
rename to .typings/mflux/models/qwen/weights/qwen_weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/qwen/weights/qwen_weight_mapping.pyi b/.typings/mflux/models/qwen/weights/qwen_weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/qwen/weights/qwen_weight_mapping.pyi
rename to .typings/mflux/models/qwen/weights/qwen_weight_mapping.pyi
diff --git a/.mlx_typings/mflux/models/seedvr2/weights/seedvr2_weight_definition.pyi b/.typings/mflux/models/seedvr2/weights/seedvr2_weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/seedvr2/weights/seedvr2_weight_definition.pyi
rename to .typings/mflux/models/seedvr2/weights/seedvr2_weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/seedvr2/weights/seedvr2_weight_mapping.pyi b/.typings/mflux/models/seedvr2/weights/seedvr2_weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/seedvr2/weights/seedvr2_weight_mapping.pyi
rename to .typings/mflux/models/seedvr2/weights/seedvr2_weight_mapping.pyi
diff --git a/.mlx_typings/mflux/models/z_image/latent_creator/z_image_latent_creator.pyi b/.typings/mflux/models/z_image/latent_creator/z_image_latent_creator.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/z_image/latent_creator/z_image_latent_creator.pyi
rename to .typings/mflux/models/z_image/latent_creator/z_image_latent_creator.pyi
diff --git a/.mlx_typings/mflux/models/z_image/weights/z_image_weight_definition.pyi b/.typings/mflux/models/z_image/weights/z_image_weight_definition.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/z_image/weights/z_image_weight_definition.pyi
rename to .typings/mflux/models/z_image/weights/z_image_weight_definition.pyi
diff --git a/.mlx_typings/mflux/models/z_image/weights/z_image_weight_mapping.pyi b/.typings/mflux/models/z_image/weights/z_image_weight_mapping.pyi
similarity index 100%
rename from .mlx_typings/mflux/models/z_image/weights/z_image_weight_mapping.pyi
rename to .typings/mflux/models/z_image/weights/z_image_weight_mapping.pyi
diff --git a/.mlx_typings/mflux/release/__init__.pyi b/.typings/mflux/release/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/release/__init__.pyi
rename to .typings/mflux/release/__init__.pyi
diff --git a/.mlx_typings/mflux/utils/__init__.pyi b/.typings/mflux/utils/__init__.pyi
similarity index 100%
rename from .mlx_typings/mflux/utils/__init__.pyi
rename to .typings/mflux/utils/__init__.pyi
diff --git a/.mlx_typings/mflux/utils/box_values.pyi b/.typings/mflux/utils/box_values.pyi
similarity index 100%
rename from .mlx_typings/mflux/utils/box_values.pyi
rename to .typings/mflux/utils/box_values.pyi
diff --git a/.mlx_typings/mflux/utils/exceptions.pyi b/.typings/mflux/utils/exceptions.pyi
similarity index 100%
rename from .mlx_typings/mflux/utils/exceptions.pyi
rename to .typings/mflux/utils/exceptions.pyi
diff --git a/.mlx_typings/mflux/utils/generated_image.pyi b/.typings/mflux/utils/generated_image.pyi
similarity index 100%
rename from .mlx_typings/mflux/utils/generated_image.pyi
rename to .typings/mflux/utils/generated_image.pyi
diff --git a/.mlx_typings/mflux/utils/image_util.pyi b/.typings/mflux/utils/image_util.pyi
similarity index 100%
rename from .mlx_typings/mflux/utils/image_util.pyi
rename to .typings/mflux/utils/image_util.pyi
diff --git a/.mlx_typings/mflux/utils/metadata_builder.pyi b/.typings/mflux/utils/metadata_builder.pyi
similarity index 100%
rename from .mlx_typings/mflux/utils/metadata_builder.pyi
rename to .typings/mflux/utils/metadata_builder.pyi
diff --git a/.mlx_typings/mflux/utils/version_util.pyi b/.typings/mflux/utils/version_util.pyi
similarity index 100%
rename from .mlx_typings/mflux/utils/version_util.pyi
rename to .typings/mflux/utils/version_util.pyi
diff --git a/.mlx_typings/mlx/core/__init__.pyi b/.typings/mlx/core/__init__.pyi
similarity index 99%
rename from .mlx_typings/mlx/core/__init__.pyi
rename to .typings/mlx/core/__init__.pyi
index 421e75dbf..b0479bf26 100644
--- a/.mlx_typings/mlx/core/__init__.pyi
+++ b/.typings/mlx/core/__init__.pyi
@@ -1767,12 +1767,12 @@ def clip(
         array: The clipped array.
     """
 
-def compile(
-    fun: Callable,
+def compile[F: Callable[..., object]](
+    fun: F,
     inputs: object | None = ...,
     outputs: object | None = ...,
     shapeless: bool = ...,
-) -> Callable:
+) -> F:
     """
     Returns a compiled function which produces the same output as ``fun``.
 
@@ -2915,8 +2915,8 @@ def gather_mm(
     a: array,
     b: array,
     /,
-    lhs_indices: array,
-    rhs_indices: array,
+    lhs_indices: array | None = ...,
+    rhs_indices: array | None = ...,
     *,
     sorted_indices: bool = ...,
     stream: Stream | Device | None = ...,
@@ -4707,6 +4707,7 @@ def softmax(
     /,
     axis: int | Sequence[int] | None = ...,
     *,
+    precise: bool = ...,
     stream: Stream | Device | None = ...,
 ) -> array:
     """
diff --git a/.mlx_typings/mlx/core/cuda/__init__.pyi b/.typings/mlx/core/cuda/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx/core/cuda/__init__.pyi
rename to .typings/mlx/core/cuda/__init__.pyi
diff --git a/.mlx_typings/mlx/core/distributed/__init__.pyi b/.typings/mlx/core/distributed/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx/core/distributed/__init__.pyi
rename to .typings/mlx/core/distributed/__init__.pyi
diff --git a/.mlx_typings/mlx/core/metal/__init__.pyi b/.typings/mlx/core/metal/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx/core/metal/__init__.pyi
rename to .typings/mlx/core/metal/__init__.pyi
diff --git a/.mlx_typings/mlx/core/random/__init__.pyi b/.typings/mlx/core/random/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx/core/random/__init__.pyi
rename to .typings/mlx/core/random/__init__.pyi
diff --git a/.mlx_typings/mlx/nn/__init__.pyi b/.typings/mlx/nn/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/__init__.pyi
rename to .typings/mlx/nn/__init__.pyi
diff --git a/.mlx_typings/mlx/nn/init.pyi b/.typings/mlx/nn/init.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/init.pyi
rename to .typings/mlx/nn/init.pyi
diff --git a/.mlx_typings/mlx/nn/layers/__init__.pyi b/.typings/mlx/nn/layers/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/__init__.pyi
rename to .typings/mlx/nn/layers/__init__.pyi
diff --git a/.mlx_typings/mlx/nn/layers/activations.pyi b/.typings/mlx/nn/layers/activations.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/activations.pyi
rename to .typings/mlx/nn/layers/activations.pyi
diff --git a/.mlx_typings/mlx/nn/layers/base.pyi b/.typings/mlx/nn/layers/base.pyi
similarity index 98%
rename from .mlx_typings/mlx/nn/layers/base.pyi
rename to .typings/mlx/nn/layers/base.pyi
index fcd8cf53e..e9055679d 100644
--- a/.mlx_typings/mlx/nn/layers/base.pyi
+++ b/.typings/mlx/nn/layers/base.pyi
@@ -57,6 +57,10 @@ class Module(dict):
     def __init__(self) -> None:
         """Should be called by the subclasses of ``Module``."""
 
+    def __getitem__(self, key: str) -> mx.array | Module: ...
+    def get(
+        self, key: str, default: mx.array | Module | None = ...
+    ) -> mx.array | Module | None: ...
     @property
     def training(self):  # -> bool:
         """Boolean indicating if the model is in training mode."""
diff --git a/.mlx_typings/mlx/nn/layers/containers.pyi b/.typings/mlx/nn/layers/containers.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/containers.pyi
rename to .typings/mlx/nn/layers/containers.pyi
diff --git a/.mlx_typings/mlx/nn/layers/convolution.pyi b/.typings/mlx/nn/layers/convolution.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/convolution.pyi
rename to .typings/mlx/nn/layers/convolution.pyi
diff --git a/.mlx_typings/mlx/nn/layers/convolution_transpose.pyi b/.typings/mlx/nn/layers/convolution_transpose.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/convolution_transpose.pyi
rename to .typings/mlx/nn/layers/convolution_transpose.pyi
diff --git a/.mlx_typings/mlx/nn/layers/distributed.pyi b/.typings/mlx/nn/layers/distributed.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/distributed.pyi
rename to .typings/mlx/nn/layers/distributed.pyi
diff --git a/.mlx_typings/mlx/nn/layers/dropout.pyi b/.typings/mlx/nn/layers/dropout.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/dropout.pyi
rename to .typings/mlx/nn/layers/dropout.pyi
diff --git a/.mlx_typings/mlx/nn/layers/embedding.pyi b/.typings/mlx/nn/layers/embedding.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/embedding.pyi
rename to .typings/mlx/nn/layers/embedding.pyi
diff --git a/.mlx_typings/mlx/nn/layers/linear.pyi b/.typings/mlx/nn/layers/linear.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/linear.pyi
rename to .typings/mlx/nn/layers/linear.pyi
diff --git a/.mlx_typings/mlx/nn/layers/normalization.pyi b/.typings/mlx/nn/layers/normalization.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/normalization.pyi
rename to .typings/mlx/nn/layers/normalization.pyi
diff --git a/.mlx_typings/mlx/nn/layers/pooling.pyi b/.typings/mlx/nn/layers/pooling.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/pooling.pyi
rename to .typings/mlx/nn/layers/pooling.pyi
diff --git a/.mlx_typings/mlx/nn/layers/positional_encoding.pyi b/.typings/mlx/nn/layers/positional_encoding.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/positional_encoding.pyi
rename to .typings/mlx/nn/layers/positional_encoding.pyi
diff --git a/.mlx_typings/mlx/nn/layers/quantized.pyi b/.typings/mlx/nn/layers/quantized.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/quantized.pyi
rename to .typings/mlx/nn/layers/quantized.pyi
diff --git a/.mlx_typings/mlx/nn/layers/recurrent.pyi b/.typings/mlx/nn/layers/recurrent.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/recurrent.pyi
rename to .typings/mlx/nn/layers/recurrent.pyi
diff --git a/.mlx_typings/mlx/nn/layers/transformer.pyi b/.typings/mlx/nn/layers/transformer.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/transformer.pyi
rename to .typings/mlx/nn/layers/transformer.pyi
diff --git a/.mlx_typings/mlx/nn/layers/upsample.pyi b/.typings/mlx/nn/layers/upsample.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/layers/upsample.pyi
rename to .typings/mlx/nn/layers/upsample.pyi
diff --git a/.mlx_typings/mlx/nn/losses.pyi b/.typings/mlx/nn/losses.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/losses.pyi
rename to .typings/mlx/nn/losses.pyi
diff --git a/.mlx_typings/mlx/nn/utils.pyi b/.typings/mlx/nn/utils.pyi
similarity index 100%
rename from .mlx_typings/mlx/nn/utils.pyi
rename to .typings/mlx/nn/utils.pyi
diff --git a/.mlx_typings/mlx/utils.pyi b/.typings/mlx/utils.pyi
similarity index 100%
rename from .mlx_typings/mlx/utils.pyi
rename to .typings/mlx/utils.pyi
diff --git a/.mlx_typings/mlx_lm/__init__.pyi b/.typings/mlx_lm/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/__init__.pyi
rename to .typings/mlx_lm/__init__.pyi
diff --git a/.mlx_typings/mlx_lm/_version.pyi b/.typings/mlx_lm/_version.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/_version.pyi
rename to .typings/mlx_lm/_version.pyi
diff --git a/.mlx_typings/mlx_lm/convert.pyi b/.typings/mlx_lm/convert.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/convert.pyi
rename to .typings/mlx_lm/convert.pyi
diff --git a/.mlx_typings/mlx_lm/generate.pyi b/.typings/mlx_lm/generate.pyi
similarity index 99%
rename from .mlx_typings/mlx_lm/generate.pyi
rename to .typings/mlx_lm/generate.pyi
index aa38c0275..22ddc558d 100644
--- a/.mlx_typings/mlx_lm/generate.pyi
+++ b/.typings/mlx_lm/generate.pyi
@@ -383,11 +383,12 @@ class GenerationBatch:
     state_machines: List[SequenceStateMachine]
     max_tokens: List[int]
     _current_tokens: Optional[mx.array]
-    _current_logprobs: List[mx.array]
-    _next_tokens: mx.array
-    _next_logprobs: List[mx.array]
-    _token_context: List[mx.array]
+    _current_logprobs: mx.array | List[mx.array]
+    _next_tokens: Optional[mx.array]
+    _next_logprobs: mx.array | List[mx.array]
+    _token_context: List[Any]
     _num_tokens: List[int]
+    _matcher_states: List[Any]
 
     def __init__(
         self,
diff --git a/.mlx_typings/mlx_lm/models/__init__.pyi b/.typings/mlx_lm/models/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/__init__.pyi
rename to .typings/mlx_lm/models/__init__.pyi
diff --git a/.mlx_typings/mlx_lm/models/activations.pyi b/.typings/mlx_lm/models/activations.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/activations.pyi
rename to .typings/mlx_lm/models/activations.pyi
diff --git a/.mlx_typings/mlx_lm/models/base.pyi b/.typings/mlx_lm/models/base.pyi
similarity index 89%
rename from .mlx_typings/mlx_lm/models/base.pyi
rename to .typings/mlx_lm/models/base.pyi
index e549e624f..06b16e19d 100644
--- a/.mlx_typings/mlx_lm/models/base.pyi
+++ b/.typings/mlx_lm/models/base.pyi
@@ -3,7 +3,7 @@ This type stub file was generated by pyright.
 """
 
 from dataclasses import dataclass
-from typing import Optional
+from typing import Any, Optional
 
 import mlx.core as mx
 
@@ -37,10 +37,10 @@ def quantized_scaled_dot_product_attention(
     bits: int = ...,
 ) -> mx.array: ...
 def scaled_dot_product_attention(
-    queries,
-    keys,
-    values,
-    cache,
+    queries: mx.array,
+    keys: mx.array,
+    values: mx.array,
+    cache: Optional[Any],
     scale: float,
     mask: Optional[mx.array],
     sinks: Optional[mx.array] = ...,
diff --git a/.mlx_typings/mlx_lm/models/bitlinear_layers.pyi b/.typings/mlx_lm/models/bitlinear_layers.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/bitlinear_layers.pyi
rename to .typings/mlx_lm/models/bitlinear_layers.pyi
diff --git a/.mlx_typings/mlx_lm/models/cache.pyi b/.typings/mlx_lm/models/cache.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/cache.pyi
rename to .typings/mlx_lm/models/cache.pyi
diff --git a/.mlx_typings/mlx_lm/models/deepseek_v3.pyi b/.typings/mlx_lm/models/deepseek_v3.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/deepseek_v3.pyi
rename to .typings/mlx_lm/models/deepseek_v3.pyi
diff --git a/.typings/mlx_lm/models/deepseek_v4.pyi b/.typings/mlx_lm/models/deepseek_v4.pyi
new file mode 100644
index 000000000..0f753826f
--- /dev/null
+++ b/.typings/mlx_lm/models/deepseek_v4.pyi
@@ -0,0 +1,280 @@
+"""Type stubs for mlx_lm.models.deepseek_v4"""
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+import mlx.core as mx
+import mlx.nn as nn
+
+from .base import BaseModelArgs
+from .cache import ArraysCache, RotatingKVCache
+from .switch_layers import SwitchGLU
+
+@dataclass
+class ModelArgs(BaseModelArgs):  # Stub of the model config dataclass; no field declares a default here.
+    model_type: str
+    vocab_size: int
+    hidden_size: int
+    intermediate_size: int
+    moe_intermediate_size: int
+    num_hidden_layers: int
+    num_attention_heads: int
+    num_key_value_heads: int
+    n_shared_experts: Optional[int]  # None is permitted by the annotation
+    n_routed_experts: int
+    num_experts_per_tok: int
+    head_dim: int
+    qk_rope_head_dim: int
+    q_lora_rank: int
+    o_lora_rank: int
+    o_groups: int
+    sliding_window: int
+    hc_mult: int
+    hc_sinkhorn_iters: int
+    hc_eps: float
+    compress_ratios: Optional[List[int]]  # presumably one ratio per layer -- TODO confirm against mlx_lm
+    compress_rope_theta: float
+    rope_theta: float
+    rope_scaling: Optional[Dict[str, Any]]  # free-form mapping; its keys are not typed in this stub
+    rms_norm_eps: float
+    swiglu_limit: float
+    attention_bias: bool
+    max_position_embeddings: int
+
+class DeepseekV4RoPE(nn.Module):  # Stub: module that is called on an array and returns an array.
+    dims: int
+    freqs: mx.array
+
+    def __init__(
+        self,
+        dims: int,
+        base: float,
+        scaling_config: Optional[Dict[str, Any]] = None,  # optional; untyped mapping
+    ) -> None: ...
+    def __call__(
+        self,
+        x: mx.array,
+        offset: int = 0,
+        inverse: bool = False,  # NOTE(review): presumably applies the inverse rotation -- confirm
+    ) -> mx.array: ...
+
+class HyperConnection(nn.Module):  # Stub: only the constructor is typed; no __call__ signature is declared.
+    dim: int
+    hc_mult: int
+    norm_eps: float
+
+    def __init__(
+        self,
+        dim: int,
+        hc_mult: int,
+        norm_eps: float,
+        sinkhorn_iters: int,  # constructor-only; not declared as an attribute above
+        hc_eps: float,  # constructor-only; not declared as an attribute above
+    ) -> None: ...
+
+class HyperHead(nn.Module):  # Stub: module that maps one array to one array when called.
+    dim: int
+    hc_mult: int
+
+    def __init__(
+        self,
+        dim: int,
+        hc_mult: int,
+        norm_eps: float,  # constructor-only; not declared as an attribute above
+        hc_eps: float,  # constructor-only; not declared as an attribute above
+    ) -> None: ...
+    def __call__(self, x: mx.array) -> mx.array: ...
+
+class Compressor(nn.Module):  # Stub: module called with an array, a DeepseekV4Cache and an offset; returns an array.
+    dim: int
+    head_dim: int
+    rope_head_dim: int
+    compress_ratio: int
+    overlap: bool
+    wkv_gate: nn.Linear
+    ape: mx.array
+    norm: nn.RMSNorm
+    rope: DeepseekV4RoPE
+
+    def __init__(
+        self,
+        dim: int,
+        compress_ratio: int,
+        head_dim: int,
+        rope_head_dim: int,
+        rms_norm_eps: float,
+        rope: DeepseekV4RoPE,
+    ) -> None: ...
+    def __call__(
+        self,
+        x: mx.array,
+        cache: DeepseekV4Cache,  # forward reference; stubs do not need it quoted
+        offset: Any,  # left as Any in this stub
+        key: str = ...,  # default value elided in the stub
+    ) -> mx.array: ...
+
+class Indexer(nn.Module):  # Stub: constructor only; no attributes or __call__ are declared here.
+    def __init__(
+        self,
+        args: ModelArgs,
+        compress_ratio: int,
+        rope: DeepseekV4RoPE,
+    ) -> None: ...
+
+class _CompressorBranch:  # Stub: plain state holder (not an nn.Module); array and length fields may be None.
+    buffer_kv: Optional[mx.array]
+    buffer_gate: Optional[mx.array]
+    prev_kv: Optional[mx.array]
+    prev_gate: Optional[mx.array]
+    pool: Optional[mx.array]
+    buffer_lengths: Optional[List[int]]
+    pool_lengths: Optional[List[int]]
+    buffer_count: int  # the only field here that is not Optional
+    _new_pool_lengths: Optional[List[int]]
+
+    def __init__(self) -> None: ...  # takes no arguments
+
+class DeepseekV4Cache:  # Stub: cache holding a RotatingKVCache ("local") and named _CompressorBranch entries.
+    local: RotatingKVCache
+    offset: int
+    keys: Optional[mx.array]
+    values: Optional[mx.array]
+    state: Any  # left as Any in this stub
+    meta_state: Any  # left as Any in this stub
+    nbytes: int
+    _branches: Dict[str, _CompressorBranch]  # branches are looked up by a string key
+    _pending_lengths: Optional[List[int]]
+
+    def __init__(self, sliding_window: int) -> None: ...
+    def update_and_fetch(
+        self, keys: mx.array, values: mx.array
+    ) -> tuple[mx.array, mx.array]: ...
+    def is_trimmable(self) -> bool: ...
+    def trim(self, n: int) -> int: ...
+    def empty(self) -> bool: ...
+    def size(self) -> int: ...
+    def prepare(
+        self,
+        *,  # everything after this marker is keyword-only
+        left_padding: Optional[List[int]] = None,
+        lengths: Optional[List[int]] = None,
+        right_padding: Optional[List[int]] = None,
+    ) -> None: ...
+    def finalize(self) -> None: ...
+    def filter(self, batch_indices: mx.array) -> None: ...
+    def extend(self, other: DeepseekV4Cache) -> None: ...  # self-reference needs no quotes in a stub
+    def extract(self, idx: int) -> DeepseekV4Cache: ...
+    @classmethod
+    def merge(cls, caches: List[DeepseekV4Cache]) -> DeepseekV4Cache: ...
+
+class V4Attention(nn.Module):  # stub: attention module built from (args, layer_id); attributes below are declarations only
+    args: ModelArgs
+    layer_id: int
+    dim: int
+    n_heads: int
+    head_dim: int
+    rope_head_dim: int
+    nope_head_dim: int
+    n_groups: int
+    q_lora_rank: int
+    o_lora_rank: int
+    window: int
+    eps: float
+    scale: float
+    compress_ratio: int
+    wqkv_a: nn.Linear
+    q_norm: nn.RMSNorm
+    wq_b: nn.Linear
+    kv_norm: nn.RMSNorm
+    attn_sink: mx.array
+    wo_a: nn.Linear
+    wo_b: nn.Linear
+    rope: DeepseekV4RoPE
+    compressor: Compressor  # NOTE(review): declared unconditionally — verify every layer actually builds one
+    indexer: Indexer  # NOTE(review): declared unconditionally — verify every layer actually builds one
+
+    def __init__(self, args: ModelArgs, layer_id: int) -> None: ...
+    def __call__(
+        self,
+        x: mx.array,
+        mask: Optional[mx.array] = None,
+        cache: Optional[Any] = None,  # loosely typed; Model.make_cache yields RotatingKVCache | DeepseekV4Cache entries
+    ) -> mx.array: ...
+
+class DeepseekV4MLP(nn.Module):  # stub: MLP with gate/up/down linear projections; maps an array to an array
+    gate_proj: nn.Linear
+    up_proj: nn.Linear
+    down_proj: nn.Linear
+
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        swiglu_limit: float = 0.0,  # NOTE(review): 0.0 presumably means "no limit" — confirm against the implementation
+    ) -> None: ...
+    def __call__(self, x: mx.array) -> mx.array: ...
+
+class MoEGate(nn.Module):  # stub: gate whose call needs input_ids in addition to the activations
+    weight: mx.array
+
+    def __init__(self, args: ModelArgs, layer_id: int) -> None: ...
+    def __call__(
+        self, x: mx.array, input_ids: mx.array
+    ) -> tuple[mx.array, mx.array]: ...  # two arrays; which is indices vs. weights is not stated in this stub
+
+class DeepseekV4MoE(nn.Module):  # stub: MoE block holding a gate, a SwitchGLU expert bank and a shared-experts MLP
+    num_experts_per_tok: int
+    switch_mlp: SwitchGLU
+    gate: MoEGate
+    shared_experts: DeepseekV4MLP
+
+    def __init__(self, args: ModelArgs, layer_id: int) -> None: ...
+    def __call__(self, x: mx.array, input_ids: mx.array) -> mx.array: ...  # input_ids is required, mirroring MoEGate.__call__
+
+class DeepseekV4Block(nn.Module):  # stub: one layer pairing attention and MoE, each with its own norm and HyperConnection
+    attn_norm: nn.RMSNorm
+    attn: V4Attention
+    hc_attn: HyperConnection
+    ffn_norm: nn.RMSNorm
+    ffn: DeepseekV4MoE
+    hc_ffn: HyperConnection
+
+    def __init__(self, args: ModelArgs, layer_id: int) -> None: ...
+    def __call__(
+        self,
+        h: mx.array,
+        cache: Optional[Any],  # no default: callers must pass it explicitly, though None is accepted
+        input_ids: mx.array,
+    ) -> mx.array: ...
+
+class DeepseekV4Model(nn.Module):  # stub: inner model (embedding, list of blocks, final norm, HyperHead)
+    args: ModelArgs
+    vocab_size: int
+    embed_tokens: nn.Embedding
+    layers: list[DeepseekV4Block]
+    norm: nn.RMSNorm
+    hc_head: HyperHead
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        inputs: mx.array,
+        cache: Optional[List[Any]] = None,  # a list of caches (element type left as Any), or None
+    ) -> mx.array: ...
+
+class Model(nn.Module):  # stub: top-level wrapper around DeepseekV4Model plus an lm_head projection
+    args: ModelArgs
+    model_type: str
+    model: DeepseekV4Model
+    lm_head: nn.Linear
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        inputs: mx.array,
+        cache: Optional[List[Any]] = None,
+    ) -> mx.array: ...
+    def sanitize(self, weights: dict[str, Any]) -> dict[str, Any]: ...  # maps a name->weight dict to a name->weight dict
+    def make_cache(self) -> list[RotatingKVCache | DeepseekV4Cache]: ...  # list entries may be either cache type
+    @property
+    def layers(self) -> list[DeepseekV4Block]: ...  # same element type as DeepseekV4Model.layers
diff --git a/.mlx_typings/mlx_lm/models/gated_delta.pyi b/.typings/mlx_lm/models/gated_delta.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/gated_delta.pyi
rename to .typings/mlx_lm/models/gated_delta.pyi
diff --git a/.mlx_typings/mlx_lm/models/gemma4.pyi b/.typings/mlx_lm/models/gemma4.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/gemma4.pyi
rename to .typings/mlx_lm/models/gemma4.pyi
diff --git a/.mlx_typings/mlx_lm/models/gemma4_text.pyi b/.typings/mlx_lm/models/gemma4_text.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/gemma4_text.pyi
rename to .typings/mlx_lm/models/gemma4_text.pyi
diff --git a/.mlx_typings/mlx_lm/models/glm4_moe.pyi b/.typings/mlx_lm/models/glm4_moe.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/glm4_moe.pyi
rename to .typings/mlx_lm/models/glm4_moe.pyi
diff --git a/.mlx_typings/mlx_lm/models/glm_moe_dsa.pyi b/.typings/mlx_lm/models/glm_moe_dsa.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/glm_moe_dsa.pyi
rename to .typings/mlx_lm/models/glm_moe_dsa.pyi
diff --git a/.typings/mlx_lm/models/gpt_oss.pyi b/.typings/mlx_lm/models/gpt_oss.pyi
new file mode 100644
index 000000000..dc4ba8786
--- /dev/null
+++ b/.typings/mlx_lm/models/gpt_oss.pyi
@@ -0,0 +1,103 @@
+"""Type stubs for mlx_lm.models.gpt_oss"""
+
+from dataclasses import dataclass
+from typing import Any, List, Optional
+
+import mlx.core as mx
+import mlx.nn as nn
+
+from .base import BaseModelArgs
+from .cache import KVCache
+from .switch_layers import SwitchGLU
+
+@dataclass
+class ModelArgs(BaseModelArgs):  # stub: configuration dataclass for gpt_oss; no field declares a default here
+    model_type: str
+    hidden_size: int
+    intermediate_size: int
+    num_hidden_layers: int
+    num_attention_heads: int
+    num_key_value_heads: int
+    num_local_experts: int
+    num_experts_per_tok: int
+    vocab_size: int
+    rms_norm_eps: float
+    sliding_window: int
+    layer_types: Optional[List[str]]  # Optional here, whereas GptOssMoeModel.layer_types is a non-optional List[str]
+
+def mlx_topk(a: mx.array, k: int, axis: int = -1) -> tuple[mx.array, mx.array]: ...  # returns two arrays; NOTE(review): presumably (values, indices) — confirm order
+
+class AttentionBlock(nn.Module):  # stub: attention module with q/k/v/o projections and a `sinks` array
+    head_dim: int
+    num_attention_heads: int
+    num_key_value_heads: int
+    num_key_value_groups: int
+    sinks: mx.array
+    q_proj: nn.Linear
+    k_proj: nn.Linear
+    v_proj: nn.Linear
+    o_proj: nn.Linear
+    sm_scale: float
+    rope: nn.Module  # typed only as a generic module; the concrete RoPE class is not named in this stub
+
+    def __init__(self, config: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        x: mx.array,
+        mask: Optional[mx.array] = None,
+        cache: Optional[Any] = None,  # cache type left as Any
+    ) -> mx.array: ...
+
+class TransformerBlock(nn.Module):  # stub: one layer made of an AttentionBlock and an MLPBlock
+    self_attn: AttentionBlock
+    mlp: MLPBlock  # forward reference: MLPBlock is declared further down this stub, which .pyi files permit
+
+    def __init__(self, config: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        x: mx.array,
+        mask: Optional[mx.array] = None,
+        cache: Optional[Any] = None,  # same loose cache typing as AttentionBlock.__call__
+    ) -> mx.array: ...
+
+class MLPBlock(nn.Module):  # stub: expert MLP with a linear router and a SwitchGLU expert bank
+    hidden_size: int
+    num_local_experts: int
+    num_experts_per_tok: int
+    experts: SwitchGLU
+    router: nn.Linear
+    sharding_group: Optional[mx.distributed.Group]  # may be None; NOTE(review): presumably set only when sharded — confirm
+
+    def __init__(self, config: ModelArgs) -> None: ...
+    def __call__(self, x: mx.array) -> mx.array: ...
+
+class GptOssMoeModel(nn.Module):  # stub: inner gpt_oss model (embedding, TransformerBlock list, final norm)
+    embed_tokens: nn.Embedding
+    norm: nn.RMSNorm
+    layer_types: List[str]  # non-optional here, unlike ModelArgs.layer_types
+    layers: list[TransformerBlock]
+    window_size: int
+    swa_idx: int  # NOTE(review): presumably the index of a sliding-window-attention layer — confirm
+    ga_idx: int  # NOTE(review): presumably the index of a global/full-attention layer — confirm
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        inputs: mx.array,
+        cache: Optional[Any] = None,  # typed Any rather than a list of caches
+    ) -> mx.array: ...
+
+class Model(nn.Module):  # stub: top-level gpt_oss wrapper around GptOssMoeModel plus an lm_head projection
+    model_type: str
+    model: GptOssMoeModel
+    lm_head: nn.Linear
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        inputs: mx.array,
+        cache: Optional[Any] = None,
+    ) -> mx.array: ...
+    @property
+    def layers(self) -> list[nn.Module]: ...  # NOTE(review): broader than GptOssMoeModel.layers (list[TransformerBlock]) — confirm this is intentional
+    def make_cache(self) -> list[KVCache]: ...  # NOTE(review): ModelArgs has sliding_window; confirm no layer uses a non-KVCache cache type
diff --git a/.typings/mlx_lm/models/minimax.pyi b/.typings/mlx_lm/models/minimax.pyi
new file mode 100644
index 000000000..06806d861
--- /dev/null
+++ b/.typings/mlx_lm/models/minimax.pyi
@@ -0,0 +1,94 @@
+"""Type stubs for mlx_lm.models.minimax"""
+
+from dataclasses import dataclass
+from typing import Any, Optional
+
+import mlx.core as mx
+import mlx.nn as nn
+
+from .base import BaseModelArgs
+from .switch_layers import SwitchGLU
+
+@dataclass
+class ModelArgs(BaseModelArgs):  # stub: configuration dataclass for minimax; no field declares a default here
+    model_type: str
+    hidden_size: int
+    intermediate_size: int
+    num_hidden_layers: int
+    num_attention_heads: int
+    num_key_value_heads: int
+    num_local_experts: int
+    num_experts_per_tok: int
+    max_position_embeddings: int  # last field declared; this stub lists no vocab_size or norm-eps field
+
+class MiniMaxAttention(nn.Module):  # stub: attention module with q/k/v/o projections plus q/k norms
+    num_heads: int  # NOTE(review): declared alongside num_attention_heads; presumably the same value — confirm
+    num_attention_heads: int
+    num_key_value_heads: int
+    head_dim: int
+    scale: float
+    q_proj: nn.Linear
+    k_proj: nn.Linear
+    v_proj: nn.Linear
+    o_proj: nn.Linear
+    q_norm: nn.Module  # typed only as a generic module; concrete norm class not named in this stub
+    k_norm: nn.Module  # typed only as a generic module; concrete norm class not named in this stub
+    rope: nn.Module  # typed only as a generic module; concrete RoPE class not named in this stub
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        x: mx.array,
+        mask: Optional[mx.array] = None,
+        cache: Optional[Any] = None,  # cache type left as Any
+    ) -> mx.array: ...
+
+class MiniMaxSparseMoeBlock(nn.Module):  # stub: MoE block with a linear gate and a SwitchGLU expert bank
+    num_experts_per_tok: int
+    gate: nn.Linear
+    switch_mlp: SwitchGLU
+    e_score_correction_bias: mx.array
+    sharding_group: Optional[mx.distributed.Group]  # may be None; same declaration as gpt_oss MLPBlock.sharding_group
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(self, x: mx.array) -> mx.array: ...
+
+class MiniMaxDecoderLayer(nn.Module):  # stub: one layer pairing attention and sparse MoE, each with an RMSNorm
+    self_attn: MiniMaxAttention
+    block_sparse_moe: MiniMaxSparseMoeBlock
+    input_layernorm: nn.RMSNorm
+    post_attention_layernorm: nn.RMSNorm
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        x: mx.array,
+        mask: Optional[mx.array] = None,
+        cache: Optional[Any] = None,  # same loose cache typing as MiniMaxAttention.__call__
+    ) -> mx.array: ...
+
+class MiniMaxModel(nn.Module):  # stub: inner minimax model (embedding, decoder-layer list, final norm)
+    embed_tokens: nn.Embedding
+    layers: list[MiniMaxDecoderLayer]
+    norm: nn.RMSNorm
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        inputs: mx.array,
+        cache: Optional[Any] = None,  # typed Any rather than a list of caches
+    ) -> mx.array: ...
+
+class Model(nn.Module):  # stub: top-level minimax wrapper around MiniMaxModel plus an lm_head projection
+    model_type: str
+    model: MiniMaxModel
+    lm_head: nn.Linear
+
+    def __init__(self, args: ModelArgs) -> None: ...
+    def __call__(
+        self,
+        inputs: mx.array,
+        cache: Optional[Any] = None,
+    ) -> mx.array: ...
+    @property
+    def layers(self) -> list[MiniMaxDecoderLayer]: ...  # same element type as MiniMaxModel.layers; no make_cache is declared in this stub
diff --git a/.mlx_typings/mlx_lm/models/nemotron_h.pyi b/.typings/mlx_lm/models/nemotron_h.pyi
similarity index 91%
rename from .mlx_typings/mlx_lm/models/nemotron_h.pyi
rename to .typings/mlx_lm/models/nemotron_h.pyi
index 6f38f3adf..32b07271e 100644
--- a/.mlx_typings/mlx_lm/models/nemotron_h.pyi
+++ b/.typings/mlx_lm/models/nemotron_h.pyi
@@ -92,6 +92,15 @@ class NemotronHAttention(nn.Module):
         cache: Optional[KVCache] = None,
     ) -> mx.array: ...
 
+class MoEGate(nn.Module):  # stub: gate module added to the nemotron_h typings; NemotronHMoE.gate is typed as this class
+    config: ModelArgs
+    top_k: int
+    norm_topk_prob: bool
+    weight: mx.array
+
+    def __init__(self, config: ModelArgs) -> None: ...
+    def __call__(self, x: mx.array) -> tuple[mx.array, mx.array]: ...  # two arrays; which is indices vs. weights is not stated in this stub
+
 class NemotronHMLP(nn.Module):
     up_proj: nn.Linear
     down_proj: nn.Linear
@@ -102,9 +111,14 @@ class NemotronHMLP(nn.Module):
     def __call__(self, x: mx.array) -> mx.array: ...
 
 class NemotronHMoE(nn.Module):
+    config: ModelArgs
     num_experts_per_tok: int
+    moe_latent_size: Optional[int]
     switch_mlp: SwitchMLP
+    gate: MoEGate
     shared_experts: NemotronHMLP
+    fc1_latent_proj: nn.Linear
+    fc2_latent_proj: nn.Linear
 
     def __init__(self, config: ModelArgs) -> None: ...
     def __call__(self, x: mx.array) -> mx.array: ...
diff --git a/.mlx_typings/mlx_lm/models/qwen3_5.pyi b/.typings/mlx_lm/models/qwen3_5.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/qwen3_5.pyi
rename to .typings/mlx_lm/models/qwen3_5.pyi
diff --git a/.mlx_typings/mlx_lm/models/qwen3_5_moe.pyi b/.typings/mlx_lm/models/qwen3_5_moe.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/qwen3_5_moe.pyi
rename to .typings/mlx_lm/models/qwen3_5_moe.pyi
diff --git a/.mlx_typings/mlx_lm/models/qwen3_next.pyi b/.typings/mlx_lm/models/qwen3_next.pyi
similarity index 99%
rename from .mlx_typings/mlx_lm/models/qwen3_next.pyi
rename to .typings/mlx_lm/models/qwen3_next.pyi
index 649669cf7..b36ac6b45 100644
--- a/.mlx_typings/mlx_lm/models/qwen3_next.pyi
+++ b/.typings/mlx_lm/models/qwen3_next.pyi
@@ -71,6 +71,7 @@ class Qwen3NextAttention(nn.Module):
 class Qwen3NextSparseMoeBlock(nn.Module):
     norm_topk_prob: bool
     num_experts: int
+    num_experts_per_tok: int
     top_k: int
     gate: nn.Linear
     switch_mlp: SwitchGLU
diff --git a/.mlx_typings/mlx_lm/models/rope_utils.pyi b/.typings/mlx_lm/models/rope_utils.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/rope_utils.pyi
rename to .typings/mlx_lm/models/rope_utils.pyi
diff --git a/.mlx_typings/mlx_lm/models/step3p5.pyi b/.typings/mlx_lm/models/step3p5.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/step3p5.pyi
rename to .typings/mlx_lm/models/step3p5.pyi
diff --git a/.mlx_typings/mlx_lm/models/switch_layers.pyi b/.typings/mlx_lm/models/switch_layers.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/models/switch_layers.pyi
rename to .typings/mlx_lm/models/switch_layers.pyi
diff --git a/.mlx_typings/mlx_lm/sample_utils.pyi b/.typings/mlx_lm/sample_utils.pyi
similarity index 96%
rename from .mlx_typings/mlx_lm/sample_utils.pyi
rename to .typings/mlx_lm/sample_utils.pyi
index c001e3ec5..b9f91a0c5 100644
--- a/.mlx_typings/mlx_lm/sample_utils.pyi
+++ b/.typings/mlx_lm/sample_utils.pyi
@@ -48,6 +48,10 @@ def make_logits_processors(
     logit_bias: Optional[Dict[int, float]] = ...,
     repetition_penalty: Optional[float] = ...,
     repetition_context_size: Optional[int] = ...,
+    presence_penalty: Optional[float] = ...,
+    presence_context_size: Optional[int] = ...,
+    frequency_penalty: Optional[float] = ...,
+    frequency_context_size: Optional[int] = ...,
 ) -> list[Callable[[mx.array, mx.array], mx.array]]:
     """
     Make logits processors for use with ``generate_step``.
diff --git a/.mlx_typings/mlx_lm/tokenizer_utils.pyi b/.typings/mlx_lm/tokenizer_utils.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/tokenizer_utils.pyi
rename to .typings/mlx_lm/tokenizer_utils.pyi
diff --git a/.mlx_typings/mlx_lm/tuner/dora.pyi b/.typings/mlx_lm/tuner/dora.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/tuner/dora.pyi
rename to .typings/mlx_lm/tuner/dora.pyi
diff --git a/.mlx_typings/mlx_lm/tuner/lora.pyi b/.typings/mlx_lm/tuner/lora.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/tuner/lora.pyi
rename to .typings/mlx_lm/tuner/lora.pyi
diff --git a/.mlx_typings/mlx_lm/tuner/utils.pyi b/.typings/mlx_lm/tuner/utils.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/tuner/utils.pyi
rename to .typings/mlx_lm/tuner/utils.pyi
diff --git a/.mlx_typings/mlx_lm/utils.pyi b/.typings/mlx_lm/utils.pyi
similarity index 100%
rename from .mlx_typings/mlx_lm/utils.pyi
rename to .typings/mlx_lm/utils.pyi
diff --git a/.mlx_typings/mlx_vlm/__init__.pyi b/.typings/mlx_vlm/__init__.pyi
similarity index 100%
rename from .mlx_typings/mlx_vlm/__init__.pyi
rename to .typings/mlx_vlm/__init__.pyi
diff --git a/.mlx_typings/mlx_vlm/prompt_utils.pyi b/.typings/mlx_vlm/prompt_utils.pyi
similarity index 100%
rename from .mlx_typings/mlx_vlm/prompt_utils.pyi
rename to .typings/mlx_vlm/prompt_utils.pyi
diff --git a/.mlx_typings/mlx_vlm/utils.pyi b/.typings/mlx_vlm/utils.pyi
similarity index 100%
rename from .mlx_typings/mlx_vlm/utils.pyi
rename to .typings/mlx_vlm/utils.pyi
diff --git a/.typings/pynvml/__init__.pyi b/.typings/pynvml/__init__.pyi
new file mode 100644
index 000000000..c0ded6f12
--- /dev/null
+++ b/.typings/pynvml/__init__.pyi
@@ -0,0 +1,3226 @@
+"""
+This type stub file was generated by pyright.
+"""
+
+import sys
+import os
+import threading
+import string
+from ctypes import *
+from ctypes import _Pointer
+from ctypes.util import find_library
+from functools import wraps
+
+_nvmlEnableState_t = c_uint
+NVML_FEATURE_DISABLED = ...
+NVML_FEATURE_ENABLED = ...
+_nvmlBrandType_t = c_uint
+NVML_BRAND_UNKNOWN = ...
+NVML_BRAND_QUADRO = ...
+NVML_BRAND_TESLA = ...
+NVML_BRAND_NVS = ...
+NVML_BRAND_GRID = ...
+NVML_BRAND_GEFORCE = ...
+NVML_BRAND_TITAN = ...
+NVML_BRAND_NVIDIA_VAPPS = ...
+NVML_BRAND_NVIDIA_VPC = ...
+NVML_BRAND_NVIDIA_VCS = ...
+NVML_BRAND_NVIDIA_VWS = ...
+NVML_BRAND_NVIDIA_CLOUD_GAMING = ...
+NVML_BRAND_NVIDIA_VGAMING = ...
+NVML_BRAND_QUADRO_RTX = ...
+NVML_BRAND_NVIDIA_RTX = ...
+NVML_BRAND_NVIDIA = ...
+NVML_BRAND_GEFORCE_RTX = ...
+NVML_BRAND_TITAN_RTX = ...
+NVML_BRAND_COUNT = ...
+_nvmlTemperatureThresholds_t = c_uint
+NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = ...
+NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = ...
+NVML_TEMPERATURE_THRESHOLD_MEM_MAX = ...
+NVML_TEMPERATURE_THRESHOLD_GPU_MAX = ...
+NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MIN = ...
+NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR = ...
+NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX = ...
+NVML_TEMPERATURE_THRESHOLD_GPS_CURR = ...
+NVML_TEMPERATURE_THRESHOLD_COUNT = ...
+_nvmlTemperatureSensors_t = c_uint
+NVML_TEMPERATURE_GPU: int = ...
+NVML_TEMPERATURE_COUNT = ...
+_nvmlComputeMode_t = c_uint
+NVML_COMPUTEMODE_DEFAULT = ...
+NVML_COMPUTEMODE_EXCLUSIVE_THREAD = ...
+NVML_COMPUTEMODE_PROHIBITED = ...
+NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = ...
+NVML_COMPUTEMODE_COUNT = ...
+_nvmlMemoryLocation_t = c_uint
+NVML_MEMORY_LOCATION_L1_CACHE = ...
+NVML_MEMORY_LOCATION_L2_CACHE = ...
+NVML_MEMORY_LOCATION_DEVICE_MEMORY = ...
+NVML_MEMORY_LOCATION_DRAM = ...
+NVML_MEMORY_LOCATION_REGISTER_FILE = ...
+NVML_MEMORY_LOCATION_TEXTURE_MEMORY = ...
+NVML_MEMORY_LOCATION_TEXTURE_SHM = ...
+NVML_MEMORY_LOCATION_CBU = ...
+NVML_MEMORY_LOCATION_SRAM = ...
+NVML_MEMORY_LOCATION_COUNT = ...
+NVML_NVLINK_MAX_LINKS = ...
+NVML_NVLINK_MAX_LANES = ...
+_nvmlNvLinkErrorCounter_t = c_uint
+NVML_NVLINK_ERROR_DL_REPLAY = ...
+NVML_NVLINK_ERROR_DL_RECOVERY = ...
+NVML_NVLINK_ERROR_DL_CRC_FLIT = ...
+NVML_NVLINK_ERROR_DL_CRC_DATA = ...
+NVML_NVLINK_ERROR_DL_ECC_DATA = ...
+NVML_NVLINK_ERROR_COUNT = ...
+_nvmlNvLinkEccLaneErrorCounter_t = c_uint
+NVML_NVLINK_ERROR_DL_ECC_LANE0 = ...
+NVML_NVLINK_ERROR_DL_ECC_LANE1 = ...
+NVML_NVLINK_ERROR_DL_ECC_LANE2 = ...
+NVML_NVLINK_ERROR_DL_ECC_LANE3 = ...
+NVML_NVLINK_ERROR_DL_ECC_COUNT = ...
+_nvmlNvLinkCapability_t = c_uint
+NVML_NVLINK_CAP_P2P_SUPPORTED = ...
+NVML_NVLINK_CAP_SYSMEM_ACCESS = ...
+NVML_NVLINK_CAP_P2P_ATOMICS = ...
+NVML_NVLINK_CAP_SYSMEM_ATOMICS = ...
+NVML_NVLINK_CAP_SLI_BRIDGE = ...
+NVML_NVLINK_CAP_VALID = ...
+NVML_NVLINK_CAP_COUNT = ...
+_nvmlNvLinkUtilizationCountPktTypes_t = c_uint
+NVML_NVLINK_COUNTER_PKTFILTER_NOP = ...
+NVML_NVLINK_COUNTER_PKTFILTER_READ = ...
+NVML_NVLINK_COUNTER_PKTFILTER_WRITE = ...
+NVML_NVLINK_COUNTER_PKTFILTER_RATOM = ...
+NVML_NVLINK_COUNTER_PKTFILTER_NRATOM = ...
+NVML_NVLINK_COUNTER_PKTFILTER_FLUSH = ...
+NVML_NVLINK_COUNTER_PKTFILTER_RESPDATA = ...
+NVML_NVLINK_COUNTER_PKTFILTER_RESPNODATA = ...
+NVML_NVLINK_COUNTER_PKTFILTER_ALL = ...
+_nvmlNvLinkUtilizationCountUnits_t = c_uint
+NVML_NVLINK_COUNTER_UNIT_CYCLES = ...
+NVML_NVLINK_COUNTER_UNIT_PACKETS = ...
+NVML_NVLINK_COUNTER_UNIT_BYTES = ...
+NVML_NVLINK_COUNTER_UNIT_RESERVED = ...
+NVML_NVLINK_COUNTER_UNIT_COUNT = ...
+_nvmlNvLinkDeviceType_t = c_uint
+NVML_NVLINK_DEVICE_TYPE_GPU = ...
+NVML_NVLINK_DEVICE_TYPE_IBMNPU = ...
+NVML_NVLINK_DEVICE_TYPE_SWITCH = ...
+NVML_NVLINK_DEVICE_TYPE_UNKNOWN = ...
+_nvmlEccBitType_t = c_uint
+NVML_SINGLE_BIT_ECC = ...
+NVML_DOUBLE_BIT_ECC = ...
+NVML_ECC_ERROR_TYPE_COUNT = ...
+_nvmlEccCounterType_t = c_uint
+NVML_VOLATILE_ECC = ...
+NVML_AGGREGATE_ECC = ...
+NVML_ECC_COUNTER_TYPE_COUNT = ...
+_nvmlMemoryErrorType_t = c_uint
+NVML_MEMORY_ERROR_TYPE_CORRECTED = ...
+NVML_MEMORY_ERROR_TYPE_UNCORRECTED = ...
+NVML_MEMORY_ERROR_TYPE_COUNT = ...
+_nvmlClockType_t = c_uint
+NVML_CLOCK_GRAPHICS = ...
+NVML_CLOCK_SM = ...
+NVML_CLOCK_MEM = ...
+NVML_CLOCK_VIDEO = ...
+NVML_CLOCK_COUNT = ...
+_nvmlClockId_t = c_uint
+NVML_CLOCK_ID_CURRENT = ...
+NVML_CLOCK_ID_APP_CLOCK_TARGET = ...
+NVML_CLOCK_ID_APP_CLOCK_DEFAULT = ...
+NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = ...
+NVML_CLOCK_ID_COUNT = ...
+_nvmlDriverModel_t = c_uint
+NVML_DRIVER_WDDM = ...
+NVML_DRIVER_WDM = ...
+NVML_DRIVER_MCDM = ...
+NVML_MAX_GPU_PERF_PSTATES = ...
+_nvmlPstates_t = c_uint
+NVML_PSTATE_0 = ...
+NVML_PSTATE_1 = ...
+NVML_PSTATE_2 = ...
+NVML_PSTATE_3 = ...
+NVML_PSTATE_4 = ...
+NVML_PSTATE_5 = ...
+NVML_PSTATE_6 = ...
+NVML_PSTATE_7 = ...
+NVML_PSTATE_8 = ...
+NVML_PSTATE_9 = ...
+NVML_PSTATE_10 = ...
+NVML_PSTATE_11 = ...
+NVML_PSTATE_12 = ...
+NVML_PSTATE_13 = ...
+NVML_PSTATE_14 = ...
+NVML_PSTATE_15 = ...
+NVML_PSTATE_UNKNOWN = ...
+_nvmlInforomObject_t = c_uint
+NVML_INFOROM_OEM = ...
+NVML_INFOROM_ECC = ...
+NVML_INFOROM_POWER = ...
+NVML_INFOROM_DEN = ...
+NVML_INFOROM_COUNT = ...
+_nvmlReturn_t = c_uint
+NVML_SUCCESS = ...
+NVML_ERROR_UNINITIALIZED = ...
+NVML_ERROR_INVALID_ARGUMENT = ...
+NVML_ERROR_NOT_SUPPORTED = ...
+NVML_ERROR_NO_PERMISSION = ...
+NVML_ERROR_ALREADY_INITIALIZED = ...
+NVML_ERROR_NOT_FOUND = ...
+NVML_ERROR_INSUFFICIENT_SIZE = ...
+NVML_ERROR_INSUFFICIENT_POWER = ...
+NVML_ERROR_DRIVER_NOT_LOADED = ...
+NVML_ERROR_TIMEOUT = ...
+NVML_ERROR_IRQ_ISSUE = ...
+NVML_ERROR_LIBRARY_NOT_FOUND = ...
+NVML_ERROR_FUNCTION_NOT_FOUND = ...
+NVML_ERROR_CORRUPTED_INFOROM = ...
+NVML_ERROR_GPU_IS_LOST = ...
+NVML_ERROR_RESET_REQUIRED = ...
+NVML_ERROR_OPERATING_SYSTEM = ...
+NVML_ERROR_LIB_RM_VERSION_MISMATCH = ...
+NVML_ERROR_IN_USE = ...
+NVML_ERROR_MEMORY = ...
+NVML_ERROR_NO_DATA = ...
+NVML_ERROR_VGPU_ECC_NOT_SUPPORTED = ...
+NVML_ERROR_INSUFFICIENT_RESOURCES = ...
+NVML_ERROR_FREQ_NOT_SUPPORTED = ...
+NVML_ERROR_ARGUMENT_VERSION_MISMATCH = ...
+NVML_ERROR_DEPRECATED = ...
+NVML_ERROR_NOT_READY = ...
+NVML_ERROR_GPU_NOT_FOUND = ...
+NVML_ERROR_INVALID_STATE = ...
+NVML_ERROR_RESET_TYPE_NOT_SUPPORTED = ...
+NVML_ERROR_UNKNOWN = ...
+_nvmlFanState_t = c_uint
+NVML_FAN_NORMAL = ...
+NVML_FAN_FAILED = ...
+_nvmlFanControlPolicy_t = c_uint
+NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW = ...
+NVML_FAN_POLICY_MANUAL = ...
+_nvmlLedColor_t = c_uint
+NVML_LED_COLOR_GREEN = ...
+NVML_LED_COLOR_AMBER = ...
+_nvmlGpuOperationMode_t = c_uint
+NVML_GOM_ALL_ON = ...
+NVML_GOM_COMPUTE = ...
+NVML_GOM_LOW_DP = ...
+_nvmlPageRetirementCause_t = c_uint
+NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = ...
+NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = ...
+NVML_PAGE_RETIREMENT_CAUSE_COUNT = ...
+_nvmlRestrictedAPI_t = c_uint
+NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = ...
+NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = ...
+NVML_RESTRICTED_API_COUNT = ...
+_nvmlBridgeChipType_t = c_uint
+NVML_BRIDGE_CHIP_PLX = ...
+NVML_BRIDGE_CHIP_BRO4 = ...
+NVML_MAX_PHYSICAL_BRIDGE = ...
+_nvmlValueType_t = c_uint
+NVML_VALUE_TYPE_DOUBLE = ...
+NVML_VALUE_TYPE_UNSIGNED_INT = ...
+NVML_VALUE_TYPE_UNSIGNED_LONG = ...
+NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = ...
+NVML_VALUE_TYPE_SIGNED_LONG_LONG = ...
+NVML_VALUE_TYPE_SIGNED_INT = ...
+NVML_VALUE_TYPE_UNSIGNED_SHORT = ...
+NVML_VALUE_TYPE_COUNT = ...
+_nvmlNvlinkVersion_t = c_uint
+NVML_NVLINK_VERSION_INVALID = ...
+NVML_NVLINK_VERSION_1_0 = ...
+NVML_NVLINK_VERSION_2_0 = ...
+NVML_NVLINK_VERSION_2_2 = ...
+NVML_NVLINK_VERSION_3_0 = ...
+NVML_NVLINK_VERSION_3_1 = ...
+NVML_NVLINK_VERSION_4_0 = ...
+NVML_NVLINK_VERSION_5_0 = ...
+_nvmlPerfPolicyType_t = c_uint
+NVML_PERF_POLICY_POWER = ...
+NVML_PERF_POLICY_THERMAL = ...
+NVML_PERF_POLICY_SYNC_BOOST = ...
+NVML_PERF_POLICY_BOARD_LIMIT = ...
+NVML_PERF_POLICY_LOW_UTILIZATION = ...
+NVML_PERF_POLICY_RELIABILITY = ...
+NVML_PERF_POLICY_TOTAL_APP_CLOCKS = ...
+NVML_PERF_POLICY_TOTAL_BASE_CLOCKS = ...
+NVML_PERF_POLICY_COUNT = ...
+_nvmlEncoderQueryType_t = c_uint
+NVML_ENCODER_QUERY_H264 = ...
+NVML_ENCODER_QUERY_HEVC = ...
+NVML_ENCODER_QUERY_AV1 = ...
+NVML_ENCODER_QUERY_UNKNOWN = ...
+_nvmlFBCSessionType_t = c_uint
+NVML_FBC_SESSION_TYPE_UNKNOWN = ...
+NVML_FBC_SESSION_TYPE_TOSYS = ...
+NVML_FBC_SESSION_TYPE_CUDA = ...
+NVML_FBC_SESSION_TYPE_VID = ...
+NVML_FBC_SESSION_TYPE_HWENC = ...
+_nvmlDetachGpuState_t = c_uint
+NVML_DETACH_GPU_KEEP = ...
+NVML_DETACH_GPU_REMOVE = ...
+_nvmlPcieLinkState_t = c_uint
+NVML_PCIE_LINK_KEEP = ...
+NVML_PCIE_LINK_SHUT_DOWN = ...
+_nvmlSamplingType_t = c_uint
+NVML_TOTAL_POWER_SAMPLES = ...
+NVML_GPU_UTILIZATION_SAMPLES = ...
+NVML_MEMORY_UTILIZATION_SAMPLES = ...
+NVML_ENC_UTILIZATION_SAMPLES = ...
+NVML_DEC_UTILIZATION_SAMPLES = ...
+NVML_PROCESSOR_CLK_SAMPLES = ...
+NVML_MEMORY_CLK_SAMPLES = ...
+NVML_MODULE_POWER_SAMPLES = ...
+NVML_JPG_UTILIZATION_SAMPLES = ...
+NVML_OFA_UTILIZATION_SAMPLES = ...
+NVML_SAMPLINGTYPE_COUNT = ...
+_nvmlPcieUtilCounter_t = c_uint
+NVML_PCIE_UTIL_TX_BYTES = ...
+NVML_PCIE_UTIL_RX_BYTES = ...
+NVML_PCIE_UTIL_COUNT = ...
+_nvmlGpuTopologyLevel_t = c_uint
+NVML_TOPOLOGY_INTERNAL = ...
+NVML_TOPOLOGY_SINGLE = ...
+NVML_TOPOLOGY_MULTIPLE = ...
+NVML_TOPOLOGY_HOSTBRIDGE = ...
+NVML_TOPOLOGY_NODE = ...
+NVML_TOPOLOGY_CPU = ...
+NVML_TOPOLOGY_SYSTEM = ...
+_nvmlGpuP2PCapsIndex_t = c_uint
+NVML_P2P_CAPS_INDEX_READ = ...
+NVML_P2P_CAPS_INDEX_WRITE = ...
+NVML_P2P_CAPS_INDEX_NVLINK = ...
+NVML_P2P_CAPS_INDEX_ATOMICS = ...
+NVML_P2P_CAPS_INDEX_PROP = ...
+NVML_P2P_CAPS_INDEX_PCI = ...
+NVML_P2P_CAPS_INDEX_UNKNOWN = ...
+_nvmlGpuP2PStatus_t = c_uint
+NVML_P2P_STATUS_OK = ...
+NVML_P2P_STATUS_CHIPSET_NOT_SUPPORED = ...
+NVML_P2P_STATUS_CHIPSET_NOT_SUPPORTED = ...
+NVML_P2P_STATUS_GPU_NOT_SUPPORTED = ...
+NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED = ...
+NVML_P2P_STATUS_DISABLED_BY_REGKEY = ...
+NVML_P2P_STATUS_NOT_SUPPORTED = ...
+NVML_P2P_STATUS_UNKNOWN = ...
+_nvmlDeviceArchitecture_t = c_uint
+NVML_DEVICE_ARCH_KEPLER = ...
+NVML_DEVICE_ARCH_MAXWELL = ...
+NVML_DEVICE_ARCH_PASCAL = ...
+NVML_DEVICE_ARCH_VOLTA = ...
+NVML_DEVICE_ARCH_TURING = ...
+NVML_DEVICE_ARCH_AMPERE = ...
+NVML_DEVICE_ARCH_ADA = ...
+NVML_DEVICE_ARCH_HOPPER = ...
+NVML_DEVICE_ARCH_BLACKWELL = ...
+NVML_DEVICE_ARCH_UNKNOWN = ...
+_nvmlBusType_t = c_uint
+NVML_BUS_TYPE_UNKNOWN = ...
+NVML_BUS_TYPE_PCI = ...
+NVML_BUS_TYPE_PCIE = ...
+NVML_BUS_TYPE_FPCI = ...
+NVML_BUS_TYPE_AGP = ...
+_nvmlPowerSource_t = c_uint
+NVML_POWER_SOURCE_AC = ...
+NVML_POWER_SOURCE_BATTERY = ...
+NVML_POWER_SOURCE_UNDERSIZED = ...
+_nvmlAdaptiveClockInfoStatus_t = c_uint
+NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED = ...
+NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED = ...
+_nvmlClockLimitId_t = c_uint
+NVML_CLOCK_LIMIT_ID_RANGE_START = ...
+NVML_CLOCK_LIMIT_ID_TDP = ...
+NVML_CLOCK_LIMIT_ID_UNLIMITED = ...
+_nvmlPcieLinkMaxSpeed_t = c_uint
+NVML_PCIE_LINK_MAX_SPEED_INVALID = ...
+NVML_PCIE_LINK_MAX_SPEED_2500MBPS = ...
+NVML_PCIE_LINK_MAX_SPEED_5000MBPS = ...
+NVML_PCIE_LINK_MAX_SPEED_8000MBPS = ...
+NVML_PCIE_LINK_MAX_SPEED_16000MBPS = ...
+NVML_PCIE_LINK_MAX_SPEED_32000MBPS = ...
+NVML_PCIE_LINK_MAX_SPEED_64000MBPS = ...
+_nvmlPcieAtomicsCapability_t = c_uint
+NVML_PCIE_ATOMICS_CAP_FETCHADD32 = ...
+NVML_PCIE_ATOMICS_CAP_FETCHADD64 = ...
+NVML_PCIE_ATOMICS_CAP_SWAP32 = ...
+NVML_PCIE_ATOMICS_CAP_SWAP64 = ...
+NVML_PCIE_ATOMICS_CAP_CAS32 = ...
+NVML_PCIE_ATOMICS_CAP_CAS64 = ...
+NVML_PCIE_ATOMICS_CAP_CAS128 = ...
+NVML_PCIE_ATOMICS_OPS_MAX = ...
+_nvmlAffinityScope_t = c_uint
+NVML_AFFINITY_SCOPE_NODE = ...
+NVML_AFFINITY_SCOPE_SOCKET = ...
+_nvmlDeviceGpuRecoveryAction_t = c_uint
+NVML_GPU_RECOVERY_ACTION_NONE = ...
+NVML_GPU_RECOVERY_ACTION_GPU_RESET = ...
+NVML_GPU_RECOVERY_ACTION_NODE_REBOOT = ...
+NVML_GPU_RECOVERY_ACTION_DRAIN_P2P = ...
+NVML_GPU_RECOVERY_ACTION_DRAIN_AND_RESET = ...
+nvmlFlagDefault = ...
+nvmlFlagForce = ...
+NVML_INIT_FLAG_NO_GPUS = ...
+NVML_INIT_FLAG_NO_ATTACH = ...
+NVML_INIT_FLAG_FORCE_INIT = ...
+NVML_MAX_GPC_COUNT = ...
+NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = ...
+NVML_DEVICE_UUID_BUFFER_SIZE = ...
+NVML_DEVICE_UUID_V2_BUFFER_SIZE = ...
+NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = ...
+NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = ...
+NVML_DEVICE_NAME_BUFFER_SIZE = ...
+NVML_DEVICE_NAME_V2_BUFFER_SIZE = ...
+NVML_DEVICE_SERIAL_BUFFER_SIZE = ...
+NVML_DEVICE_PART_NUMBER_BUFFER_SIZE = ...
+NVML_DEVICE_GPU_PART_NUMBER_BUFFER_SIZE = ...
+NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = ...
+NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE = ...
+NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE = ...
+NVML_GRID_LICENSE_BUFFER_SIZE = ...
+NVML_VGPU_NAME_BUFFER_SIZE = ...
+NVML_GRID_LICENSE_FEATURE_MAX_COUNT = ...
+NVML_VGPU_METADATA_OPAQUE_DATA_SIZE = ...
+NVML_VGPU_PGPU_METADATA_OPAQUE_DATA_SIZE = ...
+NVML_DEVICE_GPU_FRU_PART_NUMBER_BUFFER_SIZE = ...
+NVML_PERF_MODES_BUFFER_SIZE = ...
+NVML_DEVICE_PCI_BUS_ID_LEGACY_FMT = ...
+NVML_DEVICE_PCI_BUS_ID_FMT = ...
+NVML_VALUE_NOT_AVAILABLE_ulonglong = ...
+NVML_VALUE_NOT_AVAILABLE_uint = ...
+NVML_FI_DEV_ECC_CURRENT = ...
+NVML_FI_DEV_ECC_PENDING = ...
+NVML_FI_DEV_ECC_SBE_VOL_TOTAL = ...
+NVML_FI_DEV_ECC_DBE_VOL_TOTAL = ...
+NVML_FI_DEV_ECC_SBE_AGG_TOTAL = ...
+NVML_FI_DEV_ECC_DBE_AGG_TOTAL = ...
+NVML_FI_DEV_ECC_SBE_VOL_L1 = ...
+NVML_FI_DEV_ECC_DBE_VOL_L1 = ...
+NVML_FI_DEV_ECC_SBE_VOL_L2 = ...
+NVML_FI_DEV_ECC_DBE_VOL_L2 = ...
+NVML_FI_DEV_ECC_SBE_VOL_DEV = ...
+NVML_FI_DEV_ECC_DBE_VOL_DEV = ...
+NVML_FI_DEV_ECC_SBE_VOL_REG = ...
+NVML_FI_DEV_ECC_DBE_VOL_REG = ...
+NVML_FI_DEV_ECC_SBE_VOL_TEX = ...
+NVML_FI_DEV_ECC_DBE_VOL_TEX = ...
+NVML_FI_DEV_ECC_DBE_VOL_CBU = ...
+NVML_FI_DEV_ECC_SBE_AGG_L1 = ...
+NVML_FI_DEV_ECC_DBE_AGG_L1 = ...
+NVML_FI_DEV_ECC_SBE_AGG_L2 = ...
+NVML_FI_DEV_ECC_DBE_AGG_L2 = ...
+NVML_FI_DEV_ECC_SBE_AGG_DEV = ...
+NVML_FI_DEV_ECC_DBE_AGG_DEV = ...
+NVML_FI_DEV_ECC_SBE_AGG_REG = ...
+NVML_FI_DEV_ECC_DBE_AGG_REG = ...
+NVML_FI_DEV_ECC_SBE_AGG_TEX = ...
+NVML_FI_DEV_ECC_DBE_AGG_TEX = ...
+NVML_FI_DEV_ECC_DBE_AGG_CBU = ...
+NVML_FI_DEV_RETIRED_SBE = ...
+NVML_FI_DEV_RETIRED_DBE = ...
+NVML_FI_DEV_RETIRED_PENDING = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L0 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L1 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L2 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L3 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L4 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L5 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L0 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L1 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L2 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L3 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L4 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L5 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L0 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L1 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L2 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L3 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L4 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L5 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L0 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L1 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L2 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L3 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L4 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L5 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L0 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L1 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L2 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L3 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L4 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L5 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_TOTAL = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L0 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L1 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L2 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L3 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L4 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L5 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_TOTAL = ...
+NVML_FI_DEV_PERF_POLICY_POWER = ...
+NVML_FI_DEV_PERF_POLICY_THERMAL = ...
+NVML_FI_DEV_PERF_POLICY_SYNC_BOOST = ...
+NVML_FI_DEV_PERF_POLICY_BOARD_LIMIT = ...
+NVML_FI_DEV_PERF_POLICY_LOW_UTILIZATION = ...
+NVML_FI_DEV_PERF_POLICY_RELIABILITY = ...
+NVML_FI_DEV_PERF_POLICY_TOTAL_APP_CLOCKS = ...
+NVML_FI_DEV_PERF_POLICY_TOTAL_BASE_CLOCKS = ...
+NVML_FI_DEV_MEMORY_TEMP = ...
+NVML_FI_DEV_TOTAL_ENERGY_CONSUMPTION = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L0 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L1 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L2 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L3 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L4 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L5 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_COMMON = ...
+NVML_FI_DEV_NVLINK_LINK_COUNT = ...
+NVML_FI_DEV_RETIRED_PENDING_SBE = ...
+NVML_FI_DEV_RETIRED_PENDING_DBE = ...
+NVML_FI_DEV_PCIE_REPLAY_COUNTER = ...
+NVML_FI_DEV_PCIE_REPLAY_ROLLOVER_COUNTER = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L6 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L7 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L8 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L9 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L10 = ...
+NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_L11 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L6 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L7 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L8 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L9 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L10 = ...
+NVML_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_L11 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L6 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L7 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L8 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L9 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L10 = ...
+NVML_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_L11 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L6 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L7 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L8 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L9 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L10 = ...
+NVML_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_L11 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L6 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L7 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L8 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L9 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L10 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C0_L11 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L6 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L7 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L8 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L9 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L10 = ...
+NVML_FI_DEV_NVLINK_BANDWIDTH_C1_L11 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L6 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L7 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L8 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L9 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L10 = ...
+NVML_FI_DEV_NVLINK_SPEED_MBPS_L11 = ...
+NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_TX = ...
+NVML_FI_DEV_NVLINK_THROUGHPUT_DATA_RX = ...
+NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_TX = ...
+NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_RX = ...
+NVML_FI_DEV_REMAPPED_COR = ...
+NVML_FI_DEV_REMAPPED_UNC = ...
+NVML_FI_DEV_REMAPPED_PENDING = ...
+NVML_FI_DEV_REMAPPED_FAILURE = ...
+NVML_FI_DEV_NVLINK_REMOTE_NVLINK_ID = ...
+NVML_FI_DEV_NVSWITCH_CONNECTED_LINK_COUNT = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L0 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L1 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L2 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L3 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L4 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L5 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L6 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L7 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L8 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L9 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L10 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_L11 = ...
+NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL = ...
+NVML_FI_DEV_NVLINK_ERROR_DL_REPLAY = ...
+NVML_FI_DEV_NVLINK_ERROR_DL_RECOVERY = ...
+NVML_FI_DEV_NVLINK_ERROR_DL_CRC = ...
+NVML_FI_DEV_NVLINK_GET_SPEED = ...
+NVML_FI_DEV_NVLINK_GET_STATE = ...
+NVML_FI_DEV_NVLINK_GET_VERSION = ...
+NVML_FI_DEV_NVLINK_GET_POWER_STATE = ...
+NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD = ...
+NVML_FI_DEV_PCIE_L0_TO_RECOVERY_COUNTER = ...
+NVML_FI_DEV_C2C_LINK_COUNT = ...
+NVML_FI_DEV_C2C_LINK_GET_STATUS = ...
+NVML_FI_DEV_C2C_LINK_GET_MAX_BW = ...
+NVML_FI_DEV_PCIE_COUNT_CORRECTABLE_ERRORS = ...
+NVML_FI_DEV_PCIE_COUNT_NAKS_RECEIVED = ...
+NVML_FI_DEV_PCIE_COUNT_RECEIVER_ERROR = ...
+NVML_FI_DEV_PCIE_COUNT_BAD_TLP = ...
+NVML_FI_DEV_PCIE_COUNT_NAKS_SENT = ...
+NVML_FI_DEV_PCIE_COUNT_BAD_DLLP = ...
+NVML_FI_DEV_PCIE_COUNT_NON_FATAL_ERROR = ...
+NVML_FI_DEV_PCIE_COUNT_FATAL_ERROR = ...
+NVML_FI_DEV_PCIE_COUNT_UNSUPPORTED_REQ = ...
+NVML_FI_DEV_PCIE_COUNT_LCRC_ERROR = ...
+NVML_FI_DEV_PCIE_COUNT_LANE_ERROR = ...
+NVML_FI_DEV_IS_RESETLESS_MIG_SUPPORTED = ...
+NVML_FI_DEV_POWER_AVERAGE = ...
+NVML_FI_DEV_POWER_INSTANT = ...
+NVML_FI_DEV_POWER_MIN_LIMIT = ...
+NVML_FI_DEV_POWER_MAX_LIMIT = ...
+NVML_FI_DEV_POWER_DEFAULT_LIMIT = ...
+NVML_FI_DEV_POWER_CURRENT_LIMIT = ...
+NVML_FI_DEV_ENERGY = ...
+NVML_FI_DEV_POWER_REQUESTED_LIMIT = ...
+NVML_FI_DEV_TEMPERATURE_SHUTDOWN_TLIMIT = ...
+NVML_FI_DEV_TEMPERATURE_SLOWDOWN_TLIMIT = ...
+NVML_FI_DEV_TEMPERATURE_MEM_MAX_TLIMIT = ...
+NVML_FI_DEV_TEMPERATURE_GPU_MAX_TLIMIT = ...
+NVML_FI_DEV_PCIE_COUNT_TX_BYTES = ...
+NVML_FI_DEV_PCIE_COUNT_RX_BYTES = ...
+NVML_FI_DEV_IS_MIG_MODE_INDEPENDENT_MIG_QUERY_CAPABLE = ...
+NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_MAX = ...
+NVML_FI_DEV_NVLINK_COUNT_XMIT_PACKETS = ...
+NVML_FI_DEV_NVLINK_COUNT_XMIT_BYTES = ...
+NVML_FI_DEV_NVLINK_COUNT_RCV_PACKETS = ...
+NVML_FI_DEV_NVLINK_COUNT_RCV_BYTES = ...
+NVML_FI_DEV_NVLINK_COUNT_VL15_DROPPED = ...
+NVML_FI_DEV_NVLINK_COUNT_MALFORMED_PACKET_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_BUFFER_OVERRUN_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_RCV_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_RCV_REMOTE_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_RCV_GENERAL_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_LOCAL_LINK_INTEGRITY_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_XMIT_DISCARDS = ...
+NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_SUCCESSFUL_EVENTS = ...
+NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_FAILED_EVENTS = ...
+NVML_FI_DEV_NVLINK_COUNT_LINK_RECOVERY_EVENTS = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_BER_LANE0 = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_BER_LANE1 = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_BER = ...
+NVML_FI_DEV_NVLINK_COUNT_EFFECTIVE_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_EFFECTIVE_BER = ...
+NVML_FI_DEV_NVLINK_COUNT_SYMBOL_ERRORS = ...
+NVML_FI_DEV_NVLINK_COUNT_SYMBOL_BER = ...
+NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_MIN = ...
+NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_UNITS = ...
+NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_SUPPORTED = ...
+NVML_FI_DEV_RESET_STATUS = ...
+NVML_FI_DEV_DRAIN_AND_RESET_STATUS = ...
+NVML_FI_DEV_PCIE_OUTBOUND_ATOMICS_MASK = ...
+NVML_FI_DEV_PCIE_INBOUND_ATOMICS_MASK = ...
+NVML_FI_DEV_GET_GPU_RECOVERY_ACTION = ...
+NVML_FI_DEV_C2C_LINK_ERROR_INTR = ...
+NVML_FI_DEV_C2C_LINK_ERROR_REPLAY = ...
+NVML_FI_DEV_C2C_LINK_ERROR_REPLAY_B2B = ...
+NVML_FI_DEV_C2C_LINK_POWER_STATE = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_0 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_1 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_2 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_3 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_4 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_5 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_6 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_7 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_8 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_9 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_10 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_11 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_12 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_13 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_14 = ...
+NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_15 = ...
+NVML_FI_PWR_SMOOTHING_ENABLED = ...
+NVML_FI_PWR_SMOOTHING_PRIV_LVL = ...
+NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED = ...
+NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL = ...
+NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR = ...
+NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING = ...
+NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING = ...
+NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING = ...
+NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL = ...
+NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL = ...
+NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP = ...
+NVML_FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST = ...
+NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN = ...
+NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN = ...
+NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN = ...
+NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ = ...
+NVML_FI_DEV_POWER_SYNC_BALANCING_AF = ...
+NVML_FI_DEV_EDPP_MULTIPLIER = ...
+NVML_FI_PWR_SMOOTHING_PRIMARY_POWER_FLOOR = ...
+NVML_FI_PWR_SMOOTHING_SECONDARY_POWER_FLOOR = ...
+NVML_FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_OFFSET = ...
+NVML_FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_POINT = ...
+NVML_FI_PWR_SMOOTHING_WINDOW_MULTIPLIER = ...
+NVML_FI_PWR_SMOOTHING_DELAYED_PWR_SMOOTHING_SUPPORTED = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_SECONDARY_POWER_FLOOR = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_WIN_MULT = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_TAR_WIN_MULT = ...
+NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_OFFSET = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_SECONDARY_POWER_FLOOR = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_WIN_MULT = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_TAR_WIN_MULT = ...
+NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_OFFSET = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_ERRORS_LANE0 = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_ERRORS_LANE1 = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_BER_LANE0_V2 = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_BER_LANE1_V2 = ...
+NVML_FI_DEV_NVLINK_COUNT_RAW_BER_V2 = ...
+NVML_FI_DEV_NVLINK_PLR_XMIT_BLOCKS = ...
+NVML_FI_DEV_NVLINK_PLR_XMIT_RETRY_BLOCKS = ...
+NVML_FI_MAX = ...
+NVML_NVLINK_STATE_INACTIVE = ...
+NVML_NVLINK_STATE_ACTIVE = ...
+NVML_NVLINK_STATE_SLEEP = ...
+NVML_NVLINK_LOW_POWER_THRESHOLD_UNIT_100US = ...
+NVML_NVLINK_LOW_POWER_THRESHOLD_UNIT_50US = ...
+NVML_C2C_POWER_STATE_FULL_POWER = ...
+NVML_C2C_POWER_STATE_LOW_POWER = ...
+NVML_GPU_VIRTUALIZATION_MODE_NONE = ...
+NVML_GPU_VIRTUALIZATION_MODE_PASSTHROUGH = ...
+NVML_GPU_VIRTUALIZATION_MODE_VGPU = ...
+NVML_GPU_VIRTUALIZATION_MODE_HOST_VGPU = ...
+NVML_GPU_VIRTUALIZATION_MODE_HOST_VSGA = ...
+nvmlLib = ...
+libLoadLock = ...
+_nvmlLib_refcount = ...
+_nvmlVgpuTypeId_t = c_uint
+_nvmlVgpuInstance_t = c_uint
+_nvmlVgpuVmIdType_t = c_uint
+NVML_VGPU_VM_ID_DOMAIN_ID = ...
+NVML_VGPU_VM_ID_UUID = ...
+_nvmlGridLicenseFeatureCode_t = c_uint
+NVML_GRID_LICENSE_FEATURE_CODE_UNKNOWN = ...
+NVML_GRID_LICENSE_FEATURE_CODE_VGPU = ...
+NVML_GRID_LICENSE_FEATURE_CODE_NVIDIA_RTX = ...
+NVML_GRID_LICENSE_FEATURE_CODE_VWORKSTATION = ...
+NVML_GRID_LICENSE_FEATURE_CODE_GAMING = ...
+NVML_GRID_LICENSE_FEATURE_CODE_COMPUTE = ...
+_nvmlGridLicenseExpiryStatus_t = c_uint8
+NVML_GRID_LICENSE_EXPIRY_NOT_AVAILABLE = ...
+NVML_GRID_LICENSE_EXPIRY_INVALID = ...
+NVML_GRID_LICENSE_EXPIRY_VALID = ...
+NVML_GRID_LICENSE_EXPIRY_NOT_APPLICABLE = ...
+NVML_GRID_LICENSE_EXPIRY_PERMANENT = ...
+_nvmlVgpuCapability_t = c_uint
+NVML_VGPU_CAP_NVLINK_P2P = ...
+NVML_VGPU_CAP_GPUDIRECT = ...
+NVML_VGPU_CAP_MULTI_VGPU_EXCLUSIVE = ...
+NVML_VGPU_CAP_EXCLUSIVE_TYPE = ...
+NVML_VGPU_CAP_EXCLUSIVE_SIZE = ...
+NVML_VGPU_CAP_COUNT = ...
+_nvmlVgpuDriverCapability_t = c_uint
+NVML_VGPU_DRIVER_CAP_HETEROGENEOUS_MULTI_VGPU = ...
+NVML_VGPU_DRIVER_CAP_WARM_UPDATE = ...
+NVML_VGPU_DRIVER_CAP_COUNT = ...
+_nvmlDeviceVgpuCapability_t = c_uint
+NVML_DEVICE_VGPU_CAP_FRACTIONAL_MULTI_VGPU = ...
+NVML_DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_PROFILES = ...
+NVML_DEVICE_VGPU_CAP_HETEROGENEOUS_TIMESLICE_SIZES = ...
+NVML_DEVICE_VGPU_CAP_READ_DEVICE_BUFFER_BW = ...
+NVML_DEVICE_VGPU_CAP_WRITE_DEVICE_BUFFER_BW = ...
+NVML_DEVICE_VGPU_CAP_DEVICE_STREAMING = ...
+NVML_DEVICE_VGPU_CAP_MINI_QUARTER_GPU = ...
+NVML_DEVICE_VGPU_CAP_COMPUTE_MEDIA_ENGINE_GPU = ...
+NVML_DEVICE_VGPU_CAP_WARM_UPDATE = ...
+NVML_DEVICE_VGPU_CAP_HOMOGENEOUS_PLACEMENTS = ...
+NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_SUPPORTED = ...
+NVML_DEVICE_VGPU_CAP_MIG_TIMESLICING_ENABLED = ...
+NVML_DEVICE_VGPU_CAP_COUNT = ...
+_nvmlVgpuGuestInfoState_t = c_uint
+NVML_VGPU_INSTANCE_GUEST_INFO_STATE_UNINITIALIZED = ...
+NVML_VGPU_INSTANCE_GUEST_INFO_STATE_INITIALIZED = ...
+_nvmlVgpuVmCompatibility_t = c_uint
+NVML_VGPU_VM_COMPATIBILITY_NONE = ...
+NVML_VGPU_VM_COMPATIBILITY_COLD = ...
+NVML_VGPU_VM_COMPATIBILITY_HIBERNATE = ...
+NVML_VGPU_VM_COMPATIBILITY_SLEEP = ...
+NVML_VGPU_VM_COMPATIBILITY_LIVE = ...
+_nvmlVgpuPgpuCompatibilityLimitCode_t = c_uint
+NVML_VGPU_COMPATIBILITY_LIMIT_NONE = ...
+NVML_VGPU_COMPATIBILITY_LIMIT_HOST_DRIVER = ...
+NVML_VGPU_COMPATIBILITY_LIMIT_GUEST_DRIVER = ...
+NVML_VGPU_COMPATIBILITY_LIMIT_GPU = ...
+NVML_VGPU_COMPATIBILITY_LIMIT_OTHER = ...
+_nvmlHostVgpuMode_t = c_uint
+NVML_HOST_VGPU_MODE_NON_SRIOV = ...
+NVML_HOST_VGPU_MODE_SRIOV = ...
+_nvmlConfComputeGpusReadyState_t = c_uint
+NVML_CC_ACCEPTING_CLIENT_REQUESTS_FALSE = ...
+NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE = ...
+_nvmlConfComputeGpuCaps_t = c_uint
+NVML_CC_SYSTEM_GPUS_CC_NOT_CAPABLE = ...
+NVML_CC_SYSTEM_GPUS_CC_CAPABLE = ...
+_nvmlConfComputeCpuCaps_t = c_uint
+NVML_CC_SYSTEM_CPU_CAPS_NONE = ...
+NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV = ...
+NVML_CC_SYSTEM_CPU_CAPS_INTEL_TDX = ...
+NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV_SNP = ...
+NVML_CC_SYSTEM_CPU_CAPS_AMD_SNP_VTOM = ...
+_nvmlConfComputeDevToolsMode_t = c_uint
+NVML_CC_SYSTEM_DEVTOOLS_MODE_OFF = ...
+NVML_CC_SYSTEM_DEVTOOLS_MODE_ON = ...
+NVML_CC_SYSTEM_MULTIGPU_NONE = ...
+NVML_CC_SYSTEM_MULTIGPU_PROTECTED_PCIE = ...
+NVML_CC_SYSTEM_MULTIGPU_NVLE = ...
+NVML_CC_SYSTEM_ENVIRONMENT_UNAVAILABLE = ...
+NVML_CC_SYSTEM_ENVIRONMENT_SIM = ...
+NVML_CC_SYSTEM_ENVIRONMENT_PROD = ...
+_nvmlConfComputeCcFeature_t = c_uint
+NVML_CC_SYSTEM_FEATURE_DISABLED = ...
+NVML_CC_SYSTEM_FEATURE_ENABLED = ...
+_nvmlConfComputeCcKeyRotationThreshAttackerAdv_t = c_uint
+NVML_CC_KEY_ROTATION_THRESH_ATTACKER_ADVANTAGE_MIN = ...
+NVML_CC_KEY_ROTATION_THRESH_ATTACKER_ADVANTAGE_MAX = ...
+NVML_GSP_FIRMWARE_VERSION_BUF_SIZE = ...
+
+class NVMLLibraryMismatchError(Exception): ...
+
+class NVMLError(Exception):
+    _valClassMapping = ...
+    _errcode_to_string = ...
+    def __new__(typ, value):  # -> Self:
+        """
+        Maps value to a proper subclass of NVMLError.
+        See _extractNVMLErrorsAsClasses function for more details
+        """
+        ...
+
+    def __str__(self) -> str: ...
+    def __eq__(self, other) -> bool: ...
+
+def nvmlExceptionClass(nvmlErrorCode): ...
+
+_nvmlGetFunctionPointer_cache = ...
+
+class nvmlFriendlyObject:
+    def __init__(self, dictionary) -> None: ...
+    def __str__(self) -> str: ...
+
+def nvmlStructToFriendlyObject(struct):  # -> nvmlFriendlyObject:
+    ...
+def nvmlFriendlyObjectToStruct(obj, model): ...
+
+class struct_c_nvmlUnit_t(Structure): ...
+
+c_nvmlUnit_t = ...
+
+class _PrintableStructure(Structure):
+    """
+    Abstract class that produces nicer __str__ output than ctypes.Structure.
+    e.g. instead of:
+      >>> print(str(obj))
+      <class_name object at 0x7fdf82fef9e0>
+    this class will print
+      class_name(field_name: formatted_value, field_name: formatted_value)
+
+    _fmt_ dictionary of <str _field_ name> -> <str format>
+    e.g. class that has _field_ 'hex_value', c_uint could be formatted with
+      _fmt_ = {"hex_value" : "%08X"}
+    to produce nicer output.
+    Default formatting string for all fields can be set with key "<default>" like:
+      _fmt_ = {"<default>" : "%d MHz"} # e.g all values are numbers in MHz.
+    If not set it's assumed to be just "%s"
+
+    Exact format of returned str from this class is subject to change in the future.
+    """
+
+    _fmt_ = ...
+    def __str__(self) -> str: ...
+    def __getattribute__(self, name):  # -> <subclass of bytes and str> | str | Any:
+        ...
+    def __setattr__(self, name, value) -> None: ...
+
+class c_nvmlUnitInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlC2cModeInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlC2cModeInfo_v1 = ...
+
+class c_nvmlLedState_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlPSUInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlUnitFanInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlUnitFanSpeeds_t(_PrintableStructure):
+    _fields_ = ...
+
+class struct_c_nvmlDevice_t(Structure): ...
+
+c_nvmlDevice_t = ...
+
+class nvmlPciInfoExt_v1_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+nvmlPciInfoExt_v1 = ...
+
+class nvmlPciInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+class nvmlPciInfo_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+class c_nvmlSystemDriverBranchInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+SystemDriverBranchInfo_v1 = ...
+
+class c_nvmlExcludedDeviceInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+class nvmlNvLinkUtilizationControl_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlMemory_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+class c_nvmlMemory_v2_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+nvmlMemory_v2 = ...
+
+class c_nvmlBAR1Memory_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+class nvmlClkMonFaultInfo_t(Structure):
+    _fields_ = ...
+
+MAX_CLK_DOMAINS = ...
+
+class nvmlClkMonStatus_t(Structure):
+    _fields_ = ...
+
+class c_nvmlProcessInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+c_nvmlProcessInfo_v3_t = c_nvmlProcessInfo_v2_t
+c_nvmlProcessInfo_t = c_nvmlProcessInfo_v3_t
+_nvmlProcessMode_t = c_uint
+NVML_PROCESS_MODE_COMPUTE = ...
+NVML_PROCESS_MODE_GRAPHICS = ...
+NVML_PROCESS_MODE_MPS = ...
+
+class c_nvmlProcessDetail_v1_t(Structure):
+    _fields_ = ...
+
+class c_nvmlProcessDetailList_v1_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+c_nvmlProcessDetailList_t = c_nvmlProcessDetailList_v1_t
+nvmlProcessDetailList_v1 = ...
+
+class c_nvmlBridgeChipInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlBridgeChipHierarchy_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlEccErrorCounts_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlUtilization_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+    gpu: c_uint
+    memory: c_uint
+
+class c_nvmlHwbcEntry_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlValue_t(Union):
+    _fields_ = ...
+
+class c_nvmlSample_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlViolationTime_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlFieldValue_t(_PrintableStructure):
+    _fields_ = ...
+
+NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES = ...
+nvmlNvlinkSupportedBwModes_v1 = ...
+
+class c_nvmlNvlinkSupportedBwModes_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlNvlinkGetBwMode_v1 = ...
+
+class c_nvmlNvlinkGetBwMode_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlNvlinkSetBwMode_v1 = ...
+
+class c_nvmlNvlinkSetBwMode_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+class c_nvmlVgpuHeterogeneousMode_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuHeterogeneousMode_v1 = ...
+
+class c_nvmlVgpuPlacementId_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuPlacementId_v1 = ...
+
+class c_nvmlVgpuPlacementList_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuPlacementList_v1 = ...
+NVML_VGPU_PGPU_HETEROGENEOUS_MODE = ...
+NVML_VGPU_PGPU_HOMOGENEOUS_MODE = ...
+
+class c_nvmlVgpuPlacementList_v2_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuPlacementList_v2 = ...
+
+class c_nvmlVgpuTypeBar1Info_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuTypeBar1Info_v1 = ...
+
+class c_nvmlVgpuInstanceUtilizationSample_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuInstanceUtilizationInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuInstancesUtilizationInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuInstancesUtilizationInfo_v1 = ...
+
+class c_nvmlVgpuProcessUtilizationSample_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuProcessUtilizationInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuProcessesUtilizationInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuProcessesUtilizationInfo_v1 = ...
+
+class nvmlVgpuRuntimeState_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+VgpuRuntimeState_v1 = ...
+
+class c_nvmlVgpuLicenseExpiry_t(_PrintableStructure):
+    _fields_ = ...
+
+NVML_GRID_LICENSE_STATE_UNKNOWN = ...
+NVML_GRID_LICENSE_STATE_UNINITIALIZED = ...
+NVML_GRID_LICENSE_STATE_UNLICENSED_UNRESTRICTED = ...
+NVML_GRID_LICENSE_STATE_UNLICENSED_RESTRICTED = ...
+NVML_GRID_LICENSE_STATE_UNLICENSED = ...
+NVML_GRID_LICENSE_STATE_LICENSED = ...
+
+class c_nvmlVgpuLicenseInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlEncoderSession_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlProcessUtilizationSample_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlProcessUtilizationInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlProcessesUtilizationInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+ProcessesUtilizationInfo_v1 = ...
+
+class c_nvmlGridLicenseExpiry_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeature_v4_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeatures_v4_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeature_v3_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeatures_v3_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeature_v2_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeatures_v2_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeature_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGridLicensableFeatures_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlMarginTemperature_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlMarginTemperature_v1 = ...
+NVML_DEVICE_UUID_ASCII_LEN = ...
+NVML_DEVICE_UUID_BINARY_LEN = ...
+NVML_UUID_TYPE_NONE = ...
+NVML_UUID_TYPE_ASCII = ...
+NVML_UUID_TYPE_BINARY = ...
+
+class c_nvmlUUIDValue_t(Union):
+    _fields_ = ...
+
+nvmlUUID_v1 = ...
+
+class c_nvmlUUID_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlPdi_v1 = ...
+
+class c_nvmlPdi_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlRepairStatus_v1 = ...
+
+class c_nvmlRepairStatus_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlNvLinkInfo_v1 = ...
+
+class c_nvmlNvLinkInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+NVML_NVLINK_FIRMWARE_UCODE_TYPE_MSE = ...
+NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR = ...
+NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_UPHY = ...
+NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_CLN = ...
+NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_DLN = ...
+NVML_NVLINK_FIRMWARE_VERSION_LENGTH = ...
+
+class c_nvmlNvlinkFirmwareVersion_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlNvlinkFirmwareInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlNvLinkInfo_v2 = ...
+
+class c_nvmlNvLinkInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+NVML_PRM_DATA_MAX_SIZE = ...
+
+class c_nvmlPRMTLV_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self, size=...) -> None: ...
+
+def nvmlDeviceReadWritePRM_v1(
+    handle: _Pointer[struct_c_nvmlDevice_t], c_info
+) -> None: ...
+
+NVML_DEVICE_ADDRESSING_MODE_NONE = ...
+NVML_DEVICE_ADDRESSING_MODE_HMM = ...
+NVML_DEVICE_ADDRESSING_MODE_ATS = ...
+nvmlDeviceAddressingMode_v1 = ...
+
+class c_nvmlDeviceAddressingMode_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlUnrepairableMemory_v1 = ...
+
+class c_nvmlUnrepairableMemory_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+NVML_PRM_COUNTER_ID_NONE = ...
+NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_LINK_DOWN_EVENTS = ...
+NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_SUCCESSFUL_RECOVERY_EVENTS = ...
+NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TOTAL_SUCCESSFUL_RECOVERY_EVENTS = ...
+NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_SINCE_LAST_RECOVERY = ...
+NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_BETWEEN_LAST_TWO_RECOVERIES = ...
+NVML_PRM_COUNTER_ID_PPCNT_PORTCOUNTERS_PORT_XMIT_WAIT = ...
+NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODES = ...
+NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODE_ERR = ...
+NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_UNCORRECTABLE_CODE = ...
+NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_CODES = ...
+NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_CODES = ...
+NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_EVENTS = ...
+NVML_PRM_COUNTER_ID_PPCNT_PLR_SYNC_EVENTS = ...
+NVML_PRM_COUNTER_ID_PPRM_OPER_RECOVERY = ...
+
+class c_nvmlPRMCounterInput_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self, port=...) -> None: ...
+
+class c_nvmlPRMCounterValue_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+class c_nvmlPRMCounter_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self, ctrId=..., inData=...) -> None: ...
+
+class c_nvmlPRMCounterList_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self, num, ctrs=...) -> None: ...
+
+def nvmlDeviceReadPRMCounters_v1(
+    handle: _Pointer[struct_c_nvmlDevice_t], c_info
+) -> None: ...
+
+class struct_c_nvmlEventSet_t(Structure): ...
+
+c_nvmlEventSet_t = ...
+nvmlEventTypeSingleBitEccError = ...
+nvmlEventTypeDoubleBitEccError = ...
+nvmlEventTypePState = ...
+nvmlEventTypeXidCriticalError = ...
+nvmlEventTypeClock = ...
+nvmlEventTypePowerSourceChange = ...
+nvmlEventMigConfigChange = ...
+nvmlEventTypeSingleBitEccErrorStorm = ...
+nvmlEventTypeDramRetirementEvent = ...
+nvmlEventTypeDramRetirementFailure = ...
+nvmlEventTypeNonFatalPoisonError = ...
+nvmlEventTypeFatalPoisonError = ...
+nvmlEventTypeGpuUnavailableError = ...
+nvmlEventTypeGpuRecoveryAction = ...
+nvmlEventTypeNone = ...
+nvmlEventTypeAll = ...
+nvmlClocksEventReasonGpuIdle = ...
+nvmlClocksEventReasonApplicationsClocksSetting = ...
+nvmlClocksEventReasonUserDefinedClocks = ...
+nvmlClocksEventReasonSwPowerCap = ...
+nvmlClocksEventReasonHwSlowdown = ...
+nvmlClocksEventReasonSyncBoost = ...
+nvmlClocksEventReasonSwThermalSlowdown = ...
+nvmlClocksEventReasonHwThermalSlowdown = ...
+nvmlClocksEventReasonHwPowerBrakeSlowdown = ...
+nvmlClocksEventReasonDisplayClockSetting = ...
+nvmlClocksEventReasonNone = ...
+nvmlClocksEventReasonAll = ...
+nvmlClocksThrottleReasonGpuIdle = ...
+nvmlClocksThrottleReasonApplicationsClocksSetting = ...
+nvmlClocksThrottleReasonUserDefinedClocks = ...
+nvmlClocksThrottleReasonSwPowerCap = ...
+nvmlClocksThrottleReasonHwSlowdown = ...
+nvmlClocksThrottleReasonSyncBoost = ...
+nvmlClocksThrottleReasonSwThermalSlowdown = ...
+nvmlClocksThrottleReasonHwThermalSlowdown = ...
+nvmlClocksThrottleReasonHwPowerBrakeSlowdown = ...
+nvmlClocksThrottleReasonDisplayClockSetting = ...
+nvmlClocksThrottleReasonNone = ...
+nvmlClocksThrottleReasonAll = ...
+
+class c_nvmlEventData_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+class struct_c_nvmlSystemEventSet_t(Structure): ...
+
+c_nvmlSystemEventSet_t = ...
+nvmlSystemEventTypeGpuDriverUnbind = ...
+nvmlSystemEventTypeGpuDriverBind = ...
+nvmlSystemEventTypeCount = ...
+nvmlSystemEventSetCreateRequest_v1 = ...
+
+class c_nvmlSystemEventSetCreateRequest_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlSystemEventSetFreeRequest_v1 = ...
+
+class c_nvmlSystemEventSetFreeRequest_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlSystemRegisterEventRequest_v1 = ...
+
+class c_nvmlSystemRegisterEventRequest_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+class c_nvmlSystemEventData_v1_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+nvmlSystemEventSetWaitRequest_v1 = ...
+
+class c_nvmlSystemEventSetWaitRequest_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+def c_nvmlSystemEventSetCreate(req) -> None:
+    """
+    C API binding for nvmlSystemEventSetCreate
+    :param req: reference of c_nvmlSystemEventSetCreateRequest_v1_t
+    """
+    ...
+
+def c_nvmlSystemEventSetFree(req) -> None:
+    """
+    C API binding for nvmlSystemEventSetFree
+    :param req: reference of c_nvmlSystemEventSetFreeRequest_v1_t
+    """
+    ...
+
+def c_nvmlSystemRegisterEvents(req) -> None:
+    """
+    C API binding for nvmlSystemRegisterEvents
+    :param req: reference of c_nvmlSystemRegisterEventRequest_v1_t
+    """
+    ...
+
+def c_nvmlSystemEventSetWait(req):  # -> Literal[10] | None:
+    """
+    C API binding for nvmlSystemEventSetWait
+    :param req: reference of c_nvmlSystemEventSetWaitRequest_v1_t
+    """
+    ...
+
+def nvmlSystemEventSetCreate(version):
+    """
+    Create an NVML system event set.
+    :param version: The version of the request.
+    :return: A ctypes pointer to the created c_nvmlSystemEventSet_t structure.
+    """
+    ...
+
+def nvmlSystemEventSetFree(version, setHandle) -> None:
+    """
+    Free an NVML system event set.
+    :param version: The version of the request.
+    :param setHandle: A ctypes pointer to a c_nvmlSystemEventSet_t structure.
+    """
+    ...
+
+def nvmlSystemRegisterEvents(version, eventTypes, setHandle) -> None:
+    """
+    Register events for an NVML system event set.
+    :param version: The version of the request.
+    :param eventTypes: A bitmask of event types to register.
+    :param setHandle: A ctypes pointer to a c_nvmlSystemEventSet_t structure.
+    """
+    ...
+
+def nvmlSystemEventSetWait(
+    version, setHandle, timeoutMs, eventData, eventDataSize
+):  # -> list[Any]:
+    """
+    Wait for events in an NVML system event set.
+    :param version: The version of the request.
+    :param setHandle: A ctypes pointer to a c_nvmlSystemEventSet_t structure.
+    :param timeoutMs: Timeout in milliseconds.
+    :param eventData: A ctypes array of c_nvmlSystemEventData_v1_t for event data.
+    :param eventDataSize: Number of c_nvmlSystemEventData_v1_t elements in the eventData array.
+    :return: A list of dictionaries containing event data.
+    """
+    ...
+
+class c_nvmlAccountingStats_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuVersion_t(Structure):
+    _fields_ = ...
+
+class c_nvmlVgpuMetadata_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuPgpuMetadata_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuPgpuCompatibility_t(Structure):
+    _fields_ = ...
+
+NVML_VGPU_SCHEDULER_POLICY_UNKNOWN = ...
+NVML_VGPU_SCHEDULER_POLICY_BEST_EFFORT = ...
+NVML_VGPU_SCHEDULER_POLICY_EQUAL_SHARE = ...
+NVML_VGPU_SCHEDULER_POLICY_FIXED_SHARE = ...
+NVML_SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT = ...
+NVML_SCHEDULER_SW_MAX_LOG_ENTRIES = ...
+NVML_VGPU_SCHEDULER_ARR_DEFAULT = ...
+NVML_VGPU_SCHEDULER_ARR_DISABLE = ...
+NVML_VGPU_SCHEDULER_ARR_ENABLE = ...
+NVML_VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS = ...
+NVML_VGPU_SCHEDULER_ENGINE_TYPE_NVENC1 = ...
+NVML_VGPU_SCHEDULER_ENGINE_TYPE_NVENC0 = ...
+
+class c_nvmlVgpuSchedDataWithARR_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedData_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerParams_t(Union):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerLogEntry_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerLog_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerGetState_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedSetDataWithARR_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedSetData_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerSetParams_t(Union):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerSetState_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerCapabilities_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuTypeIdInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlVgpuTypeIdInfo_v1 = ...
+
+class c_nvmlVgpuTypeMaxInstance_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlVgpuTypeMaxInstance_v1 = ...
+
+class c_nvmlActiveVgpuInstanceInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlActiveVgpuInstanceInfo_v1 = ...
+
+class c_nvmlVgpuSchedulerState_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlVgpuSchedulerState_v1 = ...
+
+class c_nvmlVgpuSchedulerStateInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlVgpuSchedulerStateInfo_v1 = ...
+
+class c_nvmlVgpuSchedulerLogInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlVgpuSchedulerLogInfo_v1 = ...
+
+class c_nvmlVgpuCreatablePlacementInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlVgpuCreatablePlacementInfo_v1 = ...
+
+class c_nvmlVgpuSchedulerStateInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerLogEntry_v2_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerLogInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlVgpuSchedulerState_v2_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlFBCStats_t(Structure):
+    _fields_ = ...
+
+class c_nvmlFBCSession_t(_PrintableStructure):
+    _fields_ = ...
+
+NVML_DEVICE_MIG_DISABLE = ...
+NVML_DEVICE_MIG_ENABLE = ...
+NVML_GPU_INSTANCE_PROFILE_1_SLICE = ...
+NVML_GPU_INSTANCE_PROFILE_2_SLICE = ...
+NVML_GPU_INSTANCE_PROFILE_3_SLICE = ...
+NVML_GPU_INSTANCE_PROFILE_4_SLICE = ...
+NVML_GPU_INSTANCE_PROFILE_7_SLICE = ...
+NVML_GPU_INSTANCE_PROFILE_8_SLICE = ...
+NVML_GPU_INSTANCE_PROFILE_6_SLICE = ...
+NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV1 = ...
+NVML_GPU_INSTANCE_PROFILE_2_SLICE_REV1 = ...
+NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV2 = ...
+NVML_GPU_INSTANCE_PROFILE_1_SLICE_GFX = ...
+NVML_GPU_INSTANCE_PROFILE_2_SLICE_GFX = ...
+NVML_GPU_INSTANCE_PROFILE_4_SLICE_GFX = ...
+NVML_GPU_INSTANCE_PROFILE_1_SLICE_NO_ME = ...
+NVML_GPU_INSTANCE_PROFILE_2_SLICE_NO_ME = ...
+NVML_GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME = ...
+NVML_GPU_INSTANCE_PROFILE_2_SLICE_ALL_ME = ...
+NVML_GPU_INSTANCE_PROFILE_COUNT = ...
+
+class c_nvmlGpuInstancePlacement_t(Structure):
+    _fields_ = ...
+
+class c_nvmlGpuInstanceProfileInfo_t(Structure):
+    _fields_ = ...
+
+nvmlGpuInstanceProfileInfo_v2 = ...
+
+class c_nvmlGpuInstanceProfileInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+class c_nvmlGpuInstanceInfo_t(Structure):
+    _fields_ = ...
+
+class struct_c_nvmlGpuInstance_t(Structure): ...
+
+c_nvmlGpuInstance_t = ...
+NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE = ...
+NVML_COMPUTE_INSTANCE_PROFILE_2_SLICE = ...
+NVML_COMPUTE_INSTANCE_PROFILE_3_SLICE = ...
+NVML_COMPUTE_INSTANCE_PROFILE_4_SLICE = ...
+NVML_COMPUTE_INSTANCE_PROFILE_7_SLICE = ...
+NVML_COMPUTE_INSTANCE_PROFILE_8_SLICE = ...
+NVML_COMPUTE_INSTANCE_PROFILE_6_SLICE = ...
+NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 = ...
+NVML_COMPUTE_INSTANCE_PROFILE_COUNT = ...
+NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED = ...
+NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT = ...
+
+class c_nvmlComputeInstancePlacement_t(Structure):
+    _fields_ = ...
+
+class c_nvmlComputeInstanceProfileInfo_t(Structure):
+    _fields_ = ...
+
+nvmlComputeInstanceProfileInfo_v2 = ...
+
+class c_nvmlComputeInstanceProfileInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+class c_nvmlComputeInstanceInfo_t(Structure):
+    _fields_ = ...
+
+NVML_MAX_GPU_UTILIZATIONS = ...
+NVML_GPU_UTILIZATION_DOMAIN_GPU = ...
+NVML_GPU_UTILIZATION_DOMAIN_FB = ...
+NVML_GPU_UTILIZATION_DOMAIN_VID = ...
+NVML_GPU_UTILIZATION_DOMAIN_BUS = ...
+
+class c_nvmlGpuDynamicPstatesUtilization_t(Structure):
+    _fields_ = ...
+
+class c_nvmlGpuDynamicPstatesInfo_t(Structure):
+    _fields_ = ...
+
+NVML_MAX_THERMAL_SENSORS_PER_GPU = ...
+NVML_THERMAL_TARGET_NONE = ...
+NVML_THERMAL_TARGET_GPU = ...
+NVML_THERMAL_TARGET_MEMORY = ...
+NVML_THERMAL_TARGET_POWER_SUPPLY = ...
+NVML_THERMAL_TARGET_BOARD = ...
+NVML_THERMAL_TARGET_VCD_BOARD = ...
+NVML_THERMAL_TARGET_VCD_INLET = ...
+NVML_THERMAL_TARGET_VCD_OUTLET = ...
+NVML_THERMAL_TARGET_ALL = ...
+NVML_THERMAL_TARGET_UNKNOWN = ...
+NVML_THERMAL_CONTROLLER_NONE = ...
+NVML_THERMAL_CONTROLLER_GPU_INTERNAL = ...
+NVML_THERMAL_CONTROLLER_ADM1032 = ...
+NVML_THERMAL_CONTROLLER_ADT7461 = ...
+NVML_THERMAL_CONTROLLER_MAX6649 = ...
+NVML_THERMAL_CONTROLLER_MAX1617 = ...
+NVML_THERMAL_CONTROLLER_LM99 = ...
+NVML_THERMAL_CONTROLLER_LM89 = ...
+NVML_THERMAL_CONTROLLER_LM64 = ...
+NVML_THERMAL_CONTROLLER_G781 = ...
+NVML_THERMAL_CONTROLLER_ADT7473 = ...
+NVML_THERMAL_CONTROLLER_SBMAX6649 = ...
+NVML_THERMAL_CONTROLLER_VBIOSEVT = ...
+NVML_THERMAL_CONTROLLER_OS = ...
+NVML_THERMAL_CONTROLLER_NVSYSCON_CANOAS = ...
+NVML_THERMAL_CONTROLLER_NVSYSCON_E551 = ...
+NVML_THERMAL_CONTROLLER_MAX6649R = ...
+NVML_THERMAL_CONTROLLER_ADT7473S = ...
+NVML_THERMAL_CONTROLLER_UNKNOWN = ...
+
+class c_nvmlGpuThermalSensor_t(Structure):
+    _fields_ = ...
+
+class c_nvmlGpuThermalSettings_t(Structure):
+    _fields_ = ...
+
+_nvmlCoolerControl_t = c_uint
+NVML_THERMAL_COOLER_SIGNAL_NONE = ...
+NVML_THERMAL_COOLER_SIGNAL_TOGGLE = ...
+NVML_THERMAL_COOLER_SIGNAL_VARIABLE = ...
+NVML_THERMAL_COOLER_SIGNAL_COUNT = ...
+_nvmlCoolerTarget_t = c_uint
+NVML_THERMAL_COOLER_TARGET_NONE = ...
+NVML_THERMAL_COOLER_TARGET_GPU = ...
+NVML_THERMAL_COOLER_TARGET_MEMORY = ...
+NVML_THERMAL_COOLER_TARGET_POWER_SUPPLY = ...
+NVML_THERMAL_COOLER_TARGET_GPU_RELATED = ...
+
+class c_nvmlCoolerInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlCoolerInfo_v1 = ...
+
+def nvmlDeviceGetCoolerInfo(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[<subclass of bytes and str> | str | Any]:
+    ...
+
+class struct_c_nvmlComputeInstance_t(Structure): ...
+
+c_nvmlComputeInstance_t = ...
+
+class c_nvmlDeviceAttributes(Structure):
+    _fields_ = ...
+
+class c_nvmlRowRemapperHistogramValues(Structure):
+    _fields_ = ...
+
+NVML_GPU_CERT_CHAIN_SIZE = ...
+NVML_GPU_ATTESTATION_CERT_CHAIN_SIZE = ...
+NVML_CC_GPU_CEC_NONCE_SIZE = ...
+NVML_CC_GPU_ATTESTATION_REPORT_SIZE = ...
+NVML_CC_GPU_CEC_ATTESTATION_REPORT_SIZE = ...
+NVML_CC_CEC_ATTESTATION_REPORT_NOT_PRESENT = ...
+NVML_CC_CEC_ATTESTATION_REPORT_PRESENT = ...
+
+class c_nvmlConfComputeSystemState_t(Structure):
+    _fields_ = ...
+
+nvmlSystemConfComputeSettings_v1 = ...
+
+class c_nvmlSystemConfComputeSettings_v1_t(Structure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+class c_nvmlConfComputeSystemCaps_t(Structure):
+    _fields_ = ...
+
+class c_nvmlConfComputeMemSizeInfo_t(Structure):
+    _fields_ = ...
+
+class c_nvmlConfComputeGpuCertificate_t(Structure):
+    _fields_ = ...
+
+class c_nvmlConfComputeGpuAttestationReport_t(Structure):
+    _fields_ = ...
+
+class c_nvmlConfComputeSetKeyRotationThresholdInfo_t(Structure):
+    _fields_ = ...
+
+ConfComputeSetKeyRotationThresholdInfo_v1 = ...
+
+class c_nvmlConfComputeGetKeyRotationThresholdInfo_t(Structure):
+    _fields_ = ...
+
+ConfComputeGetKeyRotationThresholdInfo_v1 = ...
+
+def convertStrBytes(
+    func,
+):  # -> _Wrapped[..., Any, ..., <subclass of bytes and str> | str | Any]:
+    """
+    In Python 3, strings are unicode instead of bytes, and need to be converted for ctypes.
+    Args from caller: (1, 'string', <__main__.c_nvmlDevice_t at 0xFFFFFFFF>)
+    Args passed to function: (1, b'string', <__main__.c_nvmlDevice_t at 0xFFFFFFFF>)
+    ----
+    Returned from function: b'returned string'
+    Returned to caller: 'returned string'
+    """
+    ...
+
+def throwOnVersionMismatch(func):  # -> _Wrapped[..., Any, ..., Any]:
+    ...
+def nvmlInitWithFlags(flags) -> None: ...
+def nvmlInit() -> None: ...
+def nvmlShutdown() -> None: ...
+@convertStrBytes
+def nvmlErrorString(result): ...
+@convertStrBytes
+def nvmlSystemGetNVMLVersion():  # -> Any:
+    ...
+def nvmlSystemGetCudaDriverVersion() -> int: ...
+def nvmlSystemGetCudaDriverVersion_v2() -> int: ...
+@convertStrBytes
+def nvmlSystemGetProcessName(pid):  # -> Any:
+    ...
+@convertStrBytes
+def nvmlSystemGetDriverVersion():  # -> Any:
+    ...
+def nvmlSystemGetHicVersion():  # -> list[Any] | Array[c_nvmlHwbcEntry_t]:
+    ...
+def nvmlSystemGetDriverBranch():  # -> c_nvmlSystemDriverBranchInfo_v1_t:
+    ...
+def nvmlUnitGetCount() -> int: ...
+def nvmlUnitGetHandleByIndex(index: int):  # -> _Pointer[struct_c_nvmlUnit_t]:
+    ...
+def nvmlUnitGetUnitInfo(unit):  # -> c_nvmlUnitInfo_t:
+    ...
+def nvmlUnitGetLedState(unit):  # -> c_nvmlLedState_t:
+    ...
+def nvmlUnitGetPsuInfo(unit):  # -> c_nvmlPSUInfo_t:
+    ...
+def nvmlUnitGetTemperature(unit, type) -> int: ...
+def nvmlUnitGetFanSpeedInfo(unit):  # -> c_nvmlUnitFanSpeeds_t:
+    ...
+def nvmlUnitGetDeviceCount(unit) -> int: ...
+def nvmlUnitGetDevices(unit):  # -> Array[_Pointer[struct_c_nvmlDevice_t]]:
+    ...
+def nvmlDeviceGetCount() -> int: ...
+def nvmlDeviceGetHandleByIndex(index: int) -> _Pointer[struct_c_nvmlDevice_t]: ...
+@convertStrBytes
+def nvmlDeviceGetHandleBySerial(serial):  # -> _Pointer[struct_c_nvmlDevice_t]:
+    ...
+@convertStrBytes
+def nvmlDeviceGetHandleByUUID(uuid):  # -> _Pointer[struct_c_nvmlDevice_t]:
+    ...
+@convertStrBytes
+def nvmlDeviceGetHandleByUUIDV(uuid, type):  # -> _Pointer[struct_c_nvmlDevice_t]:
+    ...
+@convertStrBytes
+def nvmlDeviceGetHandleByPciBusId(pciBusId):  # -> _Pointer[struct_c_nvmlDevice_t]:
+    ...
+@convertStrBytes
+def nvmlDeviceGetName(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> Any:
+    ...
+
+class c_nvmlDevicePerfModes_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlDevicePerfModes_v1 = ...
+
+@convertStrBytes
+def nvmlDeviceGetPerformanceModes(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+
+class c_nvmlDeviceCurrentClockFreqs_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlDeviceCurrentClockFreqs_v1 = ...
+
+@convertStrBytes
+def nvmlDeviceGetCurrentClockFreqs(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetBoardId(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetMultiGpuBoard(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetBrand(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetC2cModeInfoV1(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlC2cModeInfo_v1_t:
+    ...
+def nvmlDeviceGetC2cModeInfoV(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlC2cModeInfo_v1_t:
+    ...
+@convertStrBytes
+def nvmlDeviceGetBoardPartNumber(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> Any:
+    ...
+@convertStrBytes
+def nvmlDeviceGetSerial(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> Any:
+    ...
+def nvmlDeviceGetModuleId(
+    handle: _Pointer[struct_c_nvmlDevice_t], moduleId=...
+) -> int: ...
+def nvmlDeviceGetMemoryAffinity(
+    handle: _Pointer[struct_c_nvmlDevice_t], nodeSetSize, scope
+): ...
+def nvmlDeviceGetCpuAffinityWithinScope(
+    handle: _Pointer[struct_c_nvmlDevice_t], cpuSetSize, scope
+): ...
+def nvmlDeviceGetCpuAffinity(handle: _Pointer[struct_c_nvmlDevice_t], cpuSetSize): ...
+def nvmlDeviceSetCpuAffinity(handle: _Pointer[struct_c_nvmlDevice_t]) -> None: ...
+def nvmlDeviceClearCpuAffinity(handle: _Pointer[struct_c_nvmlDevice_t]) -> None: ...
+def nvmlDeviceGetNumaNodeId(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetAddressingMode(device):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetMinorNumber(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+@convertStrBytes
+def nvmlDeviceGetUUID(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> Any:
+    ...
+@convertStrBytes
+def nvmlDeviceGetInforomVersion(
+    handle: _Pointer[struct_c_nvmlDevice_t], infoRomObject
+):  # -> Any:
+    ...
+@convertStrBytes
+def nvmlDeviceGetInforomImageVersion(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> Any:
+    ...
+def nvmlDeviceGetInforomConfigurationChecksum(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceValidateInforom(handle: _Pointer[struct_c_nvmlDevice_t]) -> None: ...
+def nvmlDeviceGetLastBBXFlushTime(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetDisplayMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetDisplayActive(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetPersistenceMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetPciInfoExt(
+    handle: _Pointer[struct_c_nvmlDevice_t], c_info
+) -> None: ...
+def nvmlDeviceGetPciInfo_v3(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> nvmlPciInfo_t:
+    ...
+def nvmlDeviceGetPciInfo(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> nvmlPciInfo_t:
+    ...
+def nvmlDeviceGetClockInfo(handle: _Pointer[struct_c_nvmlDevice_t], type) -> int: ...
+def nvmlDeviceGetMaxClockInfo(handle: _Pointer[struct_c_nvmlDevice_t], type) -> int: ...
+def nvmlDeviceGetApplicationsClock(
+    handle: _Pointer[struct_c_nvmlDevice_t], type
+) -> int: ...
+def nvmlDeviceGetMaxCustomerBoostClock(
+    handle: _Pointer[struct_c_nvmlDevice_t], type
+) -> int: ...
+def nvmlDeviceGetClock(handle: _Pointer[struct_c_nvmlDevice_t], type, id) -> int: ...
+def nvmlDeviceGetDefaultApplicationsClock(
+    handle: _Pointer[struct_c_nvmlDevice_t], type
+) -> int: ...
+def nvmlDeviceGetSupportedMemoryClocks(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetSupportedGraphicsClocks(
+    handle: _Pointer[struct_c_nvmlDevice_t], memoryClockMHz
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetFanSpeed(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetFanSpeed_v2(handle: _Pointer[struct_c_nvmlDevice_t], fan) -> int: ...
+
+class c_nvmlFanSpeedInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlFanSpeedInfo_v1 = ...
+
+def nvmlDeviceGetFanSpeedRPM(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetTargetFanSpeed(
+    handle: _Pointer[struct_c_nvmlDevice_t], fan
+) -> int: ...
+def nvmlDeviceGetNumFans(device) -> int: ...
+def nvmlDeviceSetDefaultFanSpeed_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t], index: int
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetMinMaxFanSpeed(
+    handle: _Pointer[struct_c_nvmlDevice_t], minSpeed=..., maxSpeed=...
+):  # -> list[int] | Literal[0]:
+    ...
+def nvmlDeviceGetFanControlPolicy_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t], fan, fanControlPolicy=...
+) -> int: ...
+def nvmlDeviceSetFanControlPolicy(
+    handle: _Pointer[struct_c_nvmlDevice_t], fan, fanControlPolicy
+):  # -> Literal[0]:
+    ...
+
+class c_nvmlTemperature_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlTemperature_v1 = ...
+
+def nvmlDeviceGetTemperatureV1(
+    handle: _Pointer[struct_c_nvmlDevice_t], sensor: int
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetTemperatureV(
+    handle: _Pointer[struct_c_nvmlDevice_t], sensor: int, version=...
+) -> c_int: ...
+def nvmlDeviceGetTemperature(
+    handle: _Pointer[struct_c_nvmlDevice_t], sensor: int
+) -> int: ...
+def nvmlDeviceGetTemperatureThreshold(
+    handle: _Pointer[struct_c_nvmlDevice_t], threshold
+) -> int: ...
+def nvmlDeviceSetTemperatureThreshold(
+    handle: _Pointer[struct_c_nvmlDevice_t], threshold, temp
+) -> None: ...
+def nvmlDeviceGetMarginTemperature(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetPowerState(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetPerformanceState(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetPowerManagementMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetPowerManagementLimit(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetPowerManagementLimitConstraints(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetPowerManagementDefaultLimit(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetEnforcedPowerLimit(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetPowerUsage(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetTotalEnergyConsumption(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetGpuOperationMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetCurrentGpuOperationMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetPendingGpuOperationMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetMemoryInfo(
+    handle: _Pointer[struct_c_nvmlDevice_t], version=...
+):  # -> c_nvmlMemory_t | c_nvmlMemory_v2_t:
+    ...
+def nvmlDeviceGetBAR1MemoryInfo(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlBAR1Memory_t:
+    ...
+def nvmlDeviceGetComputeMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetCudaComputeCapability(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> tuple[int, int]:
+    ...
+def nvmlDeviceGetEccMode(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> list[int]:
+    ...
+def nvmlDeviceGetCurrentEccMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetPendingEccMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetDefaultEccMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetTotalEccErrors(
+    handle: _Pointer[struct_c_nvmlDevice_t], errorType, counterType
+) -> int: ...
+def nvmlDeviceGetDetailedEccErrors(
+    handle: _Pointer[struct_c_nvmlDevice_t], errorType, counterType
+):  # -> c_nvmlEccErrorCounts_t:
+    ...
+def nvmlDeviceGetMemoryErrorCounter(
+    handle: _Pointer[struct_c_nvmlDevice_t], errorType, counterType, locationType
+) -> int: ...
+def nvmlDeviceGetUtilizationRates(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> c_nvmlUtilization_t: ...
+def nvmlDeviceGetEncoderUtilization(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetDecoderUtilization(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetJpgUtilization(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetOfaUtilization(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetPcieReplayCounter(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetDriverModel_v1(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetDriverModel_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetDriverModel(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> list[int]:
+    ...
+def nvmlDeviceGetCurrentDriverModel(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetPendingDriverModel(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+@convertStrBytes
+def nvmlDeviceGetVbiosVersion(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> Any:
+    ...
+def nvmlDeviceGetComputeRunningProcesses_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetComputeRunningProcesses_v3(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+@throwOnVersionMismatch
+def nvmlDeviceGetComputeRunningProcesses(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetGraphicsRunningProcesses_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetGraphicsRunningProcesses_v3(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+@throwOnVersionMismatch
+def nvmlDeviceGetGraphicsRunningProcesses(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+@throwOnVersionMismatch
+def nvmlDeviceGetMPSComputeRunningProcesses(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetMPSComputeRunningProcesses_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetMPSComputeRunningProcesses_v3(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetRunningProcessDetailList(
+    handle: _Pointer[struct_c_nvmlDevice_t], version, mode
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetAutoBoostedClocksEnabled(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlUnitSetLedState(unit, color) -> None: ...
+def nvmlDeviceSetPersistenceMode(
+    handle: _Pointer[struct_c_nvmlDevice_t], mode
+) -> None: ...
+def nvmlDeviceSetComputeMode(handle: _Pointer[struct_c_nvmlDevice_t], mode) -> None: ...
+def nvmlDeviceSetEccMode(handle: _Pointer[struct_c_nvmlDevice_t], mode) -> None: ...
+def nvmlDeviceClearEccErrorCounts(
+    handle: _Pointer[struct_c_nvmlDevice_t], counterType
+) -> None: ...
+def nvmlDeviceSetDriverModel(
+    handle: _Pointer[struct_c_nvmlDevice_t], model
+) -> None: ...
+def nvmlDeviceSetAutoBoostedClocksEnabled(
+    handle: _Pointer[struct_c_nvmlDevice_t], enabled
+) -> None: ...
+def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(
+    handle: _Pointer[struct_c_nvmlDevice_t], enabled, flags
+) -> None: ...
+def nvmlDeviceSetGpuLockedClocks(
+    handle: _Pointer[struct_c_nvmlDevice_t], minGpuClockMHz, maxGpuClockMHz
+) -> None: ...
+def nvmlDeviceResetGpuLockedClocks(handle: _Pointer[struct_c_nvmlDevice_t]) -> None: ...
+def nvmlDeviceSetMemoryLockedClocks(
+    handle: _Pointer[struct_c_nvmlDevice_t], minMemClockMHz, maxMemClockMHz
+) -> None: ...
+def nvmlDeviceResetMemoryLockedClocks(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> None: ...
+def nvmlDeviceGetClkMonStatus(
+    handle: _Pointer[struct_c_nvmlDevice_t], c_clkMonInfo=...
+):  # -> nvmlClkMonStatus_t | Literal[0]:
+    ...
+def nvmlDeviceSetApplicationsClocks(
+    handle: _Pointer[struct_c_nvmlDevice_t], maxMemClockMHz, maxGraphicsClockMHz
+) -> None: ...
+def nvmlDeviceResetApplicationsClocks(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> None: ...
+def nvmlDeviceSetPowerManagementLimit(
+    handle: _Pointer[struct_c_nvmlDevice_t], limit
+) -> None: ...
+def nvmlDeviceSetGpuOperationMode(
+    handle: _Pointer[struct_c_nvmlDevice_t], mode
+) -> None: ...
+def nvmlEventSetCreate():  # -> _Pointer[struct_c_nvmlEventSet_t]:
+    ...
+def nvmlDeviceRegisterEvents(
+    handle: _Pointer[struct_c_nvmlDevice_t], eventTypes, eventSet
+) -> None: ...
+def nvmlDeviceGetSupportedEventTypes(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlEventSetWait_v2(eventSet, timeoutms):  # -> c_nvmlEventData_t:
+    ...
+def nvmlEventSetWait(eventSet, timeoutms):  # -> c_nvmlEventData_t:
+    ...
+def nvmlEventSetFree(eventSet) -> None: ...
+def nvmlDeviceOnSameBoard(
+    handle1: _Pointer[struct_c_nvmlDevice_t], handle2: _Pointer[struct_c_nvmlDevice_t]
+):  # -> bool:
+    ...
+def nvmlDeviceGetCurrPcieLinkGeneration(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetMaxPcieLinkGeneration(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetCurrPcieLinkWidth(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetMaxPcieLinkWidth(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetGpuMaxPcieLinkGeneration(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetSupportedClocksThrottleReasons(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetSupportedClocksEventReasons(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetCurrentClocksThrottleReasons(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetCurrentClocksEventReasons(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetIndex(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceGetAccountingMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceSetAccountingMode(
+    handle: _Pointer[struct_c_nvmlDevice_t], mode
+) -> None: ...
+def nvmlDeviceClearAccountingPids(handle: _Pointer[struct_c_nvmlDevice_t]) -> None: ...
+def nvmlDeviceGetAccountingStats(
+    handle: _Pointer[struct_c_nvmlDevice_t], pid
+):  # -> c_nvmlAccountingStats_t:
+    ...
+def nvmlDeviceGetAccountingPids(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[int]:
+    ...
+def nvmlDeviceGetAccountingBufferSize(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> int: ...
+def nvmlDeviceGetRetiredPages(device, sourceFilter):  # -> list[int]:
+    ...
+def nvmlDeviceGetRetiredPages_v2(device, sourceFilter):  # -> list[dict[str, int]]:
+    ...
+def nvmlDeviceGetRetiredPagesPendingStatus(device) -> int: ...
+def nvmlDeviceGetAPIRestriction(device, apiType) -> int: ...
+def nvmlDeviceSetAPIRestriction(
+    handle: _Pointer[struct_c_nvmlDevice_t], apiType, isRestricted
+) -> None: ...
+def nvmlDeviceGetBridgeChipInfo(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlBridgeChipHierarchy_t:
+    ...
+def nvmlDeviceGetSamples(device, sampling_type, timeStamp):  # -> tuple[int, list[Any]]:
+    ...
+def nvmlDeviceGetViolationStatus(device, perfPolicyType):  # -> c_nvmlViolationTime_t:
+    ...
+def nvmlDeviceGetPcieThroughput(device, counter) -> int: ...
+def nvmlSystemGetTopologyGpuSet(cpuNumber):  # -> list[Any]:
+    ...
+def nvmlDeviceGetTopologyNearestGpus(device, level):  # -> list[Any]:
+    ...
+def nvmlDeviceGetTopologyCommonAncestor(device1, device2) -> int: ...
+def nvmlDeviceGetNvLinkUtilizationCounter(device, link, counter):  # -> tuple[int, int]:
+    ...
+def nvmlDeviceFreezeNvLinkUtilizationCounter(device, link, counter, freeze) -> None: ...
+def nvmlDeviceResetNvLinkUtilizationCounter(device, link, counter) -> None: ...
+def nvmlDeviceSetNvLinkUtilizationControl(
+    device, link, counter, control, reset
+) -> None: ...
+def nvmlDeviceGetNvLinkUtilizationControl(
+    device, link, counter
+):  # -> nvmlNvLinkUtilizationControl_t:
+    ...
+def nvmlDeviceGetNvLinkCapability(device, link, capability) -> int: ...
+def nvmlDeviceGetNvLinkErrorCounter(device, link, counter) -> int: ...
+def nvmlDeviceResetNvLinkErrorCounters(device, link) -> None: ...
+def nvmlDeviceGetNvLinkRemotePciInfo(device, link):  # -> nvmlPciInfo_t:
+    ...
+def nvmlDeviceGetNvLinkRemoteDeviceType(
+    handle: _Pointer[struct_c_nvmlDevice_t], link
+) -> int: ...
+def nvmlDeviceGetNvLinkState(device, link) -> int: ...
+def nvmlDeviceGetNvLinkVersion(device, link) -> int: ...
+def nvmlDeviceModifyDrainState(pciInfo, newState) -> None: ...
+def nvmlDeviceQueryDrainState(pciInfo) -> int: ...
+def nvmlDeviceRemoveGpu(pciInfo) -> None: ...
+def nvmlDeviceDiscoverGpus(pciInfo) -> None: ...
+def nvmlDeviceGetFieldValues(
+    handle: _Pointer[struct_c_nvmlDevice_t], fieldIds
+):  # -> Array[c_nvmlFieldValue_t]:
+    ...
+def nvmlDeviceClearFieldValues(
+    handle: _Pointer[struct_c_nvmlDevice_t], fieldIds
+):  # -> Array[c_nvmlFieldValue_t]:
+    ...
+def nvmlDeviceGetVirtualizationMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceSetVirtualizationMode(
+    handle: _Pointer[struct_c_nvmlDevice_t], virtualization_mode
+): ...
+def nvmlDeviceGetVgpuHeterogeneousMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceSetVgpuHeterogeneousMode(
+    handle: _Pointer[struct_c_nvmlDevice_t], heterogeneous_mode
+):  # -> Literal[0]:
+    ...
+def nvmlVgpuInstanceGetPlacementId(
+    vgpuInstance,
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetVgpuTypeSupportedPlacements(
+    handle: _Pointer[struct_c_nvmlDevice_t], vgpuTypeId, mode=..., version=...
+):  # -> c_nvmlVgpuPlacementList_v2_t | c_nvmlVgpuPlacementList_v1_t:
+    ...
+def nvmlDeviceGetVgpuTypeCreatablePlacements(
+    handle: _Pointer[struct_c_nvmlDevice_t], vgpuTypeId, version=...
+):  # -> c_nvmlVgpuPlacementList_v2_t | c_nvmlVgpuPlacementList_v1_t:
+    ...
+def nvmlGetVgpuDriverCapabilities(capability) -> int: ...
+def nvmlDeviceGetVgpuCapabilities(
+    handle: _Pointer[struct_c_nvmlDevice_t], capability
+) -> int: ...
+def nvmlDeviceSetVgpuCapabilities(
+    handle: _Pointer[struct_c_nvmlDevice_t], capability, state
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceVgpuForceGspUnload(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetSupportedVgpus(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetCreatableVgpus(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlVgpuTypeGetGpuInstanceProfileId(vgpuTypeId) -> int: ...
+@convertStrBytes
+def nvmlVgpuTypeGetClass(vgpuTypeId):  # -> Any:
+    ...
+@convertStrBytes
+def nvmlVgpuTypeGetName(vgpuTypeId):  # -> Any:
+    ...
+def nvmlVgpuTypeGetDeviceID(vgpuTypeId):  # -> tuple[int, int]:
+    ...
+def nvmlVgpuTypeGetFramebufferSize(vgpuTypeId) -> int: ...
+def nvmlVgpuTypeGetNumDisplayHeads(vgpuTypeId) -> int: ...
+def nvmlVgpuTypeGetResolution(vgpuTypeId):  # -> tuple[int, int]:
+    ...
+@convertStrBytes
+def nvmlVgpuTypeGetLicense(vgpuTypeId):  # -> Any:
+    ...
+def nvmlVgpuTypeGetFrameRateLimit(vgpuTypeId) -> int: ...
+def nvmlVgpuTypeGetGspHeapSize(vgpuTypeId) -> int: ...
+def nvmlVgpuTypeGetFbReservation(vgpuTypeId) -> int: ...
+def nvmlVgpuInstanceGetRuntimeStateSize(vgpuInstance):  # -> nvmlVgpuRuntimeState_v1_t:
+    ...
+def nvmlVgpuTypeGetMaxInstances(
+    handle: _Pointer[struct_c_nvmlDevice_t], vgpuTypeId
+) -> int: ...
+def nvmlVgpuTypeGetMaxInstancesPerVm(vgpuTypeId) -> int: ...
+def nvmlVgpuTypeGetBAR1Info(vgpuTypeId):  # -> c_nvmlVgpuTypeBar1Info_v1_t:
+    ...
+def nvmlDeviceGetActiveVgpus(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> list[Any]:
+    ...
+@convertStrBytes
+def nvmlVgpuInstanceGetVmID(vgpuInstance):  # -> tuple[Any, int]:
+    ...
+@convertStrBytes
+def nvmlVgpuInstanceGetUUID(vgpuInstance):  # -> Any:
+    ...
+@convertStrBytes
+def nvmlVgpuInstanceGetMdevUUID(vgpuInstance):  # -> Any:
+    ...
+@convertStrBytes
+def nvmlVgpuInstanceGetVmDriverVersion(vgpuInstance):  # -> Any:
+    ...
+def nvmlVgpuInstanceGetLicenseStatus(vgpuInstance) -> int: ...
+def nvmlVgpuInstanceGetLicenseInfo_v2(vgpuInstance):  # -> c_nvmlVgpuLicenseInfo_t:
+    ...
+def nvmlVgpuInstanceGetLicenseInfo(vgpuInstance):  # -> c_nvmlVgpuLicenseInfo_t:
+    ...
+def nvmlVgpuInstanceGetFrameRateLimit(vgpuInstance) -> int: ...
+def nvmlVgpuInstanceGetEccMode(vgpuInstance) -> int: ...
+def nvmlVgpuInstanceGetType(vgpuInstance) -> int: ...
+def nvmlVgpuInstanceGetEncoderCapacity(vgpuInstance) -> int: ...
+def nvmlVgpuInstanceSetEncoderCapacity(vgpuInstance, encoder_capacity): ...
+def nvmlVgpuInstanceGetFbUsage(vgpuInstance) -> int: ...
+def nvmlVgpuTypeGetCapabilities(vgpuTypeId, capability) -> int: ...
+def nvmlVgpuInstanceGetGpuInstanceId(vgpuInstance) -> int: ...
+@convertStrBytes
+def nvmlVgpuInstanceGetGpuPciId(vgpuInstance):  # -> Any:
+    ...
+def nvmlDeviceGetVgpuUtilization(
+    handle: _Pointer[struct_c_nvmlDevice_t], timeStamp
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetVgpuInstancesUtilizationInfo(
+    handle: _Pointer[struct_c_nvmlDevice_t], timeStamp
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetP2PStatus(device1, device2, p2pIndex) -> int: ...
+def nvmlDeviceGetGridLicensableFeatures_v4(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlGridLicensableFeatures_v4_t:
+    ...
+def nvmlDeviceGetGridLicensableFeatures(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlGridLicensableFeatures_v4_t:
+    ...
+def nvmlDeviceGetGspFirmwareVersion(
+    handle: _Pointer[struct_c_nvmlDevice_t], version=...
+):  # -> Any | Literal[0]:
+    ...
+def nvmlDeviceGetGspFirmwareMode(
+    handle: _Pointer[struct_c_nvmlDevice_t], isEnabled=..., defaultMode=...
+):  # -> list[int] | Literal[0]:
+    ...
+def nvmlDeviceGetEncoderCapacity(
+    handle: _Pointer[struct_c_nvmlDevice_t], encoderQueryType
+) -> int: ...
+def nvmlDeviceGetVgpuProcessUtilization(
+    handle: _Pointer[struct_c_nvmlDevice_t], timeStamp
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetVgpuProcessesUtilizationInfo(
+    handle: _Pointer[struct_c_nvmlDevice_t], timeStamp
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetEncoderStats(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+) -> tuple[int, int, int]:
+    ...
+def nvmlDeviceGetEncoderSessions(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetFBCStats(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlFBCStats_t:
+    ...
+def nvmlDeviceGetFBCSessions(handle: _Pointer[struct_c_nvmlDevice_t]):  # -> list[Any]:
+    ...
+def nvmlVgpuInstanceGetEncoderStats(vgpuInstance) -> tuple[int, int, int]:
+    ...
+def nvmlVgpuInstanceGetEncoderSessions(vgpuInstance):  # -> list[Any]:
+    ...
+def nvmlVgpuInstanceGetFBCStats(vgpuInstance):  # -> c_nvmlFBCStats_t:
+    ...
+def nvmlVgpuInstanceGetFBCSessions(vgpuInstance):  # -> list[Any]:
+    ...
+def nvmlDeviceGetProcessUtilization(
+    handle: _Pointer[struct_c_nvmlDevice_t], timeStamp
+):  # -> list[Any]:
+    ...
+def nvmlDeviceGetProcessesUtilizationInfo(
+    handle: _Pointer[struct_c_nvmlDevice_t], timeStamp
+): ...
+def nvmlVgpuInstanceGetMetadata(vgpuInstance):  # -> c_nvmlVgpuMetadata_t:
+    ...
+def nvmlDeviceGetVgpuMetadata(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlVgpuPgpuMetadata_t:
+    ...
+def nvmlGetVgpuCompatibility(
+    vgpuMetadata, pgpuMetadata
+):  # -> c_nvmlVgpuPgpuCompatibility_t:
+    ...
+@convertStrBytes
+def nvmlDeviceGetPgpuMetadataString(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> tuple[Any, int]:
+    ...
+def nvmlDeviceGetVgpuSchedulerLog(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlVgpuSchedulerLog_t:
+    ...
+def nvmlDeviceGetVgpuSchedulerState(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlVgpuSchedulerGetState_t:
+    ...
+def nvmlDeviceGetVgpuSchedulerCapabilities(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> c_nvmlVgpuSchedulerCapabilities_t:
+    ...
+def nvmlDeviceSetVgpuSchedulerState(
+    handle: _Pointer[struct_c_nvmlDevice_t], sched_state
+):  # -> Literal[0]:
+    ...
+def nvmlSetVgpuVersion(vgpuVersion):  # -> Literal[0]:
+    ...
+def nvmlGetVgpuVersion(
+    supported=..., current=...
+):  # -> list[tuple[Any, Any]] | Literal[0]:
+    ...
+def nvmlVgpuInstanceGetAccountingMode(vgpuInstance) -> int: ...
+def nvmlVgpuInstanceGetAccountingPids(vgpuInstance):  # -> tuple[c_uint, Array[c_uint]]:
+    ...
+def nvmlVgpuInstanceGetAccountingStats(
+    vgpuInstance, pid
+):  # -> c_nvmlAccountingStats_t:
+    ...
+def nvmlVgpuInstanceClearAccountingPids(vgpuInstance):  # -> Literal[0]:
+    ...
+def nvmlGpuInstanceGetCreatableVgpus(gpuInstance, c_vgpus): ...
+def nvmlVgpuTypeGetMaxInstancesPerGpuInstance(
+    vgpuTypeId,
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlGpuInstanceGetActiveVgpus(gpuInstance, c_vgpu_instance_info): ...
+def nvmlGpuInstanceSetVgpuSchedulerState(gpuInstance, sched_state): ...
+def nvmlGpuInstanceGetVgpuSchedulerState(gpuInstance, c_vgpu_sched_state_info): ...
+def nvmlGpuInstanceGetVgpuSchedulerLog(gpuInstance, c_vgpu_sched_log_info): ...
+def nvmlGpuInstanceGetVgpuTypeCreatablePlacements(gpuInstance, c_vgpu_placements): ...
+def nvmlGpuInstanceGetVgpuHeterogeneousMode(
+    gpuInstance,
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlGpuInstanceSetVgpuHeterogeneousMode(
+    gpuInstance, heterogeneous_mode
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetVgpuSchedulerState_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t], c_vgpu_sched_state_info_v2
+): ...
+def nvmlGpuInstanceGetVgpuSchedulerState_v2(
+    gpuInstance, c_vgpu_sched_state_info_v2
+): ...
+def nvmlDeviceGetVgpuSchedulerLog_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t], c_vgpu_sched_log_info_v2
+): ...
+def nvmlGpuInstanceGetVgpuSchedulerLog_v2(gpuInstance, c_vgpu_sched_log_info_v2): ...
+def nvmlDeviceSetVgpuSchedulerState_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t], sched_state_v2
+):  # -> Literal[0]:
+    ...
+def nvmlGpuInstanceSetVgpuSchedulerState_v2(
+    gpuInstance, sched_state_v2
+):  # -> Literal[0]:
+    ...
+def nvmlGetExcludedDeviceCount() -> int: ...
+def nvmlGetExcludedDeviceInfoByIndex(index: int):  # -> c_nvmlExcludedDeviceInfo_t:
+    ...
+def nvmlDeviceGetHostVgpuMode(handle: _Pointer[struct_c_nvmlDevice_t]) -> int: ...
+def nvmlDeviceSetMigMode(device, mode) -> int: ...
+def nvmlDeviceGetMigMode(device) -> list[int]:
+    ...
+def nvmlDeviceGetGpuInstanceProfileInfo(
+    device, profile, version=...
+):  # -> c_nvmlGpuInstanceProfileInfo_v2_t | c_nvmlGpuInstanceProfileInfo_t:
+    ...
+def nvmlDeviceGetGpuInstanceProfileInfoById(
+    device, profileId
+):  # -> c_nvmlGpuInstanceProfileInfo_v2_t:
+    ...
+
+nvmlDeviceGetGpuInstanceProfileInfoV = ...
+nvmlDeviceGetGpuInstanceProfileInfoByIdV = ...
+
+def nvmlDeviceGetGpuInstanceRemainingCapacity(device, profileId) -> int: ...
+def nvmlDeviceGetGpuInstancePossiblePlacements(
+    device, profileId, placementsRef, countRef
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceCreateGpuInstance(
+    device, profileId
+):  # -> _Pointer[struct_c_nvmlGpuInstance_t]:
+    ...
+def nvmlDeviceCreateGpuInstanceWithPlacement(
+    device, profileId, placement
+):  # -> _Pointer[struct_c_nvmlGpuInstance_t]:
+    ...
+def nvmlGpuInstanceDestroy(gpuInstance):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetGpuInstances(
+    device, profileId, gpuInstancesRef, countRef
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetGpuInstanceById(
+    device, gpuInstanceId
+):  # -> _Pointer[struct_c_nvmlGpuInstance_t]:
+    ...
+def nvmlGpuInstanceGetInfo(gpuInstance):  # -> c_nvmlGpuInstanceInfo_t:
+    ...
+def nvmlGpuInstanceGetComputeInstanceProfileInfo(
+    device, profile, engProfile, version=...
+):  # -> c_nvmlComputeInstanceProfileInfo_v2_t | c_nvmlComputeInstanceProfileInfo_t:
+    ...
+
+nvmlGpuInstanceGetComputeInstanceProfileInfoV = ...
+
+def nvmlGpuInstanceGetComputeInstanceRemainingCapacity(
+    gpuInstance, profileId
+) -> int: ...
+def nvmlGpuInstanceGetComputeInstancePossiblePlacements(
+    gpuInstance, profileId, placementsRef, countRef
+):  # -> Literal[0]:
+    ...
+def nvmlGpuInstanceCreateComputeInstance(
+    gpuInstance, profileId
+):  # -> _Pointer[struct_c_nvmlComputeInstance_t]:
+    ...
+def nvmlGpuInstanceCreateComputeInstanceWithPlacement(
+    gpuInstance, profileId, placement
+):  # -> _Pointer[struct_c_nvmlComputeInstance_t]:
+    ...
+def nvmlComputeInstanceDestroy(computeInstance):  # -> Literal[0]:
+    ...
+def nvmlGpuInstanceGetComputeInstances(
+    gpuInstance, profileId, computeInstancesRef, countRef
+):  # -> Literal[0]:
+    ...
+def nvmlGpuInstanceGetComputeInstanceById(
+    gpuInstance, computeInstanceId
+):  # -> _Pointer[struct_c_nvmlComputeInstance_t]:
+    ...
+def nvmlComputeInstanceGetInfo_v2(computeInstance):  # -> c_nvmlComputeInstanceInfo_t:
+    ...
+def nvmlComputeInstanceGetInfo(computeInstance):  # -> c_nvmlComputeInstanceInfo_t:
+    ...
+def nvmlDeviceIsMigDeviceHandle(device) -> c_uint:
+    ...
+def nvmlDeviceGetGpuInstanceId(device) -> int: ...
+def nvmlDeviceGetComputeInstanceId(device) -> int: ...
+def nvmlDeviceGetMaxMigDeviceCount(device) -> int: ...
+def nvmlDeviceGetMigDeviceHandleByIndex(
+    device, index: int
+) -> _Pointer[struct_c_nvmlDevice_t]:
+    ...
+def nvmlDeviceGetDeviceHandleFromMigDeviceHandle(
+    migDevice,
+) -> _Pointer[struct_c_nvmlDevice_t]:
+    ...
+def nvmlDeviceGetAttributes_v2(device):  # -> c_nvmlDeviceAttributes:
+    ...
+def nvmlDeviceGetAttributes(device):  # -> c_nvmlDeviceAttributes:
+    ...
+def nvmlDeviceGetRemappedRows(device) -> tuple[int, int, int, int]:
+    ...
+def nvmlDeviceGetRowRemapperHistogram(device):  # -> c_nvmlRowRemapperHistogramValues:
+    ...
+def nvmlDeviceGetArchitecture(device) -> int: ...
+def nvmlDeviceGetBusType(device) -> int: ...
+def nvmlDeviceGetIrqNum(device) -> int: ...
+def nvmlDeviceGetNumGpuCores(device) -> int: ...
+def nvmlDeviceGetPowerSource(device) -> int: ...
+def nvmlDeviceGetMemoryBusWidth(device) -> int: ...
+def nvmlDeviceGetPcieLinkMaxSpeed(device) -> int: ...
+def nvmlDeviceGetAdaptiveClockInfoStatus(device) -> int: ...
+def nvmlDeviceGetPcieSpeed(device) -> int: ...
+def nvmlDeviceGetDynamicPstatesInfo(
+    device, c_dynamicpstatesinfo=...
+):  # -> c_nvmlGpuDynamicPstatesInfo_t | Literal[0]:
+    ...
+def nvmlDeviceSetFanSpeed_v2(
+    handle: _Pointer[struct_c_nvmlDevice_t], index: int, speed
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetThermalSettings(
+    device, sensorindex: int, c_thermalsettings=...
+):  # -> Any | Literal[0]:
+    ...
+def nvmlDeviceGetMinMaxClockOfPState(
+    device, clockType, pstate, minClockMHz=..., maxClockMHz=...
+):  # -> tuple[int, int] | Literal[0]:
+    ...
+
+_nvmlPowerMizerMode_t = c_uint
+NVML_POWER_MIZER_MODE_ADAPTIVE = ...
+NVML_POWER_MIZER_MODE_PREFER_MAXIMUM_PERFORMANCE = ...
+NVML_POWER_MIZER_MODE_AUTO = ...
+NVML_POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE = ...
+
+class c_nvmlDevicePowerMizerModes_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+def nvmlDeviceGetPowerMizerMode_v1(device, info): ...
+def nvmlDeviceSetPowerMizerMode_v1(device, info): ...
+
+class c_nvmlClockOffset_t(_PrintableStructure):
+    _fields_ = ...
+
+nvmlClockOffset_v1 = ...
+
+def nvmlDeviceGetClockOffsets(device, info):  # -> Literal[0]:
+    ...
+def nvmlDeviceSetClockOffsets(device, info):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetSupportedPerformanceStates(device):  # -> list[Any]:
+    ...
+def nvmlDeviceGetGpcClkVfOffset(device) -> int: ...
+def nvmlDeviceSetGpcClkVfOffset(device, offset):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetGpcClkMinMaxVfOffset(
+    device, minOffset=..., maxOffset=...
+):  # -> tuple[int, int] | Literal[0]:
+    ...
+def nvmlDeviceGetMemClkVfOffset(device) -> int: ...
+def nvmlDeviceSetMemClkVfOffset(device, offset):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetMemClkMinMaxVfOffset(
+    device, minOffset=..., maxOffset=...
+):  # -> tuple[int, int] | Literal[0]:
+    ...
+def nvmlSystemSetConfComputeGpusReadyState(state):  # -> Literal[0]:
+    ...
+def nvmlSystemGetConfComputeGpusReadyState() -> int: ...
+def nvmlSystemGetConfComputeCapabilities():  # -> c_nvmlConfComputeSystemCaps_t:
+    ...
+def nvmlSystemGetConfComputeState():  # -> c_nvmlConfComputeSystemState_t:
+    ...
+def nvmlSystemGetConfComputeSettings(settings): ...
+def nvmlDeviceSetConfComputeUnprotectedMemSize(device, c_ccMemSize):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetConfComputeMemSizeInfo(device):  # -> c_nvmlConfComputeMemSizeInfo_t:
+    ...
+def nvmlDeviceGetConfComputeProtectedMemoryUsage(device):  # -> c_nvmlMemory_t:
+    ...
+def nvmlDeviceGetConfComputeGpuCertificate(
+    device,
+):  # -> c_nvmlConfComputeGpuCertificate_t:
+    ...
+def nvmlDeviceGetConfComputeGpuAttestationReport(
+    device, c_nonce
+):  # -> c_nvmlConfComputeGpuAttestationReport_t:
+    ...
+def nvmlSystemSetConfComputeKeyRotationThresholdInfo(max_atk_adv):  # -> Literal[0]:
+    ...
+def nvmlSystemGetConfComputeKeyRotationThresholdInfo():  # -> c_nvmlConfComputeGetKeyRotationThresholdInfo_t:
+    ...
+
+NVML_GPM_METRIC_GRAPHICS_UTIL = ...
+NVML_GPM_METRIC_SM_UTIL = ...
+NVML_GPM_METRIC_SM_OCCUPANCY = ...
+NVML_GPM_METRIC_INTEGER_UTIL = ...
+NVML_GPM_METRIC_ANY_TENSOR_UTIL = ...
+NVML_GPM_METRIC_DFMA_TENSOR_UTIL = ...
+NVML_GPM_METRIC_HMMA_TENSOR_UTIL = ...
+NVML_GPM_METRIC_DMMA_TENSOR_UTIL = ...
+NVML_GPM_METRIC_IMMA_TENSOR_UTIL = ...
+NVML_GPM_METRIC_DRAM_BW_UTIL = ...
+NVML_GPM_METRIC_FP64_UTIL = ...
+NVML_GPM_METRIC_FP32_UTIL = ...
+NVML_GPM_METRIC_FP16_UTIL = ...
+NVML_GPM_METRIC_PCIE_TX_PER_SEC = ...
+NVML_GPM_METRIC_PCIE_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVDEC_0_UTIL = ...
+NVML_GPM_METRIC_NVDEC_1_UTIL = ...
+NVML_GPM_METRIC_NVDEC_2_UTIL = ...
+NVML_GPM_METRIC_NVDEC_3_UTIL = ...
+NVML_GPM_METRIC_NVDEC_4_UTIL = ...
+NVML_GPM_METRIC_NVDEC_5_UTIL = ...
+NVML_GPM_METRIC_NVDEC_6_UTIL = ...
+NVML_GPM_METRIC_NVDEC_7_UTIL = ...
+NVML_GPM_METRIC_NVJPG_0_UTIL = ...
+NVML_GPM_METRIC_NVJPG_1_UTIL = ...
+NVML_GPM_METRIC_NVJPG_2_UTIL = ...
+NVML_GPM_METRIC_NVJPG_3_UTIL = ...
+NVML_GPM_METRIC_NVJPG_4_UTIL = ...
+NVML_GPM_METRIC_NVJPG_5_UTIL = ...
+NVML_GPM_METRIC_NVJPG_6_UTIL = ...
+NVML_GPM_METRIC_NVJPG_7_UTIL = ...
+NVML_GPM_METRIC_NVOFA_0_UTIL = ...
+NVML_GPM_METRIC_NVOFA_1_UTIL = ...
+NVML_GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L0_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L0_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L1_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L1_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L2_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L2_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L3_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L3_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L4_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L4_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L5_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L5_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L6_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L6_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L7_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L7_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L8_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L8_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L9_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L9_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L10_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L10_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L11_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L11_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L12_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L12_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L13_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L13_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L14_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L14_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L15_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L15_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L16_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L16_TX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L17_RX_PER_SEC = ...
+NVML_GPM_METRIC_NVLINK_L17_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK0_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK0_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK0_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK0_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK1_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK1_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK1_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK1_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK2_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK2_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK2_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK2_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK3_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK3_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK3_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK3_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK4_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK4_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK4_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK4_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK5_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK5_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK5_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK5_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK6_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK6_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK6_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK6_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK7_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK7_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK7_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK7_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK8_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK8_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK8_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK8_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK9_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK9_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK9_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK9_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK10_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK10_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK10_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK10_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK11_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK11_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK11_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK11_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK12_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK12_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK12_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK12_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK13_TOTAL_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK13_TOTAL_RX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK13_DATA_TX_PER_SEC = ...
+NVML_GPM_METRIC_C2C_LINK13_DATA_RX_PER_SEC = ...
+NVML_GPM_METRIC_HOSTMEM_CACHE_HIT = ...
+NVML_GPM_METRIC_HOSTMEM_CACHE_MISS = ...
+NVML_GPM_METRIC_PEERMEM_CACHE_HIT = ...
+NVML_GPM_METRIC_PEERMEM_CACHE_MISS = ...
+NVML_GPM_METRIC_DRAM_CACHE_HIT = ...
+NVML_GPM_METRIC_DRAM_CACHE_MISS = ...
+NVML_GPM_METRIC_NVENC_0_UTIL = ...
+NVML_GPM_METRIC_NVENC_1_UTIL = ...
+NVML_GPM_METRIC_NVENC_2_UTIL = ...
+NVML_GPM_METRIC_NVENC_3_UTIL = ...
+NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR0_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR0_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR0_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR0_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR1_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR1_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR1_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR1_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR2_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR2_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR2_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR2_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR3_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR3_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR3_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR3_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR4_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR4_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR4_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR4_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR5_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR5_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR5_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR5_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR6_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR6_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR6_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR6_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_GR7_CTXSW_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_GR7_CTXSW_REQUESTS = ...
+NVML_GPM_METRIC_GR7_CTXSW_CYCLES_PER_REQ = ...
+NVML_GPM_METRIC_GR7_CTXSW_ACTIVE_PCT = ...
+NVML_GPM_METRIC_SM_CYCLES_ELAPSED = ...
+NVML_GPM_METRIC_SM_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_MMA_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_DMMA_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_HMMA_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_IMMA_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_DFMA_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_PCIE_TX = ...
+NVML_GPM_METRIC_PCIE_RX = ...
+NVML_GPM_METRIC_INTEGER_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_FP64_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_FP32_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_FP16_CYCLES_ACTIVE = ...
+NVML_GPM_METRIC_NVLINK_L0_RX = ...
+NVML_GPM_METRIC_NVLINK_L0_TX = ...
+NVML_GPM_METRIC_NVLINK_L1_RX = ...
+NVML_GPM_METRIC_NVLINK_L1_TX = ...
+NVML_GPM_METRIC_NVLINK_L2_RX = ...
+NVML_GPM_METRIC_NVLINK_L2_TX = ...
+NVML_GPM_METRIC_NVLINK_L3_RX = ...
+NVML_GPM_METRIC_NVLINK_L3_TX = ...
+NVML_GPM_METRIC_NVLINK_L4_RX = ...
+NVML_GPM_METRIC_NVLINK_L4_TX = ...
+NVML_GPM_METRIC_NVLINK_L5_RX = ...
+NVML_GPM_METRIC_NVLINK_L5_TX = ...
+NVML_GPM_METRIC_NVLINK_L6_RX = ...
+NVML_GPM_METRIC_NVLINK_L6_TX = ...
+NVML_GPM_METRIC_NVLINK_L7_RX = ...
+NVML_GPM_METRIC_NVLINK_L7_TX = ...
+NVML_GPM_METRIC_NVLINK_L8_RX = ...
+NVML_GPM_METRIC_NVLINK_L8_TX = ...
+NVML_GPM_METRIC_NVLINK_L9_RX = ...
+NVML_GPM_METRIC_NVLINK_L9_TX = ...
+NVML_GPM_METRIC_NVLINK_L10_RX = ...
+NVML_GPM_METRIC_NVLINK_L10_TX = ...
+NVML_GPM_METRIC_NVLINK_L11_RX = ...
+NVML_GPM_METRIC_NVLINK_L11_TX = ...
+NVML_GPM_METRIC_NVLINK_L12_RX = ...
+NVML_GPM_METRIC_NVLINK_L12_TX = ...
+NVML_GPM_METRIC_NVLINK_L13_RX = ...
+NVML_GPM_METRIC_NVLINK_L13_TX = ...
+NVML_GPM_METRIC_NVLINK_L14_RX = ...
+NVML_GPM_METRIC_NVLINK_L14_TX = ...
+NVML_GPM_METRIC_NVLINK_L15_RX = ...
+NVML_GPM_METRIC_NVLINK_L15_TX = ...
+NVML_GPM_METRIC_NVLINK_L16_RX = ...
+NVML_GPM_METRIC_NVLINK_L16_TX = ...
+NVML_GPM_METRIC_NVLINK_L17_RX = ...
+NVML_GPM_METRIC_NVLINK_L17_TX = ...
+NVML_GPM_METRIC_MAX = ...
+
+class c_nvmlUnitInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+class struct_c_nvmlGpmSample_t(Structure): ...
+
+c_nvmlGpmSample_t = ...
+
+class c_metricInfo_t(Structure):
+    _fields_ = ...
+
+class c_nvmlGpmMetric_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlGpmMetricsGet_t(_PrintableStructure):
+    _fields_ = ...
+
+NVML_GPM_METRICS_GET_VERSION = ...
+
+class c_nvmlGpmSupport_t(_PrintableStructure):
+    _fields_ = ...
+
+NVML_GPM_SUPPORT_VERSION = ...
+
+def nvmlGpmMetricsGet(metricsGet): ...
+def nvmlGpmSampleFree(gpmSample) -> None: ...
+def nvmlGpmSampleAlloc() -> _Pointer[struct_c_nvmlGpmSample_t]:
+    ...
+def nvmlGpmSampleGet(device, gpmSample): ...
+def nvmlGpmMigSampleGet(device, gpuInstanceId, gpmSample): ...
+def nvmlGpmQueryDeviceSupport(device) -> c_nvmlGpmSupport_t:
+    ...
+def nvmlGpmSetStreamingEnabled(device, state):  # -> Literal[0]:
+    ...
+def nvmlGpmQueryIfStreamingEnabled(device) -> int: ...
+
+NVML_NVLINK_POWER_STATE_HIGH_SPEED = ...
+NVML_NVLINK_POWER_STATE_LOW = ...
+NVML_NVLINK_LOW_POWER_THRESHOLD_MIN = ...
+NVML_NVLINK_LOW_POWER_THRESHOLD_MAX = ...
+NVML_NVLINK_LOW_POWER_THRESHOLD_RESET = ...
+NVML_NVLINK_LOW_POWER_THRESHOLD_DEFAULT = ...
+
+class c_nvmlNvLinkPowerThres_t(Structure):
+    _fields_ = ...
+
+def nvmlDeviceSetNvLinkDeviceLowPowerThreshold(device, l1threshold):  # -> Literal[0]:
+    ...
+
+NVML_GPU_FABRIC_UUID_LEN = ...
+_nvmlGpuFabricState_t = c_uint
+NVML_GPU_FABRIC_STATE_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_STATE_NOT_STARTED = ...
+NVML_GPU_FABRIC_STATE_IN_PROGRESS = ...
+NVML_GPU_FABRIC_STATE_COMPLETED = ...
+
+class c_nvmlGpuFabricInfo_t(_PrintableStructure):
+    _fields_ = ...
+
+NVML_GPU_FABRIC_HEALTH_MASK_DEGRADED_BW_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_HEALTH_MASK_DEGRADED_BW_TRUE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_DEGRADED_BW_FALSE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_DEGRADED_BW = ...
+NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_DEGRADED_BW = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_TRUE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ROUTE_RECOVERY_FALSE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_RECOVERY = ...
+NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_RECOVERY = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_TRUE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ROUTE_UNHEALTHY_FALSE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_ROUTE_UNHEALTHY = ...
+NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_ROUTE_UNHEALTHY = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_TRUE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_ACCESS_TIMEOUT_RECOVERY_FALSE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_ACCESS_TIMEOUT_RECOVERY = ...
+NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_ACCESS_TIMEOUT_RECOVERY = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NONE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCORRECT_SYSGUID = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCORRECT_CHASSIS_SN = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NO_PARTITION = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INSUFFICIENT_NVLINKS = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCOMPATIBLE_GPU_FW = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIURATION_INVALID_LOCATION = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INVALID_LOCATION = ...
+NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_GPU_STATE_INVALID = ...
+NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_INCORRECT_CONFIGURATION = ...
+NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_INCORRECT_CONFIGURATION = ...
+NVML_GPU_FABRIC_HEALTH_MASK_PARTITION_ASSIGNED_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_HEALTH_MASK_PARTITION_ASSIGNED_TRUE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_PARTITION_ASSIGNED_FALSE = ...
+NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_PARTITION_ASSIGNED = ...
+NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_PARTITION_ASSIGNED = ...
+NVML_GPU_FABRIC_HEALTH_SUMMARY_NOT_SUPPORTED = ...
+NVML_GPU_FABRIC_HEALTH_SUMMARY_HEALTHY = ...
+NVML_GPU_FABRIC_HEALTH_SUMMARY_UNHEALTHY = ...
+NVML_GPU_FABRIC_HEALTH_SUMMARY_LIMITED_CAPACITY = ...
+nvmlGpuFabricInfo_v2 = ...
+
+class c_nvmlGpuFabricInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+c_nvmlGpuFabricInfoV_t = c_nvmlGpuFabricInfo_v2_t
+
+class c_nvmlGpuFabricInfo_v3_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlGpuFabricInfo_v3 = ...
+
+def nvmlDeviceGetGpuFabricInfo(device, gpuFabricInfo):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetGpuFabricInfoV(device, gpuFabricInfo):  # -> Literal[0]:
+    ...
+
+NVML_GPU_NVLINK_BW_MODE_FULL = ...
+NVML_GPU_NVLINK_BW_MODE_OFF = ...
+NVML_GPU_NVLINK_BW_MODE_MIN = ...
+NVML_GPU_NVLINK_BW_MODE_HALF = ...
+NVML_GPU_NVLINK_BW_MODE_3QUARTER = ...
+NVML_GPU_NVLINK_BW_MODE_COUNT = ...
+
+def nvmlSystemSetNvlinkBwMode(mode):  # -> Literal[0]:
+    ...
+def nvmlSystemGetNvlinkBwMode() -> int: ...
+
+_nvmlPowerScopeType_t = c_uint
+NVML_POWER_SCOPE_GPU = ...
+NVML_POWER_SCOPE_MODULE = ...
+NVML_POWER_SCOPE_MEMORY = ...
+
+class c_nvmlPowerValue_v2_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+
+nvmlPowerValue_v2 = ...
+
+def nvmlDeviceSetPowerManagementLimit_v2(
+    device, powerScope, powerLimit, version=...
+):  # -> Literal[0]:
+    ...
+
+class c_nvmlEccSramErrorStatus_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlEccSramErrorStatus_v1 = ...
+
+def nvmlDeviceGetSramEccErrorStatus(device, status):  # -> Literal[0]:
+    ...
+
+NVML_DEV_CAP_EGM = ...
+nvmlDeviceCapabilities_v1 = ...
+
+class c_nvmlDeviceCapabilities_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+def nvmlDeviceGetCapabilities(device, caps): ...
+
+class c_nvmlPlatformInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+class c_nvmlPlatformInfo_v2_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlPlatformInfo_v1 = ...
+nvmlPlatformInfo_v2 = ...
+
+def nvmlDeviceGetPlatformInfo(device, platformInfo):  # -> Literal[0]:
+    ...
+
+NVML_DEVICE_HOSTNAME_BUFFER_SIZE = ...
+
+class c_nvmlHostname_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlMask255_t(_PrintableStructure):
+    _fields_ = ...
+
+NVML_WORKLOAD_POWER_MAX_PROFILES = ...
+NVML_POWER_PROFILE_MAX_P = ...
+NVML_POWER_PROFILE_MAX_Q = ...
+NVML_POWER_PROFILE_COMPUTE = ...
+NVML_POWER_PROFILE_MEMORY_BOUND = ...
+NVML_POWER_PROFILE_NETWORK = ...
+NVML_POWER_PROFILE_BALANCED = ...
+NVML_POWER_PROFILE_LLM_INFERENCE = ...
+NVML_POWER_PROFILE_LLM_TRAINING = ...
+NVML_POWER_PROFILE_RBM = ...
+NVML_POWER_PROFILE_DCPCIE = ...
+NVML_POWER_PROFILE_HMMA_SPARSE = ...
+NVML_POWER_PROFILE_HMMA_DENSE = ...
+NVML_POWER_PROFILE_SYNC_BALANCED = ...
+NVML_POWER_PROFILE_HPC = ...
+NVML_POWER_PROFILE_MIG = ...
+NVML_POWER_PROFILE_MAX = ...
+nvmlWorkloadPowerProfileInfo_v1 = ...
+
+class c_nvmlWorkloadPowerProfileInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlWorkloadPowerProfileProfilesInfo_v1 = ...
+
+class c_nvmlWorkloadPowerProfileProfilesInfo_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlWorkloadPowerProfileCurrentProfiles_v1 = ...
+
+class c_nvmlWorkloadPowerProfileCurrentProfiles_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlWorkloadPowerProfileRequestedProfiles_v1 = ...
+
+class c_nvmlWorkloadPowerProfileRequestedProfiles_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+NVML_POWER_PROFILE_OPERATION_CLEAR = ...
+NVML_POWER_PROFILE_OPERATION_SET = ...
+NVML_POWER_PROFILE_OPERATION_SET_AND_OVERWRITE = ...
+NVML_POWER_PROFILE_OPERATION_MAX = ...
+nvmlWorkloadPowerProfileUpdateProfiles_v1 = ...
+
+class c_nvmlWorkloadPowerProfileUpdateProfiles_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+def nvmlDeviceWorkloadPowerProfileGetProfilesInfo(
+    device, profilesInfo
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(
+    device, currentProfiles
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(
+    device, requestedProfiles
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(
+    device, requestedProfiles
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1(
+    device, updateProfiles
+):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetNvlinkSupportedBwModes(device, supportedBwModes):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetNvlinkBwMode(device, getBwMode):  # -> Literal[0]:
+    ...
+def nvmlDeviceSetNvlinkBwMode(device, setBwMode):  # -> Literal[0]:
+    ...
+
+nvmlDramEncryptionInfo_v1 = ...
+
+class c_nvmlDramEncryptionInfo_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+def nvmlDeviceGetDramEncryptionMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> list[<subclass of bytes and str> | str | Any]:
+    ...
+def nvmlDeviceGetCurrentDramEncryptionMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetPendingDramEncryptionMode(
+    handle: _Pointer[struct_c_nvmlDevice_t],
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceSetDramEncryptionMode(
+    handle: _Pointer[struct_c_nvmlDevice_t], mode
+) -> None: ...
+
+NVML_POWER_SMOOTHING_NUM_PROFILE_PARAMS = ...
+NVML_POWER_SMOOTHING_MAX_NUM_PROFILES = ...
+NVML_POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_SECONDARY_POWER_FLOOR = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_WIN_MULT = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_TAR_WIN_MULT = ...
+NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_OFFSET = ...
+nvmlPowerSmoothingState_v1 = ...
+
+class c_nvmlPowerSmoothingState_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlPowerSmoothingProfile_v1 = ...
+
+class c_nvmlPowerSmoothingProfile_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+def nvmlDevicePowerSmoothingActivatePresetProfile(device, profile) -> None: ...
+def nvmlDevicePowerSmoothingUpdatePresetProfileParam(device, profile) -> None: ...
+def nvmlDevicePowerSmoothingSetState(device, state) -> None: ...
+
+class c_nvmlEccSramUniqueUncorrectedErrorEntry_v1_t(_PrintableStructure):
+    _fields_ = ...
+
+class c_nvmlEccSramUniqueUncorrectedErrorCounts_v1_t(_PrintableStructure):
+    _fields_ = ...
+    def __init__(self) -> None: ...
+
+nvmlEccSramUniqueUncorrectedErrorCounts_v1 = ...
+
+def nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(device, counts):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetPdi(device):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetNvLinkInfo(device, info):  # -> Literal[0]:
+    ...
+def nvmlDeviceGetRepairStatus(
+    device,
+):  # -> list[<subclass of bytes and str> | str | Any]:
+    ...
+@convertStrBytes
+def nvmlDeviceSetHostname_v1(device, hostname) -> None: ...
+def nvmlDeviceGetHostname_v1(device):  # -> <subclass of bytes and str> | str | Any:
+    ...
+def nvmlDeviceGetUnrepairableMemoryFlag_v1(
+    device,
+):  # -> <subclass of bytes and str> | str | Any:
+    ...
+
+NVML_RUSD_POLL_NONE = ...
+NVML_RUSD_POLL_CLOCK = ...
+NVML_RUSD_POLL_PERF = ...
+NVML_RUSD_POLL_MEMORY = ...
+NVML_RUSD_POLL_POWER = ...
+NVML_RUSD_POLL_THERMAL = ...
+NVML_RUSD_POLL_PCI = ...
+NVML_RUSD_POLL_FAN = ...
+NVML_RUSD_POLL_PROC_UTIL = ...
+NVML_RUSD_POLL_ALL = ...
+nvmlRusdSettings_v1 = ...
+
+class c_nvmlRusdSettings_v1_t(_PrintableStructure):
+    _fields_ = ...
+    _fmt_ = ...
+    def __init__(self) -> None: ...
+
+def nvmlDeviceSetRusdSettings_v1(device, settings) -> None: ...
diff --git a/.mlx_typings/safetensors/__init__.pyi b/.typings/safetensors/__init__.pyi
similarity index 100%
rename from .mlx_typings/safetensors/__init__.pyi
rename to .typings/safetensors/__init__.pyi
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index b2e0978e1..7b7d7b21f 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -2,7 +2,8 @@
     "recommendations": [
         "detachhead.basedpyright",
         "ms-python.python",
-        "jnoortheen.nix-ide"
+        "jnoortheen.nix-ide",
+        "charliermarsh.ruff"
     ],
     "unwantedRecommendations": [
         "ms-python.vscode-pylance",
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 19dfab4d4..37c85975a 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -26,6 +26,9 @@
         "editor.defaultFormatter": "jnoortheen.nix-ide"
     },
     
+    "[python]": {
+        "editor.defaultFormatter": "charliermarsh.ruff"
+    },
     "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
     "basedpyright.analysis.configFilePath": "${workspaceFolder}/pyproject.toml",
     "basedpyright.importStrategy": "fromEnvironment",
diff --git a/Cargo.lock b/Cargo.lock
index e8710b04d..50cd29ebf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -91,9 +91,9 @@ dependencies = [
 
 [[package]]
 name = "anstream"
-version = "0.6.21"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -106,15 +106,15 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
 
 [[package]]
 name = "anstyle-parse"
-version = "0.2.7"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
 dependencies = [
  "utf8parse",
 ]
@@ -141,15 +141,18 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.100"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
 
 [[package]]
 name = "arc-swap"
-version = "1.7.1"
+version = "1.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
+checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207"
+dependencies = [
+ "rustversion",
+]
 
 [[package]]
 name = "arrayref"
@@ -165,9 +168,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "asn1-rs"
-version = "0.7.1"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56624a96882bb8c26d61312ae18cb45868e5a9992ea73c58e45c3101e56a1e60"
+checksum = "b7f43a50ac4fdca5df8e885c21b835997f0a1cdee65494a6847694a98652d9d8"
 dependencies = [
  "asn1-rs-derive",
  "asn1-rs-impl",
@@ -175,7 +178,7 @@ dependencies = [
  "nom",
  "num-traits",
  "rusticata-macros",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "time",
 ]
 
@@ -187,7 +190,7 @@ checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
  "synstructure",
 ]
 
@@ -199,14 +202,14 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "asn1_der"
-version = "0.7.6"
+version = "0.7.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "155a5a185e42c6b77ac7b88a15143d930a9e9727a5b7b77eed417404ab15c247"
+checksum = "4858a9d740c5007a9069007c3b4e91152d0506f13c1b31dd49051fd537656156"
 
 [[package]]
 name = "async-channel"
@@ -257,7 +260,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -268,7 +271,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -328,17 +331,17 @@ dependencies = [
  "n0-watcher",
  "netdev",
  "netwatch",
- "nix 0.31.2",
- "rand 0.10.0",
+ "nix 0.31.3",
+ "rand 0.10.1",
  "route_manager",
  "slab",
- "socket2 0.6.1",
- "thiserror 2.0.17",
+ "socket2 0.6.3",
+ "thiserror 2.0.18",
  "tokio",
  "tracing",
  "tracing-subscriber",
  "tun-rs",
- "winnow 1.0.1",
+ "winnow 1.0.3",
  "zerocopy",
 ]
 
@@ -387,9 +390,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
 [[package]]
 name = "base64ct"
-version = "1.8.0"
+version = "1.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"
+checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
 
 [[package]]
 name = "bimap"
@@ -399,15 +402,9 @@ checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7"
 
 [[package]]
 name = "bitflags"
-version = "1.3.2"
+version = "2.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
-
-[[package]]
-name = "bitflags"
-version = "2.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
+checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
 
 [[package]]
 name = "blake2"
@@ -447,9 +444,9 @@ dependencies = [
 
 [[package]]
 name = "bumpalo"
-version = "3.19.0"
+version = "3.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
 
 [[package]]
 name = "byteorder"
@@ -459,9 +456,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.11.0"
+version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
 dependencies = [
  "serde",
 ]
@@ -483,7 +480,7 @@ checksum = "3b457277798202ccd365b9c112ebee08ddd57f1033916c8b8ea52f222e5b715d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -497,9 +494,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.2.48"
+version = "1.2.62"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c481bdbf0ed3b892f6f806287d72acd515b352a4ec27a208489b8c1bc839633a"
+checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98"
 dependencies = [
  "find-msvc-tools",
  "shlex",
@@ -536,7 +533,7 @@ checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601"
 dependencies = [
  "cfg-if",
  "cpufeatures 0.3.0",
- "rand_core 0.10.0",
+ "rand_core 0.10.1",
 ]
 
 [[package]]
@@ -554,9 +551,9 @@ dependencies = [
 
 [[package]]
 name = "chrono"
-version = "0.4.42"
+version = "0.4.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
 dependencies = [
  "iana-time-zone",
  "js-sys",
@@ -579,9 +576,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.53"
+version = "4.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
+checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -589,9 +586,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.53"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
 dependencies = [
  "anstream",
  "anstyle",
@@ -601,21 +598,21 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.49"
+version = "4.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
+checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9"
 dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "clap_lex"
-version = "0.7.6"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
 
 [[package]]
 name = "color-eyre"
@@ -646,9 +643,9 @@ dependencies = [
 
 [[package]]
 name = "colorchoice"
-version = "1.0.4"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
 
 [[package]]
 name = "concurrent-queue"
@@ -716,15 +713,6 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
-[[package]]
-name = "core2"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
-dependencies = [
- "memchr",
-]
-
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -856,20 +844,20 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "data-encoding"
-version = "2.9.0"
+version = "2.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
+checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8"
 
 [[package]]
 name = "data-encoding-macro"
-version = "0.1.18"
+version = "0.1.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d"
+checksum = "3259c913752a86488b501ed8680446a5ed2d5aeac6e596cb23ba3800768ea32c"
 dependencies = [
  "data-encoding",
  "data-encoding-macro-internal",
@@ -877,12 +865,12 @@ dependencies = [
 
 [[package]]
 name = "data-encoding-macro-internal"
-version = "0.1.16"
+version = "0.1.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976"
+checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090"
 dependencies = [
  "data-encoding",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -893,7 +881,7 @@ checksum = "780eb241654bf097afb00fc5f054a09b687dad862e485fdcf8399bb056565370"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -923,9 +911,9 @@ dependencies = [
 
 [[package]]
 name = "deranged"
-version = "0.5.5"
+version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
+checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
 dependencies = [
  "powerfmt",
 ]
@@ -949,7 +937,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustc_version",
- "syn 2.0.111",
+ "syn",
  "unicode-xid",
 ]
 
@@ -977,7 +965,7 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "block2",
  "libc",
  "objc2",
@@ -991,7 +979,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -1007,9 +995,9 @@ dependencies = [
 
 [[package]]
 name = "dtoa"
-version = "1.0.10"
+version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
+checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
 
 [[package]]
 name = "ecdsa"
@@ -1093,14 +1081,14 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "env_filter"
-version = "0.1.4"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2"
+checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef"
 dependencies = [
  "log",
  "regex",
@@ -1108,9 +1096,9 @@ dependencies = [
 
 [[package]]
 name = "env_logger"
-version = "0.11.8"
+version = "0.11.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
+checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1167,11 +1155,13 @@ dependencies = [
  "libp2p",
  "log",
  "networking",
+ "pidfile-rs",
  "pin-project",
  "pyo3",
  "pyo3-async-runtimes",
  "pyo3-log",
  "pyo3-stub-gen",
+ "thiserror 2.0.18",
  "tokio",
  "util",
 ]
@@ -1184,7 +1174,7 @@ checksum = "311a6d2f1f9d60bff73d2c78a0af97ed27f79672f15c238192a5bbb64db56d00"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -1199,11 +1189,11 @@ dependencies = [
 
 [[package]]
 name = "fastrand"
-version = "2.3.0"
+version = "2.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
 dependencies = [
- "getrandom 0.2.16",
+ "getrandom 0.3.4",
 ]
 
 [[package]]
@@ -1224,9 +1214,19 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"
 
 [[package]]
 name = "find-msvc-tools"
-version = "0.1.5"
+version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
+[[package]]
+name = "flopen"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fbfb8b5fbd1f27929f216650081a07b6ceb0741f0542c8c43ff7ef8e93a35a5d"
+dependencies = [
+ "libc",
+ "nix 0.31.3",
+]
 
 [[package]]
 name = "flume"
@@ -1269,9 +1269,9 @@ dependencies = [
 
 [[package]]
 name = "futures"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
+checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
 dependencies = [
  "futures-channel",
  "futures-core",
@@ -1307,9 +1307,9 @@ dependencies = [
 
 [[package]]
 name = "futures-channel"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
 dependencies = [
  "futures-core",
  "futures-sink",
@@ -1317,27 +1317,26 @@ dependencies = [
 
 [[package]]
 name = "futures-core"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
 
 [[package]]
 name = "futures-executor"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
 dependencies = [
  "futures-core",
  "futures-task",
  "futures-util",
- "num_cpus",
 ]
 
 [[package]]
 name = "futures-io"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
 
 [[package]]
 name = "futures-lite"
@@ -1354,13 +1353,13 @@ dependencies = [
 
 [[package]]
 name = "futures-macro"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -1376,15 +1375,15 @@ dependencies = [
 
 [[package]]
 name = "futures-sink"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
 
 [[package]]
 name = "futures-task"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
 
 [[package]]
 name = "futures-timer"
@@ -1398,9 +1397,9 @@ dependencies = [
 
 [[package]]
 name = "futures-util"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
 dependencies = [
  "futures-channel",
  "futures-core",
@@ -1410,7 +1409,6 @@ dependencies = [
  "futures-task",
  "memchr",
  "pin-project-lite",
- "pin-utils",
  "slab",
 ]
 
@@ -1442,13 +1440,13 @@ dependencies = [
 
 [[package]]
 name = "getifaddrs"
-version = "0.6.1"
+version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "802c6e75f730346652928a55c200ca680741aaaa60c4973d52a999f520c4fde4"
+checksum = "a542e1b7ac1f3d62c5777d430d66eca9cb59e813c46b86e29fa9ce94ff9a4810"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -1473,9 +1471,9 @@ dependencies = [
 
 [[package]]
 name = "getrandom"
-version = "0.2.16"
+version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
 dependencies = [
  "cfg-if",
  "js-sys",
@@ -1507,7 +1505,7 @@ dependencies = [
  "cfg-if",
  "libc",
  "r-efi 6.0.0",
- "rand_core 0.10.0",
+ "rand_core 0.10.1",
  "wasip2",
  "wasip3",
 ]
@@ -1553,9 +1551,9 @@ dependencies = [
 
 [[package]]
 name = "h2"
-version = "0.4.12"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
+checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733"
 dependencies = [
  "atomic-waker",
  "bytes",
@@ -1585,8 +1583,6 @@ version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
- "allocator-api2",
- "equivalent",
  "foldhash 0.1.5",
 ]
 
@@ -1601,6 +1597,12 @@ dependencies = [
  "foldhash 0.2.0",
 ]
 
+[[package]]
+name = "hashbrown"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
+
 [[package]]
 name = "hashlink"
 version = "0.9.1"
@@ -1610,6 +1612,24 @@ dependencies = [
  "hashbrown 0.14.5",
 ]
 
+[[package]]
+name = "hashlink"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
+dependencies = [
+ "hashbrown 0.15.5",
+]
+
+[[package]]
+name = "hashlink"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea0b22561a9c04a7cb1a302c013e0259cd3b4bb619f145b32f72b8b4bcbed230"
+dependencies = [
+ "hashbrown 0.16.1",
+]
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -1650,10 +1670,10 @@ dependencies = [
  "idna",
  "ipnet",
  "once_cell",
- "rand 0.9.2",
+ "rand 0.9.4",
  "ring",
  "socket2 0.5.10",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tinyvec",
  "tokio",
  "tracing",
@@ -1673,10 +1693,10 @@ dependencies = [
  "moka",
  "once_cell",
  "parking_lot",
- "rand 0.9.2",
+ "rand 0.9.4",
  "resolv-conf",
  "smallvec",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tokio",
  "tracing",
 ]
@@ -1740,9 +1760,9 @@ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
 
 [[package]]
 name = "hyper"
-version = "1.8.1"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
+checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca"
 dependencies = [
  "atomic-waker",
  "bytes",
@@ -1754,7 +1774,6 @@ dependencies = [
  "httparse",
  "itoa",
  "pin-project-lite",
- "pin-utils",
  "smallvec",
  "tokio",
  "want",
@@ -1762,20 +1781,19 @@ dependencies = [
 
 [[package]]
 name = "hyper-util"
-version = "0.1.19"
+version = "0.1.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f"
+checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
 dependencies = [
  "bytes",
  "futures-channel",
- "futures-core",
  "futures-util",
  "http",
  "http-body",
  "hyper",
  "libc",
  "pin-project-lite",
- "socket2 0.6.1",
+ "socket2 0.6.3",
  "tokio",
  "tower-service",
  "tracing",
@@ -1783,9 +1801,9 @@ dependencies = [
 
 [[package]]
 name = "iana-time-zone"
-version = "0.1.64"
+version = "0.1.65"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
+checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
 dependencies = [
  "android_system_properties",
  "core-foundation-sys",
@@ -1807,12 +1825,13 @@ dependencies = [
 
 [[package]]
 name = "icu_collections"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
+checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
 dependencies = [
  "displaydoc",
  "potential_utf",
+ "utf8_iter",
  "yoke",
  "zerofrom",
  "zerovec",
@@ -1820,9 +1839,9 @@ dependencies = [
 
 [[package]]
 name = "icu_locale_core"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
+checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
 dependencies = [
  "displaydoc",
  "litemap",
@@ -1833,9 +1852,9 @@ dependencies = [
 
 [[package]]
 name = "icu_normalizer"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
+checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
 dependencies = [
  "icu_collections",
  "icu_normalizer_data",
@@ -1847,15 +1866,15 @@ dependencies = [
 
 [[package]]
 name = "icu_normalizer_data"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
+checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
 
 [[package]]
 name = "icu_properties"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99"
+checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
 dependencies = [
  "icu_collections",
  "icu_locale_core",
@@ -1867,15 +1886,15 @@ dependencies = [
 
 [[package]]
 name = "icu_properties_data"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899"
+checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
 
 [[package]]
 name = "icu_provider"
-version = "2.1.1"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
+checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
 dependencies = [
  "displaydoc",
  "icu_locale_core",
@@ -1905,9 +1924,9 @@ dependencies = [
 
 [[package]]
 name = "idna_adapter"
-version = "1.2.1"
+version = "1.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
+checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
 dependencies = [
  "icu_normalizer",
  "icu_properties",
@@ -1915,19 +1934,19 @@ dependencies = [
 
 [[package]]
 name = "if-addrs"
-version = "0.10.2"
+version = "0.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cabb0019d51a643781ff15c9c8a3e5dedc365c47211270f4e8f82812fedd8f0a"
+checksum = "c0a05c691e1fae256cf7013d99dad472dc52d5543322761f83ec8d47eab40d2b"
 dependencies = [
  "libc",
- "windows-sys 0.48.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
 name = "if-watch"
-version = "3.2.1"
+version = "3.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdf9d64cfcf380606e64f9a0bcf493616b65331199f984151a6fa11a7b3cde38"
+checksum = "71c02a5161c313f0cbdbadc511611893584a10a7b6153cb554bdf83ddce99ec2"
 dependencies = [
  "async-io",
  "core-foundation 0.9.4",
@@ -1936,14 +1955,14 @@ dependencies = [
  "if-addrs",
  "ipnet",
  "log",
- "netlink-packet-core 0.7.0",
- "netlink-packet-route 0.17.1",
- "netlink-proto 0.11.5",
+ "netlink-packet-core",
+ "netlink-packet-route 0.28.0",
+ "netlink-proto",
  "netlink-sys",
  "rtnetlink",
  "system-configuration",
  "tokio",
- "windows 0.53.0",
+ "windows 0.62.2",
 ]
 
 [[package]]
@@ -1961,7 +1980,7 @@ dependencies = [
  "hyper",
  "hyper-util",
  "log",
- "rand 0.9.2",
+ "rand 0.9.4",
  "tokio",
  "url",
  "xmltree",
@@ -1975,12 +1994,12 @@ checksum = "964de6e86d545b246d84badc0fef527924ace5134f30641c203ef52ba83f58d5"
 
 [[package]]
 name = "indexmap"
-version = "2.12.1"
+version = "2.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
+checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
 dependencies = [
  "equivalent",
- "hashbrown 0.16.1",
+ "hashbrown 0.17.1",
  "serde",
  "serde_core",
 ]
@@ -2005,23 +2024,24 @@ dependencies = [
 
 [[package]]
 name = "inventory"
-version = "0.3.21"
+version = "0.3.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e"
+checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b"
 dependencies = [
  "rustversion",
 ]
 
 [[package]]
 name = "ipconfig"
-version = "0.3.2"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f"
+checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222"
 dependencies = [
- "socket2 0.5.10",
+ "socket2 0.6.3",
  "widestring",
- "windows-sys 0.48.0",
- "winreg 0.50.0",
+ "windows-registry",
+ "windows-result 0.4.1",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -2038,7 +2058,7 @@ checksum = "f981dadd5a072a9e0efcd24bdcc388e570073f7e51b33505ceb1ef4668c80c86"
 dependencies = [
  "cfg_aliases",
  "libc",
- "socket2 0.6.1",
+ "socket2 0.6.3",
  "windows-sys 0.61.2",
 ]
 
@@ -2051,7 +2071,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -2080,15 +2100,15 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.15"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
 
 [[package]]
 name = "jiff"
-version = "0.2.16"
+version = "0.2.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35"
+checksum = "f00b5dbd620d61dfdcb6007c9c1f6054ebd75319f163d886a9055cec1155073d"
 dependencies = [
  "jiff-static",
  "log",
@@ -2099,21 +2119,23 @@ dependencies = [
 
 [[package]]
 name = "jiff-static"
-version = "0.2.16"
+version = "0.2.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69"
+checksum = "e000de030ff8022ea1da3f466fbb0f3a809f5e51ed31f6dd931c35181ad8e6d7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "js-sys"
-version = "0.3.83"
+version = "0.3.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8"
+checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08"
 dependencies = [
+ "cfg-if",
+ "futures-util",
  "once_cell",
  "wasm-bindgen",
 ]
@@ -2134,9 +2156,9 @@ dependencies = [
 
 [[package]]
 name = "keccak"
-version = "0.1.5"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654"
+checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653"
 dependencies = [
  "cpufeatures 0.2.17",
 ]
@@ -2167,9 +2189,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
 
 [[package]]
 name = "libc"
-version = "0.2.183"
+version = "0.2.186"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
 
 [[package]]
 name = "libloading"
@@ -2191,7 +2213,7 @@ dependencies = [
  "either",
  "futures",
  "futures-timer",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
  "libp2p-allow-block-list",
  "libp2p-autonat",
  "libp2p-connection-limits",
@@ -2227,7 +2249,7 @@ dependencies = [
  "multiaddr",
  "pin-project",
  "rw-stream-sink",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
 ]
 
 [[package]]
@@ -2259,9 +2281,9 @@ dependencies = [
  "libp2p-swarm",
  "quick-protobuf",
  "quick-protobuf-codec",
- "rand 0.8.5",
+ "rand 0.8.6",
  "rand_core 0.6.4",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "web-time",
 ]
@@ -2279,9 +2301,9 @@ dependencies = [
 
 [[package]]
 name = "libp2p-core"
-version = "0.43.1"
+version = "0.43.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d28e2d2def7c344170f5c6450c0dbe3dfef655610dbfde2f6ac28a527abbe36"
+checksum = "249128cd37a2199aff30a7675dffa51caf073b51aa612d2f544b19932b9aebca"
 dependencies = [
  "either",
  "fnv",
@@ -2294,9 +2316,9 @@ dependencies = [
  "parking_lot",
  "pin-project",
  "quick-protobuf",
- "rand 0.8.5",
+ "rand 0.8.6",
  "rw-stream-sink",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "unsigned-varint 0.8.0",
  "web-time",
@@ -2304,22 +2326,22 @@ dependencies = [
 
 [[package]]
 name = "libp2p-dcutr"
-version = "0.14.0"
+version = "0.14.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f4f0eec23bc79cabfdf6934718f161fc42a1d98e2c9d44007c80eb91534200c"
+checksum = "2b4107305e12158af3e66960b6181789c547394c9c9a8696f721521602bfc73a"
 dependencies = [
  "asynchronous-codec",
  "either",
  "futures",
  "futures-bounded",
  "futures-timer",
+ "hashlink 0.10.0",
  "libp2p-core",
  "libp2p-identity",
  "libp2p-swarm",
- "lru",
  "quick-protobuf",
  "quick-protobuf-codec",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "web-time",
 ]
@@ -2356,17 +2378,17 @@ dependencies = [
  "libp2p-swarm",
  "quick-protobuf",
  "quick-protobuf-codec",
- "rand 0.8.5",
+ "rand 0.8.6",
  "smallvec",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
 ]
 
 [[package]]
 name = "libp2p-gossipsub"
-version = "0.49.2"
+version = "0.49.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7f58e37d8d6848e5c4c9e3c35c6f61133235bff2960c9c00a663b0849301221"
+checksum = "a538e571cd38f504f761c61b8f79127489ea7a7d6f05c41ca15d31ffb5726326"
 dependencies = [
  "async-channel",
  "asynchronous-codec",
@@ -2377,15 +2399,15 @@ dependencies = [
  "fnv",
  "futures",
  "futures-timer",
- "getrandom 0.2.16",
- "hashlink",
+ "getrandom 0.2.17",
+ "hashlink 0.9.1",
  "hex_fmt",
  "libp2p-core",
  "libp2p-identity",
  "libp2p-swarm",
  "quick-protobuf",
  "quick-protobuf-codec",
- "rand 0.8.5",
+ "rand 0.8.6",
  "regex",
  "serde",
  "sha2",
@@ -2410,15 +2432,15 @@ dependencies = [
  "quick-protobuf",
  "quick-protobuf-codec",
  "smallvec",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
 ]
 
 [[package]]
 name = "libp2p-identity"
-version = "0.2.12"
+version = "0.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3104e13b51e4711ff5738caa1fb54467c8604c2e94d607e27745bcf709068774"
+checksum = "f0c7892c221730ba55f7196e98b0b8ba5e04b4155651736036628e9f73ed6fc3"
 dependencies = [
  "asn1_der",
  "bs58",
@@ -2428,12 +2450,12 @@ dependencies = [
  "multihash",
  "p256",
  "quick-protobuf",
- "rand 0.8.5",
+ "rand 0.8.6",
  "ring",
  "sec1",
  "serde",
  "sha2",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "zeroize",
 ]
@@ -2456,11 +2478,11 @@ dependencies = [
  "libp2p-swarm",
  "quick-protobuf",
  "quick-protobuf-codec",
- "rand 0.8.5",
+ "rand 0.8.6",
  "serde",
  "sha2",
  "smallvec",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "uint",
  "web-time",
@@ -2478,7 +2500,7 @@ dependencies = [
  "libp2p-core",
  "libp2p-identity",
  "libp2p-swarm",
- "rand 0.8.5",
+ "rand 0.8.6",
  "smallvec",
  "socket2 0.5.10",
  "tokio",
@@ -2534,10 +2556,10 @@ dependencies = [
  "multiaddr",
  "multihash",
  "quick-protobuf",
- "rand 0.8.5",
+ "rand 0.8.6",
  "snow",
  "static_assertions",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "x25519-dalek",
  "zeroize",
@@ -2554,7 +2576,7 @@ dependencies = [
  "libp2p-core",
  "libp2p-identity",
  "libp2p-swarm",
- "rand 0.8.5",
+ "rand 0.8.6",
  "tracing",
  "web-time",
 ]
@@ -2583,7 +2605,7 @@ checksum = "cf240b834dfa3f8b48feb2c4b87bb2cf82751543001b6ee86077f48183b18d52"
 dependencies = [
  "futures",
  "pin-project",
- "rand 0.8.5",
+ "rand 0.8.6",
  "salsa20",
  "sha3",
  "tracing",
@@ -2602,20 +2624,20 @@ dependencies = [
  "libp2p-identity",
  "libp2p-tls",
  "quinn",
- "rand 0.8.5",
+ "rand 0.8.6",
  "ring",
  "rustls",
  "socket2 0.5.10",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tokio",
  "tracing",
 ]
 
 [[package]]
 name = "libp2p-relay"
-version = "0.21.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "551b24ae04c63859bf5e25644acdd6aa469deb5c5cd872ca21c2c9b45a5a5192"
+checksum = "d8b9b0392ed623243ad298326b9f806d51191829ac7585cc825c54c6c67b04d9"
 dependencies = [
  "asynchronous-codec",
  "bytes",
@@ -2628,32 +2650,33 @@ dependencies = [
  "libp2p-swarm",
  "quick-protobuf",
  "quick-protobuf-codec",
- "rand 0.8.5",
+ "rand 0.8.6",
  "static_assertions",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "web-time",
 ]
 
 [[package]]
 name = "libp2p-rendezvous"
-version = "0.17.0"
+version = "0.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15285d828c2b4a34cb660c2e74cd6938116daceab1f4357bae933d5b08cca933"
+checksum = "31114bab295403e9934ae2e4415c45d681353829ea218390eed8f5bcc82dd1fb"
 dependencies = [
  "async-trait",
  "asynchronous-codec",
  "bimap",
  "futures",
  "futures-timer",
+ "hashlink 0.11.0",
  "libp2p-core",
  "libp2p-identity",
  "libp2p-request-response",
  "libp2p-swarm",
  "quick-protobuf",
  "quick-protobuf-codec",
- "rand 0.8.5",
- "thiserror 2.0.17",
+ "rand 0.8.6",
+ "thiserror 2.0.18",
  "tracing",
  "web-time",
 ]
@@ -2671,7 +2694,7 @@ dependencies = [
  "libp2p-core",
  "libp2p-identity",
  "libp2p-swarm",
- "rand 0.8.5",
+ "rand 0.8.6",
  "serde",
  "serde_json",
  "smallvec",
@@ -2680,21 +2703,21 @@ dependencies = [
 
 [[package]]
 name = "libp2p-swarm"
-version = "0.47.0"
+version = "0.47.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6aa762e5215919a34e31c35d4b18bf2e18566ecab7f8a3d39535f4a3068f8b62"
+checksum = "ce88c6c4bf746c8482480345ea3edfd08301f49e026889d1cbccfa1808a9ed9e"
 dependencies = [
  "either",
  "fnv",
  "futures",
  "futures-timer",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
+ "hashlink 0.10.0",
  "libp2p-core",
  "libp2p-identity",
  "libp2p-swarm-derive",
- "lru",
  "multistream-select",
- "rand 0.8.5",
+ "rand 0.8.6",
  "smallvec",
  "tokio",
  "tracing",
@@ -2710,21 +2733,21 @@ checksum = "dd297cf53f0cb3dee4d2620bb319ae47ef27c702684309f682bdb7e55a18ae9c"
 dependencies = [
  "heck",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "libp2p-tcp"
-version = "0.44.0"
+version = "0.44.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65b4e030c52c46c8d01559b2b8ca9b7c4185f10576016853129ca1fe5cd1a644"
+checksum = "fb6585b9309699f58704ec9ab0bb102eca7a3777170fa91a8678d73ca9cafa93"
 dependencies = [
  "futures",
  "futures-timer",
  "if-watch",
  "libc",
  "libp2p-core",
- "socket2 0.5.10",
+ "socket2 0.6.3",
  "tokio",
  "tracing",
 ]
@@ -2743,7 +2766,7 @@ dependencies = [
  "ring",
  "rustls",
  "rustls-webpki",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "x509-parser",
  "yasna",
 ]
@@ -2789,7 +2812,7 @@ dependencies = [
  "libp2p-noise",
  "quick-protobuf",
  "quick-protobuf-codec",
- "rand 0.8.5",
+ "rand 0.8.6",
  "serde",
  "sha2",
  "tinytemplate",
@@ -2804,14 +2827,14 @@ checksum = "3830f0bf6f0f16ded2c735599fe70baea43a8c1a2d76152216693329217301dd"
 dependencies = [
  "bytes",
  "futures",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
  "hex",
  "js-sys",
  "libp2p-core",
  "libp2p-identity",
  "libp2p-webrtc-utils",
  "send_wrapper 0.6.0",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "wasm-bindgen",
  "wasm-bindgen-futures",
@@ -2833,7 +2856,7 @@ dependencies = [
  "pin-project-lite",
  "rw-stream-sink",
  "soketto",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "url",
  "webpki-roots 0.26.11",
@@ -2850,7 +2873,7 @@ dependencies = [
  "js-sys",
  "libp2p-core",
  "send_wrapper 0.6.0",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "wasm-bindgen",
  "web-sys",
@@ -2870,7 +2893,7 @@ dependencies = [
  "multiaddr",
  "multihash",
  "send_wrapper 0.6.0",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "wasm-bindgen",
  "wasm-bindgen-futures",
@@ -2886,23 +2909,23 @@ dependencies = [
  "either",
  "futures",
  "libp2p-core",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tracing",
  "yamux 0.12.1",
- "yamux 0.13.8",
+ "yamux 0.13.10",
 ]
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
 [[package]]
 name = "litemap"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
+checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
 
 [[package]]
 name = "lock_api"
@@ -2932,15 +2955,6 @@ dependencies = [
  "tracing-subscriber",
 ]
 
-[[package]]
-name = "lru"
-version = "0.12.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38"
-dependencies = [
- "hashbrown 0.15.5",
-]
-
 [[package]]
 name = "lru-slab"
 version = "0.1.2"
@@ -2967,13 +2981,13 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
 
 [[package]]
 name = "match-lookup"
-version = "0.1.1"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e"
+checksum = "757aee279b8bdbb9f9e676796fd459e4207a1f986e87886700abf589f5abf771"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 1.0.109",
+ "syn",
 ]
 
 [[package]]
@@ -3037,9 +3051,9 @@ dependencies = [
 
 [[package]]
 name = "mio"
-version = "1.1.1"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
+checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
 dependencies = [
  "libc",
  "log",
@@ -3049,9 +3063,9 @@ dependencies = [
 
 [[package]]
 name = "moka"
-version = "0.12.11"
+version = "0.12.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077"
+checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046"
 dependencies = [
  "crossbeam-channel",
  "crossbeam-epoch",
@@ -3059,7 +3073,6 @@ dependencies = [
  "equivalent",
  "parking_lot",
  "portable-atomic",
- "rustc_version",
  "smallvec",
  "tagptr",
  "uuid",
@@ -3098,11 +3111,10 @@ dependencies = [
 
 [[package]]
 name = "multihash"
-version = "0.19.3"
+version = "0.19.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d"
+checksum = "577c63b00ad74d57e8c9aa870b5fccebf2fd64a308a5aee9f1bb88e4aea19447"
 dependencies = [
- "core2",
  "serde",
  "unsigned-varint 0.8.0",
 ]
@@ -3139,7 +3151,7 @@ checksum = "03755949235714b2b307e5ae89dd8c1c2531fb127d9b8b7b4adf9c876cd3ed18"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -3176,9 +3188,9 @@ dependencies = [
 
 [[package]]
 name = "ndarray"
-version = "0.17.1"
+version = "0.17.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c7c9125e8f6f10c9da3aad044cc918cf8784fa34de857b1aa68038eb05a50a9"
+checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d"
 dependencies = [
  "matrixmultiply",
  "num-complex",
@@ -3199,13 +3211,13 @@ dependencies = [
  "core-foundation 0.10.1",
  "ipnet",
  "libc",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
  "netlink-packet-route 0.25.1",
  "netlink-sys",
  "nix 0.30.1",
  "scopeguard",
  "system-configuration-sys",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "widestring",
  "windows 0.61.3",
 ]
@@ -3222,7 +3234,7 @@ dependencies = [
  "ipnet",
  "libc",
  "mac-addr",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
  "netlink-packet-route 0.29.0",
  "netlink-sys",
  "objc2-core-foundation",
@@ -3232,17 +3244,6 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
-[[package]]
-name = "netlink-packet-core"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72724faf704479d67b388da142b186f916188505e7e0b26719019c525882eda4"
-dependencies = [
- "anyhow",
- "byteorder",
- "netlink-packet-utils",
-]
-
 [[package]]
 name = "netlink-packet-core"
 version = "0.8.1"
@@ -3252,30 +3253,16 @@ dependencies = [
  "paste",
 ]
 
-[[package]]
-name = "netlink-packet-route"
-version = "0.17.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "053998cea5a306971f88580d0829e90f270f940befd7cf928da179d4187a5a66"
-dependencies = [
- "anyhow",
- "bitflags 1.3.2",
- "byteorder",
- "libc",
- "netlink-packet-core 0.7.0",
- "netlink-packet-utils",
-]
-
 [[package]]
 name = "netlink-packet-route"
 version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3ec2f5b6839be2a19d7fa5aab5bc444380f6311c2b693551cb80f45caaa7b5ef"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "libc",
  "log",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
 ]
 
 [[package]]
@@ -3284,10 +3271,10 @@ version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ce3636fa715e988114552619582b530481fd5ef176a1e5c1bf024077c2c9445"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "libc",
  "log",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
 ]
 
 [[package]]
@@ -3296,10 +3283,10 @@ version = "0.29.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df9854ea6ad14e3f4698a7f03b65bce0833dd2d81d594a0e4a984170537146b6"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "libc",
  "log",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
 ]
 
 [[package]]
@@ -3308,36 +3295,10 @@ version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be8919612f6028ab4eacbbfe1234a9a43e3722c6e0915e7ff519066991905092"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "libc",
  "log",
- "netlink-packet-core 0.8.1",
-]
-
-[[package]]
-name = "netlink-packet-utils"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ede8a08c71ad5a95cdd0e4e52facd37190977039a4704eb82a283f713747d34"
-dependencies = [
- "anyhow",
- "byteorder",
- "paste",
- "thiserror 1.0.69",
-]
-
-[[package]]
-name = "netlink-proto"
-version = "0.11.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72452e012c2f8d612410d89eea01e2d9b56205274abb35d53f60200b2ec41d60"
-dependencies = [
- "bytes",
- "futures",
- "log",
- "netlink-packet-core 0.7.0",
- "netlink-sys",
- "thiserror 2.0.17",
+ "netlink-packet-core",
 ]
 
 [[package]]
@@ -3349,9 +3310,9 @@ dependencies = [
  "bytes",
  "futures",
  "log",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
  "netlink-sys",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
 ]
 
 [[package]]
@@ -3383,16 +3344,16 @@ dependencies = [
  "n0-future",
  "n0-watcher",
  "netdev",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
  "netlink-packet-route 0.30.0",
- "netlink-proto 0.12.0",
+ "netlink-proto",
  "netlink-sys",
  "noq-udp",
  "objc2-core-foundation",
  "objc2-system-configuration",
  "pin-project-lite",
  "serde",
- "socket2 0.6.1",
+ "socket2 0.6.3",
  "time",
  "tokio",
  "tokio-util",
@@ -3422,24 +3383,13 @@ dependencies = [
  "util",
 ]
 
-[[package]]
-name = "nix"
-version = "0.26.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
-dependencies = [
- "bitflags 1.3.2",
- "cfg-if",
- "libc",
-]
-
 [[package]]
 name = "nix"
 version = "0.30.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "cfg-if",
  "cfg_aliases",
  "libc",
@@ -3448,11 +3398,11 @@ dependencies = [
 
 [[package]]
 name = "nix"
-version = "0.31.2"
+version = "0.31.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3"
+checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "cfg-if",
  "cfg_aliases",
  "libc",
@@ -3483,16 +3433,16 @@ checksum = "ee91b05f4f3353290936ba1f3233518868fb4e2da99cb4c90d1f8cebb064e527"
 dependencies = [
  "cfg_aliases",
  "libc",
- "socket2 0.6.1",
+ "socket2 0.6.3",
  "tracing",
  "windows-sys 0.61.2",
 ]
 
 [[package]]
 name = "ntapi"
-version = "0.4.1"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
+checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae"
 dependencies = [
  "winapi",
 ]
@@ -3527,9 +3477,9 @@ dependencies = [
 
 [[package]]
 name = "num-conv"
-version = "0.1.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441"
 
 [[package]]
 name = "num-integer"
@@ -3549,16 +3499,6 @@ dependencies = [
  "autocfg",
 ]
 
-[[package]]
-name = "num_cpus"
-version = "1.17.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
-dependencies = [
- "hermit-abi",
- "libc",
-]
-
 [[package]]
 name = "numpy"
 version = "0.27.1"
@@ -3572,7 +3512,7 @@ dependencies = [
  "num-traits",
  "pyo3",
  "pyo3-build-config",
- "rustc-hash 2.1.1",
+ "rustc-hash 2.1.2",
 ]
 
 [[package]]
@@ -3590,7 +3530,7 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "block2",
  "dispatch2",
  "libc",
@@ -3609,7 +3549,7 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "709fe137109bd1e8b5a99390f77a7d8b2961dafc1a1c5db8f2e60329ad6d895a"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "objc2",
  "objc2-core-foundation",
 ]
@@ -3620,7 +3560,7 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "dispatch2",
  "libc",
  "objc2",
@@ -3648,9 +3588,9 @@ dependencies = [
 
 [[package]]
 name = "once_cell"
-version = "1.21.3"
+version = "1.21.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
 dependencies = [
  "critical-section",
  "portable-atomic",
@@ -3670,9 +3610,9 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381"
 
 [[package]]
 name = "ordered-float"
-version = "5.1.0"
+version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d"
+checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e"
 dependencies = [
  "num-traits",
 ]
@@ -3781,7 +3721,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
 dependencies = [
  "phf_shared",
- "rand 0.8.5",
+ "rand 0.8.6",
 ]
 
 [[package]]
@@ -3794,36 +3734,42 @@ dependencies = [
 ]
 
 [[package]]
-name = "pin-project"
-version = "1.1.10"
+name = "pidfile-rs"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
+checksum = "d1a8aa9a30b1b65ef48b333931b80f2324a14e00208eb2b8f5788f1180791bcc"
+dependencies = [
+ "flopen",
+ "libc",
+ "log",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "pin-project"
+version = "1.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924"
 dependencies = [
  "pin-project-internal",
 ]
 
 [[package]]
 name = "pin-project-internal"
-version = "1.1.10"
+version = "1.1.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
+checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "pin-project-lite"
-version = "0.2.16"
+version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
-
-[[package]]
-name = "pin-utils"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
 
 [[package]]
 name = "pkcs8"
@@ -3837,9 +3783,9 @@ dependencies = [
 
 [[package]]
 name = "plist"
-version = "1.8.0"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
+checksum = "092791278e026273c1b65bbdcfbba3a300f2994c896bd01ab01da613c29c46f1"
 dependencies = [
  "base64",
  "indexmap",
@@ -3887,24 +3833,24 @@ dependencies = [
 
 [[package]]
 name = "portable-atomic"
-version = "1.11.1"
+version = "1.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
 
 [[package]]
 name = "portable-atomic-util"
-version = "0.2.4"
+version = "0.2.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
+checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618"
 dependencies = [
  "portable-atomic",
 ]
 
 [[package]]
 name = "potential_utf"
-version = "0.1.4"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
+checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
 dependencies = [
  "zerovec",
 ]
@@ -3931,7 +3877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
 dependencies = [
  "proc-macro2",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -3945,9 +3891,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.103"
+version = "1.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
 dependencies = [
  "unicode-ident",
 ]
@@ -3972,7 +3918,7 @@ checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -4016,7 +3962,7 @@ checksum = "bcd7d70ee0ca1661c40407e6f84e4463ef2658c90a9e2fbbd4515b2bcdfcaeca"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -4040,9 +3986,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-log"
-version = "0.13.2"
+version = "0.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f8bae9ad5ba08b0b0ed2bb9c2bdbaeccc69cafca96d78cf0fbcea0d45d122bb"
+checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e"
 dependencies = [
  "arc-swap",
  "log",
@@ -4058,7 +4004,7 @@ dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -4071,7 +4017,7 @@ dependencies = [
  "proc-macro2",
  "pyo3-build-config",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -4108,7 +4054,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustpython-parser",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -4135,9 +4081,9 @@ dependencies = [
 
 [[package]]
 name = "quick-xml"
-version = "0.38.4"
+version = "0.39.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
+checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e"
 dependencies = [
  "memchr",
 ]
@@ -4154,10 +4100,10 @@ dependencies = [
  "pin-project-lite",
  "quinn-proto",
  "quinn-udp",
- "rustc-hash 2.1.1",
+ "rustc-hash 2.1.2",
  "rustls",
- "socket2 0.6.1",
- "thiserror 2.0.17",
+ "socket2 0.6.3",
+ "thiserror 2.0.18",
  "tokio",
  "tracing",
  "web-time",
@@ -4165,20 +4111,20 @@ dependencies = [
 
 [[package]]
 name = "quinn-proto"
-version = "0.11.13"
+version = "0.11.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
+checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
 dependencies = [
  "bytes",
  "getrandom 0.3.4",
  "lru-slab",
- "rand 0.9.2",
+ "rand 0.9.4",
  "ring",
- "rustc-hash 2.1.1",
+ "rustc-hash 2.1.2",
  "rustls",
  "rustls-pki-types",
  "slab",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tinyvec",
  "tracing",
  "web-time",
@@ -4193,16 +4139,16 @@ dependencies = [
  "cfg_aliases",
  "libc",
  "once_cell",
- "socket2 0.6.1",
+ "socket2 0.6.3",
  "tracing",
  "windows-sys 0.60.2",
 ]
 
 [[package]]
 name = "quote"
-version = "1.0.42"
+version = "1.0.45"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
 dependencies = [
  "proc-macro2",
 ]
@@ -4234,9 +4180,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.8.5"
+version = "0.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
 dependencies = [
  "libc",
  "rand_chacha 0.3.1",
@@ -4245,23 +4191,23 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.9.2"
+version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
 dependencies = [
  "rand_chacha 0.9.0",
- "rand_core 0.9.3",
+ "rand_core 0.9.5",
 ]
 
 [[package]]
 name = "rand"
-version = "0.10.0"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8"
+checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
 dependencies = [
  "chacha20 0.10.0",
  "getrandom 0.4.2",
- "rand_core 0.10.0",
+ "rand_core 0.10.1",
 ]
 
 [[package]]
@@ -4291,7 +4237,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
 dependencies = [
  "ppv-lite86",
- "rand_core 0.9.3",
+ "rand_core 0.9.5",
 ]
 
 [[package]]
@@ -4309,23 +4255,23 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
 ]
 
 [[package]]
 name = "rand_core"
-version = "0.9.3"
+version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
 dependencies = [
  "getrandom 0.3.4",
 ]
 
 [[package]]
 name = "rand_core"
-version = "0.10.0"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba"
+checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
 
 [[package]]
 name = "rand_hc"
@@ -4344,9 +4290,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
 
 [[package]]
 name = "rayon"
-version = "1.11.0"
+version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
 dependencies = [
  "either",
  "rayon-core",
@@ -4381,14 +4327,14 @@ version = "0.5.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
 ]
 
 [[package]]
 name = "regex"
-version = "1.12.2"
+version = "1.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -4398,9 +4344,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.13"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -4409,9 +4355,9 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.8.8"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 
 [[package]]
 name = "resolv-conf"
@@ -4437,7 +4383,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
 dependencies = [
  "cc",
  "cfg-if",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
  "libc",
  "untrusted",
  "windows-sys 0.52.0",
@@ -4451,7 +4397,7 @@ checksum = "319bb478ff9aae1dc7a544fa599e9eb47e2521621dc3921fba6abcaaa312092c"
 dependencies = [
  "flume",
  "libc",
- "netlink-packet-core 0.8.1",
+ "netlink-packet-core",
  "netlink-packet-route 0.28.0",
  "netlink-sys",
  "windows-sys 0.61.2",
@@ -4459,18 +4405,18 @@ dependencies = [
 
 [[package]]
 name = "rtnetlink"
-version = "0.13.1"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a552eb82d19f38c3beed3f786bd23aa434ceb9ac43ab44419ca6d67a7e186c0"
+checksum = "4b960d5d873a75b5be9761b1e73b146f52dddcd27bac75263f40fba686d4d7b5"
 dependencies = [
- "futures",
+ "futures-channel",
+ "futures-util",
  "log",
- "netlink-packet-core 0.7.0",
- "netlink-packet-route 0.17.1",
- "netlink-packet-utils",
- "netlink-proto 0.11.5",
+ "netlink-packet-core",
+ "netlink-packet-route 0.28.0",
+ "netlink-proto",
  "netlink-sys",
- "nix 0.26.4",
+ "nix 0.30.1",
  "thiserror 1.0.69",
  "tokio",
 ]
@@ -4489,9 +4435,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
 [[package]]
 name = "rustc-hash"
-version = "2.1.1"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
 
 [[package]]
 name = "rustc_version"
@@ -4513,11 +4459,11 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "1.1.2"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "errno",
  "libc",
  "linux-raw-sys",
@@ -4526,9 +4472,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.23.35"
+version = "0.23.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f"
+checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b"
 dependencies = [
  "once_cell",
  "ring",
@@ -4540,9 +4486,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-pki-types"
-version = "1.13.1"
+version = "1.14.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c"
+checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9"
 dependencies = [
  "web-time",
  "zeroize",
@@ -4550,9 +4496,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-webpki"
-version = "0.103.8"
+version = "0.103.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52"
+checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
 dependencies = [
  "ring",
  "rustls-pki-types",
@@ -4633,12 +4579,6 @@ dependencies = [
  "static_assertions",
 ]
 
-[[package]]
-name = "ryu"
-version = "1.0.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
-
 [[package]]
 name = "salsa20"
 version = "0.10.2"
@@ -4676,9 +4616,9 @@ dependencies = [
 
 [[package]]
 name = "semver"
-version = "1.0.27"
+version = "1.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
+checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
 
 [[package]]
 name = "send_wrapper"
@@ -4722,27 +4662,27 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.145"
+version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
  "itoa",
  "memchr",
- "ryu",
  "serde",
  "serde_core",
+ "zmij",
 ]
 
 [[package]]
 name = "serde_spanned"
-version = "1.0.3"
+version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e24345aa0fe688594e73770a5f6d1b216508b4f93484c0026d521acd30134392"
+checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26"
 dependencies = [
  "serde_core",
 ]
@@ -4771,9 +4711,9 @@ dependencies = [
 
 [[package]]
 name = "sha3"
-version = "0.10.8"
+version = "0.10.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60"
+checksum = "77fd7028345d415a4034cf8777cd4f8ab1851274233b45f84e3d955502d93874"
 dependencies = [
  "digest",
  "keccak",
@@ -4796,10 +4736,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
 [[package]]
 name = "signal-hook-registry"
-version = "1.4.7"
+version = "1.4.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
 dependencies = [
+ "errno",
  "libc",
 ]
 
@@ -4815,15 +4756,15 @@ dependencies = [
 
 [[package]]
 name = "siphasher"
-version = "1.0.1"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
+checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649"
 
 [[package]]
 name = "slab"
-version = "0.4.11"
+version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
 
 [[package]]
 name = "smallvec"
@@ -4860,12 +4801,12 @@ dependencies = [
 
 [[package]]
 name = "socket2"
-version = "0.6.1"
+version = "0.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881"
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
 dependencies = [
  "libc",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -4879,7 +4820,7 @@ dependencies = [
  "futures",
  "httparse",
  "log",
- "rand 0.8.5",
+ "rand 0.8.6",
  "sha1",
 ]
 
@@ -4891,7 +4832,7 @@ checksum = "c87e960f4dca2788eeb86bbdde8dd246be8948790b7618d656e68f9b720a86e8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -4945,20 +4886,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
 
 [[package]]
 name = "syn"
-version = "1.0.109"
+version = "2.0.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.111"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4973,7 +4903,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -4992,11 +4922,11 @@ dependencies = [
 
 [[package]]
 name = "system-configuration"
-version = "0.6.1"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
+checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "core-foundation 0.9.4",
  "system-configuration-sys",
 ]
@@ -5019,9 +4949,9 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
 
 [[package]]
 name = "target-lexicon"
-version = "0.13.3"
+version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c"
+checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
 
 [[package]]
 name = "thiserror"
@@ -5034,11 +4964,11 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "2.0.17"
+version = "2.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
 dependencies = [
- "thiserror-impl 2.0.17",
+ "thiserror-impl 2.0.18",
 ]
 
 [[package]]
@@ -5049,18 +4979,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "2.0.17"
+version = "2.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -5074,30 +5004,30 @@ dependencies = [
 
 [[package]]
 name = "time"
-version = "0.3.44"
+version = "0.3.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
+checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
 dependencies = [
  "deranged",
  "itoa",
  "num-conv",
  "powerfmt",
- "serde",
+ "serde_core",
  "time-core",
  "time-macros",
 ]
 
 [[package]]
 name = "time-core"
-version = "0.1.6"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
+checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
 
 [[package]]
 name = "time-macros"
-version = "0.2.24"
+version = "0.2.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
+checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
 dependencies = [
  "num-conv",
  "time-core",
@@ -5114,9 +5044,9 @@ dependencies = [
 
 [[package]]
 name = "tinystr"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
+checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
 dependencies = [
  "displaydoc",
  "zerovec",
@@ -5134,9 +5064,9 @@ dependencies = [
 
 [[package]]
 name = "tinyvec"
-version = "1.10.0"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
 dependencies = [
  "tinyvec_macros",
 ]
@@ -5149,9 +5079,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.48.0"
+version = "1.52.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
+checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe"
 dependencies = [
  "bytes",
  "libc",
@@ -5159,7 +5089,7 @@ dependencies = [
  "parking_lot",
  "pin-project-lite",
  "signal-hook-registry",
- "socket2 0.6.1",
+ "socket2 0.6.3",
  "tokio-macros",
  "tracing",
  "windows-sys 0.61.2",
@@ -5167,20 +5097,20 @@ dependencies = [
 
 [[package]]
 name = "tokio-macros"
-version = "2.6.0"
+version = "2.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
+checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "tokio-util"
-version = "0.7.17"
+version = "0.7.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
 dependencies = [
  "bytes",
  "futures-core",
@@ -5192,9 +5122,9 @@ dependencies = [
 
 [[package]]
 name = "toml"
-version = "0.9.8"
+version = "0.9.12+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8"
+checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863"
 dependencies = [
  "indexmap",
  "serde_core",
@@ -5202,32 +5132,32 @@ dependencies = [
  "toml_datetime",
  "toml_parser",
  "toml_writer",
- "winnow 0.7.14",
+ "winnow 0.7.15",
 ]
 
 [[package]]
 name = "toml_datetime"
-version = "0.7.3"
+version = "0.7.5+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533"
+checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
 dependencies = [
  "serde_core",
 ]
 
 [[package]]
 name = "toml_parser"
-version = "1.0.4"
+version = "1.1.2+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e"
+checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
 dependencies = [
- "winnow 0.7.14",
+ "winnow 1.0.3",
 ]
 
 [[package]]
 name = "toml_writer"
-version = "1.0.4"
+version = "1.1.1+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2"
+checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db"
 
 [[package]]
 name = "tower-service"
@@ -5255,7 +5185,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -5291,9 +5221,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-subscriber"
-version = "0.3.22"
+version = "0.3.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
+checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319"
 dependencies = [
  "matchers",
  "nu-ansi-term",
@@ -5329,19 +5259,19 @@ dependencies = [
  "libloading",
  "log",
  "netconfig-rs",
- "nix 0.31.2",
+ "nix 0.31.3",
  "route_manager",
  "scopeguard",
  "widestring",
  "windows-sys 0.61.2",
- "winreg 0.55.0",
+ "winreg",
 ]
 
 [[package]]
 name = "typenum"
-version = "1.19.0"
+version = "1.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
+checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
 
 [[package]]
 name = "uint"
@@ -5409,15 +5339,15 @@ dependencies = [
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.22"
+version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
 
 [[package]]
 name = "unicode-segmentation"
-version = "1.12.0"
+version = "1.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
 
 [[package]]
 name = "unicode-width"
@@ -5450,7 +5380,7 @@ dependencies = [
  "getopts",
  "log",
  "phf_codegen",
- "rand 0.8.5",
+ "rand 0.8.6",
 ]
 
 [[package]]
@@ -5489,9 +5419,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
 
 [[package]]
 name = "url"
-version = "2.5.7"
+version = "2.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
 dependencies = [
  "form_urlencoded",
  "idna",
@@ -5517,11 +5447,11 @@ version = "0.0.1"
 
 [[package]]
 name = "uuid"
-version = "1.19.0"
+version = "1.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a"
+checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
 dependencies = [
- "getrandom 0.3.4",
+ "getrandom 0.4.2",
  "js-sys",
  "wasm-bindgen",
 ]
@@ -5561,11 +5491,11 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
 [[package]]
 name = "wasip2"
-version = "1.0.1+wasi-0.2.4"
+version = "1.0.3+wasi-0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
+checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
 dependencies = [
- "wit-bindgen 0.46.0",
+ "wit-bindgen 0.57.1",
 ]
 
 [[package]]
@@ -5579,9 +5509,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.106"
+version = "0.2.121"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd"
+checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -5592,22 +5522,19 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.56"
+version = "0.4.71"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c"
+checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8"
 dependencies = [
- "cfg-if",
  "js-sys",
- "once_cell",
  "wasm-bindgen",
- "web-sys",
 ]
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.106"
+version = "0.2.121"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3"
+checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -5615,22 +5542,22 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.106"
+version = "0.2.121"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40"
+checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2"
 dependencies = [
  "bumpalo",
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.106"
+version = "0.2.121"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4"
+checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441"
 dependencies = [
  "unicode-ident",
 ]
@@ -5663,7 +5590,7 @@ version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "hashbrown 0.15.5",
  "indexmap",
  "semver",
@@ -5671,9 +5598,9 @@ dependencies = [
 
 [[package]]
 name = "web-sys"
-version = "0.3.83"
+version = "0.3.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac"
+checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -5695,14 +5622,14 @@ version = "0.26.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
 dependencies = [
- "webpki-roots 1.0.4",
+ "webpki-roots 1.0.7",
 ]
 
 [[package]]
 name = "webpki-roots"
-version = "1.0.4"
+version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e"
+checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d"
 dependencies = [
  "rustls-pki-types",
 ]
@@ -5735,16 +5662,6 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
-[[package]]
-name = "windows"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efc5cf48f83140dcaab716eeaea345f9e93d0018fb81162753a3f76c3397b538"
-dependencies = [
- "windows-core 0.53.0",
- "windows-targets 0.52.6",
-]
-
 [[package]]
 name = "windows"
 version = "0.57.0"
@@ -5798,16 +5715,6 @@ dependencies = [
  "windows-core 0.62.2",
 ]
 
-[[package]]
-name = "windows-core"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9dcc5b895a6377f1ab9fa55acedab1fd5ac0db66ad1e6c7f47e28a22e446a5dd"
-dependencies = [
- "windows-result 0.1.2",
- "windows-targets 0.52.6",
-]
-
 [[package]]
 name = "windows-core"
 version = "0.57.0"
@@ -5876,7 +5783,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -5887,7 +5794,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -5898,7 +5805,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -5909,7 +5816,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
@@ -5944,6 +5851,17 @@ dependencies = [
  "windows-link 0.2.1",
 ]
 
+[[package]]
+name = "windows-registry"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720"
+dependencies = [
+ "windows-link 0.2.1",
+ "windows-result 0.4.1",
+ "windows-strings 0.5.1",
+]
+
 [[package]]
 name = "windows-result"
 version = "0.1.2"
@@ -5989,15 +5907,6 @@ dependencies = [
  "windows-link 0.2.1",
 ]
 
-[[package]]
-name = "windows-sys"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
-dependencies = [
- "windows-targets 0.48.5",
-]
-
 [[package]]
 name = "windows-sys"
 version = "0.52.0"
@@ -6034,21 +5943,6 @@ dependencies = [
  "windows-link 0.2.1",
 ]
 
-[[package]]
-name = "windows-targets"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
-dependencies = [
- "windows_aarch64_gnullvm 0.48.5",
- "windows_aarch64_msvc 0.48.5",
- "windows_i686_gnu 0.48.5",
- "windows_i686_msvc 0.48.5",
- "windows_x86_64_gnu 0.48.5",
- "windows_x86_64_gnullvm 0.48.5",
- "windows_x86_64_msvc 0.48.5",
-]
-
 [[package]]
 name = "windows-targets"
 version = "0.52.6"
@@ -6100,12 +5994,6 @@ dependencies = [
  "windows-link 0.2.1",
 ]
 
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
-
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.52.6"
@@ -6118,12 +6006,6 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
 
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
-
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.52.6"
@@ -6136,12 +6018,6 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
 
-[[package]]
-name = "windows_i686_gnu"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
-
 [[package]]
 name = "windows_i686_gnu"
 version = "0.52.6"
@@ -6166,12 +6042,6 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
 
-[[package]]
-name = "windows_i686_msvc"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
-
 [[package]]
 name = "windows_i686_msvc"
 version = "0.52.6"
@@ -6184,12 +6054,6 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
 
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
-
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.52.6"
@@ -6202,12 +6066,6 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
 
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
-
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.52.6"
@@ -6220,12 +6078,6 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
 
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.48.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
-
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.52.6"
@@ -6240,29 +6092,19 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
 
 [[package]]
 name = "winnow"
-version = "0.7.14"
+version = "0.7.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
+checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945"
 
 [[package]]
 name = "winnow"
-version = "1.0.1"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5"
+checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1"
 dependencies = [
  "memchr",
 ]
 
-[[package]]
-name = "winreg"
-version = "0.50.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
-dependencies = [
- "cfg-if",
- "windows-sys 0.48.0",
-]
-
 [[package]]
 name = "winreg"
 version = "0.55.0"
@@ -6273,12 +6115,6 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
-[[package]]
-name = "wit-bindgen"
-version = "0.46.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
-
 [[package]]
 name = "wit-bindgen"
 version = "0.51.0"
@@ -6288,6 +6124,12 @@ dependencies = [
  "wit-bindgen-rust-macro",
 ]
 
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
 [[package]]
 name = "wit-bindgen-core"
 version = "0.51.0"
@@ -6309,7 +6151,7 @@ dependencies = [
  "heck",
  "indexmap",
  "prettyplease",
- "syn 2.0.111",
+ "syn",
  "wasm-metadata",
  "wit-bindgen-core",
  "wit-component",
@@ -6325,7 +6167,7 @@ dependencies = [
  "prettyplease",
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
  "wit-bindgen-core",
  "wit-bindgen-rust",
 ]
@@ -6337,7 +6179,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
 dependencies = [
  "anyhow",
- "bitflags 2.10.0",
+ "bitflags",
  "indexmap",
  "log",
  "serde",
@@ -6369,24 +6211,24 @@ dependencies = [
 
 [[package]]
 name = "wmi"
-version = "0.18.3"
+version = "0.18.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "003e65f4934cf9449b9ce913ad822cd054a5af669d24f93db101fdb02856bb23"
+checksum = "7c81b85c57a57500e56669586496bf2abd5cf082b9d32995251185d105208b64"
 dependencies = [
  "chrono",
  "futures",
  "log",
  "serde",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "windows 0.62.2",
  "windows-core 0.62.2",
 ]
 
 [[package]]
 name = "writeable"
-version = "0.6.2"
+version = "0.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
+checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
 
 [[package]]
 name = "x25519-dalek"
@@ -6413,7 +6255,7 @@ dependencies = [
  "nom",
  "oid-registry",
  "rusticata-macros",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "time",
 ]
 
@@ -6443,22 +6285,22 @@ dependencies = [
  "nohash-hasher",
  "parking_lot",
  "pin-project",
- "rand 0.8.5",
+ "rand 0.8.6",
  "static_assertions",
 ]
 
 [[package]]
 name = "yamux"
-version = "0.13.8"
+version = "0.13.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "deab71f2e20691b4728b349c6cee8fc7223880fa67b6b4f92225ec32225447e5"
+checksum = "1991f6690292030e31b0144d73f5e8368936c58e45e7068254f7138b23b00672"
 dependencies = [
  "futures",
  "log",
  "nohash-hasher",
  "parking_lot",
  "pin-project",
- "rand 0.9.2",
+ "rand 0.9.4",
  "static_assertions",
  "web-time",
 ]
@@ -6474,9 +6316,9 @@ dependencies = [
 
 [[package]]
 name = "yoke"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954"
+checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
 dependencies = [
  "stable_deref_trait",
  "yoke-derive",
@@ -6485,54 +6327,54 @@ dependencies = [
 
 [[package]]
 name = "yoke-derive"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
+checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
  "synstructure",
 ]
 
 [[package]]
 name = "zerocopy"
-version = "0.8.31"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
+checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.8.31"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
+checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "zerofrom"
-version = "0.1.6"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
+checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
 dependencies = [
  "zerofrom-derive",
 ]
 
 [[package]]
 name = "zerofrom-derive"
-version = "0.1.6"
+version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
+checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
  "synstructure",
 ]
 
@@ -6547,20 +6389,20 @@ dependencies = [
 
 [[package]]
 name = "zeroize_derive"
-version = "1.4.2"
+version = "1.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
+checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
 
 [[package]]
 name = "zerotrie"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851"
+checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
 dependencies = [
  "displaydoc",
  "yoke",
@@ -6569,9 +6411,9 @@ dependencies = [
 
 [[package]]
 name = "zerovec"
-version = "0.11.5"
+version = "0.11.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002"
+checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
 dependencies = [
  "yoke",
  "zerofrom",
@@ -6580,11 +6422,17 @@ dependencies = [
 
 [[package]]
 name = "zerovec-derive"
-version = "0.11.2"
+version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
+checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.111",
+ "syn",
 ]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/Cargo.toml b/Cargo.toml
index 1c17582df..a93fd6022 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,10 +1,10 @@
 [workspace]
 resolver = "3"
 members = [
-  "rust/networking",
-  "rust/exo_pyo3_bindings",
-  "rust/util",
-  "rust/babblerd",
+    "rust/networking",
+    "rust/exo_pyo3_bindings",
+    "rust/util",
+    "rust/babblerd",
 ]
 
 [workspace.package]
@@ -48,6 +48,7 @@ log = "0.4"
 
 # networking
 libp2p = "0.56"
+libp2p-tcp = "0.44"
 
 [workspace.lints.rust]
 static_mut_refs = "warn"      # Or use "warn" instead of deny
diff --git a/app/EXO/EXO/ContentView.swift b/app/EXO/EXO/ContentView.swift
index 0743df132..30a4fc59b 100644
--- a/app/EXO/EXO/ContentView.swift
+++ b/app/EXO/EXO/ContentView.swift
@@ -16,22 +16,13 @@ struct ContentView: View {
     @EnvironmentObject private var updater: SparkleUpdater
     @EnvironmentObject private var thunderboltBridgeService: ThunderboltBridgeService
     @EnvironmentObject private var settingsWindowController: SettingsWindowController
+    @EnvironmentObject private var bugReportWindowController: BugReportWindowController
     @State private var focusedNode: NodeViewModel?
     @State private var deletingInstanceIDs: Set<String> = []
     @State private var showAllNodes = false
     @State private var showAllInstances = false
     @State private var baseURLCopied = false
     @State private var showAdvanced = false
-    @State private var showDebugInfo = false
-    private enum BugReportPhase: Equatable {
-        case idle
-        case prompting
-        case sending(String)
-        case success(String)
-        case failure(String)
-    }
-    @State private var bugReportPhase: BugReportPhase = .idle
-    @State private var bugReportUserDescription: String = ""
     @State private var uninstallInProgress = false
     @State private var pendingNamespace: String = ""
     @State private var pendingHFToken: String = ""
@@ -294,6 +285,13 @@ struct ContentView: View {
             ) {
                 updater.checkForUpdates()
             }
+            HoverButton(
+                title: "Share Bug Report…",
+                tint: .primary,
+                trailingSystemImage: "ladybug"
+            ) {
+                bugReportWindowController.open()
+            }
             .padding(.bottom, 8)
             HoverButton(title: "Quit", tint: .secondary) {
                 controller.stop()
@@ -477,40 +475,6 @@ struct ContentView: View {
         }
     }
 
-    private var debugSection: some View {
-        VStack(alignment: .leading, spacing: 4) {
-            HoverButton(
-                title: "Debug Info",
-                tint: .primary,
-                trailingSystemImage: showDebugInfo ? "chevron.up" : "chevron.down",
-                small: true
-            ) {
-                showDebugInfo.toggle()
-            }
-            if showDebugInfo {
-                VStack(alignment: .leading, spacing: 4) {
-                    Text("Version: \(buildTag)")
-                        .font(.caption2)
-                        .foregroundColor(.secondary)
-                    Text("Commit: \(buildCommit)")
-                        .font(.caption2)
-                        .foregroundColor(.secondary)
-                    Text(thunderboltStatusText)
-                        .font(.caption2)
-                        .foregroundColor(thunderboltStatusColor)
-                    clusterThunderboltBridgeView
-                    interfaceIpList
-                    rdmaStatusView
-                    sendBugReportButton
-                        .padding(.top, 6)
-                }
-                .padding(.leading, 8)
-                .transition(.opacity)
-            }
-        }
-        .animation(.easeInOut(duration: 0.25), value: showDebugInfo)
-    }
-
     private var rdmaStatusView: some View {
         let rdmaStatuses = stateService.latestSnapshot?.nodeRdmaCtl ?? [:]
         let localNodeId = stateService.localNodeId
@@ -559,118 +523,6 @@ struct ContentView: View {
         }
     }
 
-    private var sendBugReportButton: some View {
-        VStack(alignment: .leading, spacing: 6) {
-            switch bugReportPhase {
-            case .idle:
-                Button {
-                    bugReportPhase = .prompting
-                    bugReportUserDescription = ""
-                } label: {
-                    HStack {
-                        Text("Send Bug Report")
-                            .font(.caption)
-                            .fontWeight(.semibold)
-                        Spacer()
-                    }
-                    .padding(.vertical, 6)
-                    .padding(.horizontal, 8)
-                    .background(
-                        RoundedRectangle(cornerRadius: 6)
-                            .fill(Color.accentColor.opacity(0.12))
-                    )
-                }
-                .buttonStyle(.plain)
-
-            case .prompting:
-                VStack(alignment: .leading, spacing: 6) {
-                    Text("What's the issue? (optional)")
-                        .font(.caption2)
-                        .foregroundColor(.secondary)
-                    TextEditor(text: $bugReportUserDescription)
-                        .font(.caption2)
-                        .frame(height: 60)
-                        .overlay(
-                            RoundedRectangle(cornerRadius: 4)
-                                .stroke(Color.secondary.opacity(0.3), lineWidth: 1)
-                        )
-                    HStack(spacing: 8) {
-                        Button("Send") {
-                            Task {
-                                await sendBugReport()
-                            }
-                        }
-                        .font(.caption2)
-                        .buttonStyle(.borderedProminent)
-                        .controlSize(.small)
-                        Button("Cancel") {
-                            bugReportPhase = .idle
-                        }
-                        .font(.caption2)
-                        .buttonStyle(.bordered)
-                        .controlSize(.small)
-                    }
-                }
-                .padding(8)
-                .background(
-                    RoundedRectangle(cornerRadius: 6)
-                        .fill(Color.accentColor.opacity(0.06))
-                )
-
-            case .sending(let message):
-                HStack(spacing: 6) {
-                    ProgressView()
-                        .scaleEffect(0.6)
-                    Text(message)
-                        .font(.caption2)
-                        .foregroundColor(.secondary)
-                }
-
-            case .success(let message):
-                VStack(alignment: .leading, spacing: 6) {
-                    Text(message)
-                        .font(.caption2)
-                        .foregroundColor(.secondary)
-                        .fixedSize(horizontal: false, vertical: true)
-                    Button {
-                        openGitHubIssue()
-                    } label: {
-                        HStack(spacing: 4) {
-                            Image(systemName: "arrow.up.right.square")
-                                .imageScale(.small)
-                            Text("Create GitHub Issue")
-                                .font(.caption2)
-                        }
-                    }
-                    .buttonStyle(.bordered)
-                    .controlSize(.small)
-                    Button("Done") {
-                        bugReportPhase = .idle
-                        bugReportUserDescription = ""
-                    }
-                    .font(.caption2)
-                    .buttonStyle(.plain)
-                    .foregroundColor(.secondary)
-                }
-
-            case .failure(let message):
-                VStack(alignment: .leading, spacing: 4) {
-                    Text(message)
-                        .font(.caption2)
-                        .foregroundColor(.red)
-                        .fixedSize(horizontal: false, vertical: true)
-                    Button("Dismiss") {
-                        bugReportPhase = .idle
-                    }
-                    .font(.caption2)
-                    .buttonStyle(.plain)
-                    .foregroundColor(.secondary)
-                }
-            }
-        }
-        .animation(.easeInOut(duration: 0.2), value: bugReportPhase)
-    }
-
     private var processToggleBinding: Binding<Bool> {
         Binding(
             get: {
@@ -711,61 +563,6 @@ struct ContentView: View {
         )
     }
 
-    private func sendBugReport() async {
-        bugReportPhase = .sending("Collecting logs...")
-        let service = BugReportService()
-        let description = bugReportUserDescription.trimmingCharacters(in: .whitespacesAndNewlines)
-        do {
-            let outcome = try await service.sendReport(
-                isManual: true,
-                userDescription: description.isEmpty ? nil : description
-            )
-            if outcome.success {
-                bugReportPhase = .success(outcome.message)
-            } else {
-                bugReportPhase = .failure(outcome.message)
-            }
-        } catch {
-            bugReportPhase = .failure(error.localizedDescription)
-        }
-    }
-
-    private func openGitHubIssue() {
-        let description = bugReportUserDescription.trimmingCharacters(in: .whitespacesAndNewlines)
-
-        var bodyParts: [String] = []
-        bodyParts.append("## Describe the bug")
-        bodyParts.append("")
-        if !description.isEmpty {
-            bodyParts.append(description)
-        } else {
-            bodyParts.append("A clear and concise description of what the bug is.")
-        }
-        bodyParts.append("")
-        bodyParts.append("## Environment")
-        bodyParts.append("")
-        bodyParts.append("- macOS Version: \(ProcessInfo.processInfo.operatingSystemVersionString)")
-        bodyParts.append("- EXO Version: \(buildTag) (\(buildCommit))")
-        bodyParts.append("")
-        bodyParts.append("## Additional context")
-        bodyParts.append("")
-        bodyParts.append("A bug report with diagnostic logs was submitted via the app.")
-
-        let body = bodyParts.joined(separator: "\n")
-
-        var components = URLComponents(string: "https://github.com/exo-explore/exo/issues/new")!
-        components.queryItems = [
-            URLQueryItem(name: "template", value: "bug_report.md"),
-            URLQueryItem(name: "title", value: "[BUG] "),
-            URLQueryItem(name: "body", value: body),
-            URLQueryItem(name: "labels", value: "bug"),
-        ]
-
-        if let url = components.url {
-            NSWorkspace.shared.open(url)
-        }
-    }
-
     private func showUninstallConfirmationAlert() {
         let alert = NSAlert()
         alert.messageText = "Uninstall EXO"
@@ -848,13 +645,6 @@ struct ContentView: View {
         }
     }
 
-    private var buildTag: String {
-        Bundle.main.infoDictionary?["EXOBuildTag"] as? String ?? "unknown"
-    }
-
-    private var buildCommit: String {
-        Bundle.main.infoDictionary?["EXOBuildCommit"] as? String ?? "unknown"
-    }
 }
 
 private struct HoverButton: View {
diff --git a/app/EXO/EXO/EXOApp.swift b/app/EXO/EXO/EXOApp.swift
index 45a6cdda1..5757b0eba 100644
--- a/app/EXO/EXO/EXOApp.swift
+++ b/app/EXO/EXO/EXOApp.swift
@@ -22,6 +22,7 @@ struct EXOApp: App {
     @StateObject private var updater: SparkleUpdater
     @StateObject private var thunderboltBridgeService: ThunderboltBridgeService
     @StateObject private var settingsWindowController: SettingsWindowController
+    @StateObject private var bugReportWindowController: BugReportWindowController
     private let terminationObserver: TerminationObserver
     private let firstLaunchPopout = FirstLaunchPopout()
     private let ciContext = CIContext(options: nil)
@@ -46,6 +47,7 @@ struct EXOApp: App {
         let thunderboltBridge = ThunderboltBridgeService(clusterStateService: service)
         _thunderboltBridgeService = StateObject(wrappedValue: thunderboltBridge)
         _settingsWindowController = StateObject(wrappedValue: SettingsWindowController())
+        _bugReportWindowController = StateObject(wrappedValue: BugReportWindowController())
         enableLaunchAtLoginIfNeeded()
         // Install LaunchDaemon to disable Thunderbolt Bridge on startup (prevents network loops)
         NetworkSetupHelper.promptAndInstallIfNeeded()
@@ -66,6 +68,7 @@ struct EXOApp: App {
                 .environmentObject(updater)
                 .environmentObject(thunderboltBridgeService)
                 .environmentObject(settingsWindowController)
+                .environmentObject(bugReportWindowController)
         } label: {
             menuBarIcon
                 .onReceive(controller.$isFirstLaunchReady) { ready in
diff --git a/app/EXO/EXO/ExoProcessController.swift b/app/EXO/EXO/ExoProcessController.swift
index bf8666605..ff756379b 100644
--- a/app/EXO/EXO/ExoProcessController.swift
+++ b/app/EXO/EXO/ExoProcessController.swift
@@ -9,11 +9,14 @@ private let enableImageModelsKey = "EXOEnableImageModels"
 private let offlineModeKey = "EXOOfflineMode"
 private let fastSynchEnabledKey = "EXOFastSynchEnabled"
 private let onboardingCompletedKey = "EXOOnboardingCompleted"
+private let defaultModelsDirKey = "EXODefaultModelsDir"
+private let additionalModelsDirsKey = "EXOAdditionalModelsDirs"
+private let readOnlyModelsDirsKey = "EXOReadOnlyModelsDirs"
 private let customEnvironmentVariablesKey = "EXOCustomEnvironmentVariables"
 
 /// A user-defined environment variable that is injected into the exo child
-/// process at launch. Used to pass arbitrary key/value settings to exo
-/// without having to add first-class UI for each one.
+/// process at launch. Used as an escape hatch for env vars that don't have
+/// first-class typed UI in Settings.
 struct CustomEnvironmentVariable: Codable, Identifiable, Equatable {
     var id: UUID
     var key: String
@@ -106,6 +109,30 @@ final class ExoProcessController: ObservableObject {
             UserDefaults.standard.set(fastSynchEnabled, forKey: fastSynchEnabledKey)
         }
     }
+    /// Value exported as `EXO_DEFAULT_MODELS_DIR` (skipped when blank). Loaded
+    /// from `UserDefaults` at init, "" when unset; saved back on every change.
+    @Published var defaultModelsDir: String = {
+        UserDefaults.standard.string(forKey: defaultModelsDirKey) ?? ""
+    }()
+    {
+        didSet { UserDefaults.standard.set(defaultModelsDir, forKey: defaultModelsDirKey) }
+    }
+    /// Value exported as `EXO_MODELS_DIRS` (skipped when blank). Loaded from
+    /// `UserDefaults` at init, "" when unset; saved back on every change.
+    @Published var additionalModelsDirs: String = {
+        UserDefaults.standard.string(forKey: additionalModelsDirsKey) ?? ""
+    }()
+    {
+        didSet { UserDefaults.standard.set(additionalModelsDirs, forKey: additionalModelsDirsKey) }
+    }
+    /// Value exported as `EXO_MODELS_READ_ONLY_DIRS` (skipped when blank). Loaded
+    /// from `UserDefaults` at init, "" when unset; saved back on every change.
+    @Published var readOnlyModelsDirs: String = {
+        UserDefaults.standard.string(forKey: readOnlyModelsDirsKey) ?? ""
+    }()
+    {
+        didSet { UserDefaults.standard.set(readOnlyModelsDirs, forKey: readOnlyModelsDirsKey) }
+    }
     @Published var customEnvironmentVariables: [CustomEnvironmentVariable] = {
         guard
             let data = UserDefaults.standard.data(forKey: customEnvironmentVariablesKey),
@@ -364,8 +391,21 @@ final class ExoProcessController: ObservableObject {
 
         environment["PATH"] = paths.joined(separator: ":")
 
+        let trimmedDefaultModelsDir = defaultModelsDir.trimmingCharacters(in: .whitespaces)
+        if !trimmedDefaultModelsDir.isEmpty {
+            environment["EXO_DEFAULT_MODELS_DIR"] = trimmedDefaultModelsDir
+        }
+        let trimmedAdditionalModelsDirs = additionalModelsDirs.trimmingCharacters(in: .whitespaces)
+        if !trimmedAdditionalModelsDirs.isEmpty {
+            environment["EXO_MODELS_DIRS"] = trimmedAdditionalModelsDirs
+        }
+        let trimmedReadOnlyModelsDirs = readOnlyModelsDirs.trimmingCharacters(in: .whitespaces)
+        if !trimmedReadOnlyModelsDirs.isEmpty {
+            environment["EXO_MODELS_READ_ONLY_DIRS"] = trimmedReadOnlyModelsDirs
+        }
+
         // Apply user-defined arbitrary environment variables last so that
-        // power users can override any of the built-in keys above when
+        // power users can override any of the typed fields above when
         // necessary. Empty keys are ignored.
         for variable in customEnvironmentVariables {
             let trimmedKey = variable.key.trimmingCharacters(in: .whitespaces)
diff --git a/app/EXO/EXO/Info.plist b/app/EXO/EXO/Info.plist
index 575170a64..9993a0fbc 100644
--- a/app/EXO/EXO/Info.plist
+++ b/app/EXO/EXO/Info.plist
@@ -9,7 +9,7 @@
 	<key>EXOBuildCommit</key>
 	<string>$(EXO_BUILD_COMMIT)</string>
 	<key>EXOBugReportPresignedUrlEndpoint</key>
-	<string>$(EXO_BUG_REPORT_PRESIGNED_URL_ENDPOINT)</string>
+	<string>https://reports.exolabs.net/presigned-urls</string>
 	<key>NSLocalNetworkUsageDescription</key>
 	<string>EXO needs local network access to discover and connect to other devices in your cluster for distributed AI inference.</string>
 	<key>NSBonjourServices</key>
diff --git a/app/EXO/EXO/Services/ClusterStateService.swift b/app/EXO/EXO/Services/ClusterStateService.swift
index 27c7ffdf4..c4222cde9 100644
--- a/app/EXO/EXO/Services/ClusterStateService.swift
+++ b/app/EXO/EXO/Services/ClusterStateService.swift
@@ -17,7 +17,7 @@ final class ClusterStateService: ObservableObject {
 
     init(
         baseURL: URL = URL(string: "http://127.0.0.1:52415")!,
-        session: URLSession = .shared
+        session: URLSession = ClusterStateService.makeNonCachingSession()
     ) {
         self.baseURL = baseURL
         self.endpoint = baseURL.appendingPathComponent("state")
@@ -27,6 +27,23 @@ final class ClusterStateService: ObservableObject {
         self.decoder = decoder
     }
 
+    /// Builds the session used by default for `/state` polling: an ephemeral
+    /// configuration with no `URLCache` and a cache-bypassing request policy.
+    ///
+    /// Rationale: `URLSession.shared` carries an on-disk `URLCache` that
+    /// persists response bodies under `~/Library/Caches/exolabs.EXO/`. With
+    /// `startPolling` hitting `/state` at 2 Hz, that cache dirtied roughly
+    /// 500–620 KB/sec of file-backed memory and tripped macOS's per-process
+    /// `disk writes` resource limit (microstackshot reports on M3 Ultra
+    /// showed GBs of cached responses per hour). Polling responses are small
+    /// and time-sensitive, so caching them on disk buys nothing.
+    private static func makeNonCachingSession() -> URLSession {
+        let ephemeral: URLSessionConfiguration = .ephemeral
+        ephemeral.requestCachePolicy = .reloadIgnoringLocalCacheData
+        ephemeral.urlCache = nil
+        return URLSession(configuration: ephemeral)
+    }
+
     func startPolling(interval: TimeInterval = 0.5) {
         stopPolling()
         Task {
diff --git a/app/EXO/EXO/Views/BugReportWindowController.swift b/app/EXO/EXO/Views/BugReportWindowController.swift
new file mode 100644
index 000000000..1e1c96045
--- /dev/null
+++ b/app/EXO/EXO/Views/BugReportWindowController.swift
@@ -0,0 +1,242 @@
+import AppKit
+import SwiftUI
+
+/// Owns the one standalone window that hosts the bug-report flow.
+/// Calling `open()` while that window is visible only brings it to the front.
+@MainActor
+final class BugReportWindowController: ObservableObject {
+    private var window: NSWindow?
+
+    func open() {
+        // Already on screen: re-focus it instead of creating a second window.
+        if window?.isVisible == true {
+            window?.makeKeyAndOrderFront(nil)
+            NSApp.activate()
+            return
+        }
+
+        // Build a fresh view each time so a new report starts at the prompt.
+        let rootView = BugReportView(onDismiss: { [weak self] in self?.window?.close() })
+        let host = NSHostingController(rootView: rootView)
+        host.sizingOptions = [.preferredContentSize, .minSize]
+
+        let reportWindow = NSWindow(contentViewController: host)
+        reportWindow.styleMask = [.titled, .closable, .resizable]
+        reportWindow.title = "Send a Bug Report"
+        reportWindow.center()
+        reportWindow.setFrameAutosaveName("ExoBugReportWindow")
+        // Closing must not release the window: `window` still references it.
+        reportWindow.isReleasedWhenClosed = false
+        reportWindow.makeKeyAndOrderFront(nil)
+        NSApp.activate()
+
+        window = reportWindow
+    }
+}
+
+private struct BugReportView: View {
+    /// Steps of the report flow. Each associated `String` is the message the
+    /// matching sub-view renders for that step.
+    fileprivate enum Phase: Equatable {
+        case prompting
+        case sending(String), success(String), failure(String)
+    }
+
+    let onDismiss: () -> Void
+
+    @State private var phase: Phase = .prompting
+    @State private var userDescription: String = ""
+    @FocusState private var descriptionFocused: Bool
+
+    /// Root layout: shows the sub-view for the current `phase`, animates
+    /// phase changes, and requests focus for the description editor on appear.
+    var body: some View {
+        VStack(alignment: .leading, spacing: 12) {
+            switch phase {
+            case .prompting: promptingView
+            case let .sending(text): sendingView(message: text)
+            case let .success(text): successView(message: text)
+            case let .failure(text): failureView(message: text)
+            }
+        }
+        .padding(16)
+        .frame(minWidth: 380)
+        .animation(.easeInOut(duration: 0.2), value: phase)
+        .onAppear {
+            descriptionFocused = true
+        }
+    }
+
+    /// Initial form: an optional free-text description, a notice that
+    /// diagnostic logs are uploaded with the report, and Cancel / Send buttons.
+    private var promptingView: some View {
+        let fieldShape = RoundedRectangle(cornerRadius: 6)
+        return VStack(alignment: .leading, spacing: 8) {
+            Text("Description (optional)")
+                .font(.subheadline)
+                .foregroundColor(.secondary)
+            ZStack(alignment: .topLeading) {
+                // Hand-rolled placeholder, present only while the editor is empty.
+                if userDescription.isEmpty {
+                    Text("What were you doing when it broke?")
+                        .font(.body)
+                        .foregroundColor(Color(nsColor: .placeholderTextColor))
+                        .padding(.horizontal, 10)
+                        .padding(.vertical, 8)
+                        .allowsHitTesting(false)
+                }
+                TextEditor(text: $userDescription)
+                    .font(.body)
+                    .scrollContentBackground(.hidden)
+                    .padding(4)
+                    .frame(height: 72)
+                    .focused($descriptionFocused)
+            }
+            // One rounded shape supplies both the field's fill and its border.
+            .background(fieldShape.fill(Color(nsColor: .textBackgroundColor)))
+            .overlay(fieldShape.strokeBorder(Color(nsColor: .separatorColor), lineWidth: 1))
+
+            Text("Diagnostic logs will be uploaded with your report.")
+                .font(.caption)
+                .foregroundColor(.secondary)
+
+            // Cancel is wired as the cancel action, Send as the default action.
+            HStack {
+                Spacer()
+                Button("Cancel", action: onDismiss)
+                    .keyboardShortcut(.cancelAction)
+                Button("Send") {
+                    Task { await send() }
+                }
+                .keyboardShortcut(.defaultAction)
+            }
+            .padding(.top, 4)
+        }
+    }
+
+    /// In-flight state: a small spinner next to `message`, with the Cancel and
+    /// Send buttons still laid out but disabled while the report is being sent.
+    private func sendingView(message: String) -> some View {
+        VStack(alignment: .leading, spacing: 12) {
+            HStack(spacing: 10) {
+                ProgressView().controlSize(.small)
+                Text(message).foregroundColor(.secondary)
+            }
+            HStack {
+                Spacer()
+                Button("Cancel", action: onDismiss)
+                    .keyboardShortcut(.cancelAction)
+                    .disabled(true)
+                Button("Send", action: {}).disabled(true)
+            }
+        }
+    }
+
+    /// Confirmation state: green checkmark with `message`, a button that opens
+    /// a pre-filled GitHub issue, and a default "Done" button that dismisses.
+    private func successView(message: String) -> some View {
+        VStack(alignment: .leading, spacing: 12) {
+            HStack(alignment: .top, spacing: 10) {
+                Image(systemName: "checkmark.circle.fill")
+                    .foregroundColor(.green)
+                    .font(.title2)
+                // Fixed vertical size lets a long message wrap to its full height.
+                Text(message).fixedSize(horizontal: false, vertical: true)
+            }
+            HStack {
+                Button(action: { openGitHubIssue() }) {
+                    HStack(spacing: 4) {
+                        Image(systemName: "arrow.up.right.square")
+                        Text("Open GitHub Issue")
+                    }
+                }
+                Spacer()
+                Button("Done", action: onDismiss)
+                    .keyboardShortcut(.defaultAction)
+            }
+        }
+    }
+
+    /// Error state: orange warning icon with `message`, a "Try Again" button
+    /// that returns to the prompt, and a default "Close" button that dismisses.
+    private func failureView(message: String) -> some View {
+        VStack(alignment: .leading, spacing: 12) {
+            HStack(alignment: .top, spacing: 10) {
+                Image(systemName: "exclamationmark.triangle.fill")
+                    .foregroundColor(.orange)
+                    .font(.title2)
+                Text(message).fixedSize(horizontal: false, vertical: true)
+            }
+            HStack {
+                Spacer()
+                // Only `phase` is reset here; `userDescription` is left untouched.
+                Button("Try Again", action: { phase = .prompting })
+                Button("Close", action: onDismiss)
+                    .keyboardShortcut(.defaultAction)
+            }
+        }
+    }
+
+    /// Sends the report through `BugReportService` and moves `phase` to
+    /// `.success` or `.failure`; a thrown error becomes the failure message.
+    private func send() async {
+        phase = .sending("Collecting logs and uploading…")
+        let reporter = BugReportService()
+        let trimmed = userDescription.trimmingCharacters(in: .whitespacesAndNewlines)
+        // A blank description is passed as `nil` rather than as an empty string.
+        let note: String? = trimmed.isEmpty ? nil : trimmed
+        do {
+            let result = try await reporter.sendReport(
+                isManual: true,
+                userDescription: note
+            )
+            phase = result.success ? .success(result.message) : .failure(result.message)
+        } catch {
+            phase = .failure(error.localizedDescription)
+        }
+    }
+
+    /// Opens a pre-filled "new issue" page on GitHub via `NSWorkspace`. The body
+    /// holds the user's description (or a placeholder) and the macOS/EXO versions.
+    private func openGitHubIssue() {
+        let description = userDescription.trimmingCharacters(in: .whitespacesAndNewlines)
+        let placeholder = "A clear and concise description of what the bug is."
+        let summary = description.isEmpty ? placeholder : description
+        let body = [
+            "## Describe the bug",
+            "",
+            summary,
+            "",
+            "## Environment",
+            "",
+            "- macOS Version: \(ProcessInfo.processInfo.operatingSystemVersionString)",
+            "- EXO Version: \(buildTag) (\(buildCommit))",
+            "",
+            "## Additional context",
+            "",
+            "A bug report with diagnostic logs was submitted via the app.",
+        ].joined(separator: "\n")
+
+        var components = URLComponents(string: "https://github.com/exo-explore/exo/issues/new")!
+        components.queryItems = [
+            URLQueryItem(name: "template", value: "bug_report.md"),
+            URLQueryItem(name: "title", value: "[BUG] "),
+            URLQueryItem(name: "body", value: body),
+            URLQueryItem(name: "labels", value: "bug"),
+        ]
+        // `queryItems` leaves "+" unescaped (legal per RFC 3986), but web forms
+        // decode it as a space, which would garble text such as "C++".
+        components.percentEncodedQuery = components.percentEncodedQuery?
+            .replacingOccurrences(of: "+", with: "%2B")
+        guard let url = components.url else { return }
+        NSWorkspace.shared.open(url)
+    }
+
+    private var buildTag: String {  // Info.plist `EXOBuildTag`; "unknown" if absent/non-string.
+        Bundle.main.infoDictionary?["EXOBuildTag"] as? String ?? "unknown"
+    }
+
+    private var buildCommit: String {  // Info.plist `EXOBuildCommit`; "unknown" if absent/non-string.
+        Bundle.main.infoDictionary?["EXOBuildCommit"] as? String ?? "unknown"
+    }
+}
diff --git a/app/EXO/EXO/Views/SettingsView.swift b/app/EXO/EXO/Views/SettingsView.swift
index 5aa98a506..bb1f1bb79 100644
--- a/app/EXO/EXO/Views/SettingsView.swift
+++ b/app/EXO/EXO/Views/SettingsView.swift
@@ -16,10 +16,11 @@ struct SettingsView: View {
     @State private var pendingEnableImageModels = false
     @State private var pendingOfflineMode = false
     @State private var pendingFastSynchEnabled = false
+    @State private var pendingDefaultModelsDir: String = ""
+    @State private var pendingAdditionalModelsDirs: String = ""
+    @State private var pendingReadOnlyModelsDirs: String = ""
     @State private var pendingCustomEnvironmentVariables: [CustomEnvironmentVariable] = []
     @State private var needsRestart = false
-    @State private var bugReportInFlight = false
-    @State private var bugReportMessage: String?
     @State private var uninstallInProgress = false
 
     var body: some View {
@@ -45,7 +46,7 @@ struct SettingsView: View {
                     Label("About", systemImage: "info.circle")
                 }
         }
-        .frame(width: 450, height: 400)
+        .frame(width: 640, height: 560)
         .onAppear {
             pendingNamespace = controller.customNamespace
             pendingHFToken = controller.hfToken
@@ -53,6 +54,9 @@ struct SettingsView: View {
             pendingEnableImageModels = controller.enableImageModels
             pendingOfflineMode = controller.offlineMode
             pendingFastSynchEnabled = controller.fastSynchEnabled
+            pendingDefaultModelsDir = controller.defaultModelsDir
+            pendingAdditionalModelsDirs = controller.additionalModelsDirs
+            pendingReadOnlyModelsDirs = controller.readOnlyModelsDirs
             pendingCustomEnvironmentVariables = controller.customEnvironmentVariables
             needsRestart = false
         }
@@ -64,9 +68,9 @@ struct SettingsView: View {
         Form {
             Section {
                 LabeledContent("Cluster Namespace") {
-                    TextField("default", text: $pendingNamespace)
+                    TextField("", text: $pendingNamespace, prompt: Text("default"))
                         .textFieldStyle(.roundedBorder)
-                        .frame(width: 200)
+                        .frame(width: 260)
                 }
                 Text("Nodes with the same namespace form a cluster. Leave empty for default.")
                     .font(.caption)
@@ -75,9 +79,9 @@ struct SettingsView: View {
 
             Section {
                 LabeledContent("HuggingFace Token") {
-                    SecureField("optional", text: $pendingHFToken)
+                    SecureField("", text: $pendingHFToken, prompt: Text("optional"))
                         .textFieldStyle(.roundedBorder)
-                        .frame(width: 200)
+                        .frame(width: 260)
                 }
                 Text("Required for gated models. Get yours at huggingface.co/settings/tokens")
                     .font(.caption)
@@ -86,9 +90,9 @@ struct SettingsView: View {
 
             Section {
                 LabeledContent("HuggingFace Endpoint") {
-                    TextField("default", text: $pendingHFEndpoint)
+                    TextField("", text: $pendingHFEndpoint, prompt: Text("default"))
                         .textFieldStyle(.roundedBorder)
-                        .frame(width: 200)
+                        .frame(width: 260)
                 }
                 Text("Defaults to huggingface.co. Use a mirror (e.g. hf-mirror.com) for China.")
                     .font(.caption)
@@ -196,8 +200,6 @@ struct SettingsView: View {
                 VStack(alignment: .leading, spacing: 2) {
                     rdmaStatusView
                 }
-
-                sendBugReportButton
             }
 
             Section("Danger Zone") {
@@ -222,11 +224,58 @@ struct SettingsView: View {
 
     private var environmentTab: some View {
         Form {
-            Section("Custom Environment Variables") {
-                Text("Passed to the exo process at launch. Override built-in defaults here.")
+            Section("Models Directories") {
+                LabeledContent("Default Models Directory") {
+                    TextField(
+                        "",
+                        text: $pendingDefaultModelsDir,
+                        prompt: Text("~/.exo/models")
+                    )
+                    .textFieldStyle(.roundedBorder)
+                    .font(.system(.body, design: .monospaced))
+                    .frame(width: 260)
+                }
+                Text("Sets EXO_DEFAULT_MODELS_DIR. Where models are downloaded.")
                     .font(.caption)
                     .foregroundColor(.secondary)
 
+                LabeledContent("Additional Directories") {
+                    TextField(
+                        "",
+                        text: $pendingAdditionalModelsDirs,
+                        prompt: Text("optional, colon-separated")
+                    )
+                    .textFieldStyle(.roundedBorder)
+                    .font(.system(.body, design: .monospaced))
+                    .frame(width: 260)
+                }
+                Text("Sets EXO_MODELS_DIRS. Extra writable model directories.")
+                    .font(.caption)
+                    .foregroundColor(.secondary)
+
+                LabeledContent("Read-Only Directories") {
+                    TextField(
+                        "",
+                        text: $pendingReadOnlyModelsDirs,
+                        prompt: Text("optional, colon-separated")
+                    )
+                    .textFieldStyle(.roundedBorder)
+                    .font(.system(.body, design: .monospaced))
+                    .frame(width: 260)
+                }
+                Text("Sets EXO_MODELS_READ_ONLY_DIRS. Never written to.")
+                    .font(.caption)
+                    .foregroundColor(.secondary)
+            }
+
+            Section("Custom Environment Variables") {
+                Text(
+                    "Escape hatch for env vars that don't have typed fields above. "
+                        + "Values here override the typed fields on conflict."
+                )
+                .font(.caption)
+                .foregroundColor(.secondary)
+
                 if pendingCustomEnvironmentVariables.isEmpty {
                     Text("No custom variables.")
                         .font(.caption)
@@ -451,63 +500,30 @@ struct SettingsView: View {
         }
     }
 
-    private var sendBugReportButton: some View {
-        VStack(alignment: .leading, spacing: 4) {
-            Button {
-                Task {
-                    await sendBugReport()
-                }
-            } label: {
-                HStack {
-                    if bugReportInFlight {
-                        ProgressView()
-                            .scaleEffect(0.6)
-                    }
-                    Text("Send Bug Report")
-                        .font(.caption)
-                        .fontWeight(.semibold)
-                    Spacer()
-                }
-            }
-            .disabled(bugReportInFlight)
-
-            if let message = bugReportMessage {
-                Text(message)
-                    .font(.caption2)
-                    .foregroundColor(.secondary)
-                    .fixedSize(horizontal: false, vertical: true)
-            }
-        }
-    }
-
     // MARK: - Actions
 
-    private func sendBugReport() async {
-        bugReportInFlight = true
-        bugReportMessage = "Collecting logs..."
-        let service = BugReportService()
-        do {
-            let outcome = try await service.sendReport(isManual: true)
-            bugReportMessage = outcome.message
-        } catch {
-            bugReportMessage = error.localizedDescription
-        }
-        bugReportInFlight = false
-    }
-
     private func showUninstallConfirmationAlert() {
         let alert = NSAlert()
         alert.messageText = "Uninstall EXO"
         alert.informativeText = """
-            This will remove EXO and all its system components:
+            This will remove EXO and all its components:
 
             • Network configuration daemon
             • Launch at login registration
             • EXO network location
+            • EXO data directory (~/.exo)
 
             The app will be moved to Trash.
             """
         alert.alertStyle = .warning
+
+        let checkbox = NSButton(
+            checkboxWithTitle: "Keep downloaded models (~/.exo/models)",
+            target: nil, action: nil)
+        checkbox.state = .off
+        checkbox.sizeToFit()
+        alert.accessoryView = checkbox
+
         alert.addButton(withTitle: "Uninstall")
         alert.addButton(withTitle: "Cancel")
 
@@ -517,11 +533,11 @@ struct SettingsView: View {
 
         let response = alert.runModal()
         if response == .alertFirstButtonReturn {
-            performUninstall()
+            performUninstall(keepModels: checkbox.state == .on)
         }
     }
 
-    private func performUninstall() {
+    private func performUninstall(keepModels: Bool) {
         uninstallInProgress = true
 
         controller.cancelPendingLaunch()
@@ -531,6 +547,7 @@ struct SettingsView: View {
         DispatchQueue.global(qos: .utility).async {
             do {
                 try NetworkSetupHelper.uninstall()
+                try Self.removeExoDirectory(keepModels: keepModels)
 
                 DispatchQueue.main.async {
                     LaunchAtLoginHelper.disable()
@@ -554,6 +571,23 @@ struct SettingsView: View {
         }
     }
 
+    /// Removes the EXO data directory, or everything in it except `models`
+    /// when `keepModels` is true. Throws if any removal fails.
+    private static func removeExoDirectory(keepModels: Bool) throws {
+        let fm = FileManager.default
+        let exoDir = ExoProcessController.exoDirectoryURL
+        guard fm.fileExists(atPath: exoDir.path) else { return }
+        guard keepModels else { return try fm.removeItem(at: exoDir) }
+        let contents = try fm.contentsOfDirectory(
+            at: exoDir, includingPropertiesForKeys: nil, options: [])
+        // Attempt every entry before reporting, so one failure doesn't strand the rest.
+        var firstError: Error?
+        for entry in contents where entry.lastPathComponent != "models" {
+            do { try fm.removeItem(at: entry) } catch { firstError = firstError ?? error }
+        }
+        if let failure = firstError { throw failure }
+    }
+
     private func moveAppToTrash() {
         guard let appURL = Bundle.main.bundleURL as URL? else { return }
         do {
@@ -580,7 +614,10 @@ struct SettingsView: View {
     }
 
     private var hasEnvironmentChanges: Bool {
-        pendingCustomEnvironmentVariables != controller.customEnvironmentVariables
+        pendingDefaultModelsDir != controller.defaultModelsDir
+            || pendingAdditionalModelsDirs != controller.additionalModelsDirs
+            || pendingReadOnlyModelsDirs != controller.readOnlyModelsDirs
+            || pendingCustomEnvironmentVariables != controller.customEnvironmentVariables
     }
 
     private func applyGeneralSettings() {
@@ -602,6 +639,17 @@ struct SettingsView: View {
     }
 
     private func applyEnvironmentSettings() {
+        controller.defaultModelsDir = pendingDefaultModelsDir.trimmingCharacters(
+            in: .whitespaces)
+        controller.additionalModelsDirs = pendingAdditionalModelsDirs.trimmingCharacters(
+            in: .whitespaces)
+        controller.readOnlyModelsDirs = pendingReadOnlyModelsDirs.trimmingCharacters(
+            in: .whitespaces)
+
+        pendingDefaultModelsDir = controller.defaultModelsDir
+        pendingAdditionalModelsDirs = controller.additionalModelsDirs
+        pendingReadOnlyModelsDirs = controller.readOnlyModelsDirs
+
         // Trim whitespace from keys and drop empty ones so that the stored
         // form matches what is actually injected into the child process and
         // hasEnvironmentChanges doesn't show a stale diff after save.
@@ -629,6 +677,7 @@ struct SettingsView: View {
 
         pendingCustomEnvironmentVariables = sanitized
         controller.customEnvironmentVariables = sanitized
+
         restartIfRunning()
     }
 
diff --git a/app/EXO/EXO/Views/SettingsWindowController.swift b/app/EXO/EXO/Views/SettingsWindowController.swift
index 98517f922..9ede2a4cd 100644
--- a/app/EXO/EXO/Views/SettingsWindowController.swift
+++ b/app/EXO/EXO/Views/SettingsWindowController.swift
@@ -30,7 +30,7 @@ final class SettingsWindowController: ObservableObject {
         let hostingView = NSHostingView(rootView: settingsView)
 
         let newWindow = NSWindow(
-            contentRect: NSRect(x: 0, y: 0, width: 450, height: 400),
+            contentRect: NSRect(x: 0, y: 0, width: 640, height: 560),
             styleMask: [.titled, .closable],
             backing: .buffered,
             defer: false
diff --git a/app/EXO/uninstall-exo.sh b/app/EXO/uninstall-exo.sh
index c51f33a40..b14490df8 100755
--- a/app/EXO/uninstall-exo.sh
+++ b/app/EXO/uninstall-exo.sh
@@ -3,25 +3,55 @@
 # EXO Uninstaller Script
 #
 # This script removes all EXO system components that persist after deleting the app.
-# Run with: sudo ./uninstall-exo.sh
+# Run with: sudo ./uninstall-exo.sh [--keep-models]
+#
+# Options:
+#   --keep-models   Preserve ~/.exo/models when removing the EXO data directory.
 #
 # Components removed:
 # - LaunchDaemon: /Library/LaunchDaemons/io.exo.networksetup.plist
 # - Network script: /Library/Application Support/EXO/
 # - Log files: /var/log/io.exo.networksetup.*
 # - Network location: "exo"
+# - EXO data directory: ~/.exo (or all of ~/.exo except models/ when --keep-models is set)
 # - Launch at login registration
 #
 
 set -euo pipefail
 
+KEEP_MODELS=0
+for arg in "$@"; do
+  case "$arg" in
+  --keep-models)
+    KEEP_MODELS=1
+    ;;
+  -h | --help)
+    echo "Usage: sudo ./uninstall-exo.sh [--keep-models]"
+    echo "  --keep-models   Preserve ~/.exo/models when removing the EXO data directory."
+    exit 0
+    ;;
+  *)
+    echo "Unknown argument: $arg" >&2
+    echo "Usage: sudo ./uninstall-exo.sh [--keep-models]" >&2
+    exit 2
+    ;;
+  esac
+done
+
 LABEL="io.exo.networksetup"
-SCRIPT_DEST="/Library/Application Support/EXO/disable_bridge_enable_dhcp.sh"
+# Current script path. Older installs used a different filename; keep the
+# legacy path here so a fresh uninstall still cleans up upgraded machines.
+CURRENT_SCRIPT_DEST="/Library/Application Support/EXO/disable_bridge.sh"
+LEGACY_SCRIPT_DEST="/Library/Application Support/EXO/disable_bridge_enable_dhcp.sh"
 PLIST_DEST="/Library/LaunchDaemons/io.exo.networksetup.plist"
 LOG_OUT="/var/log/${LABEL}.log"
 LOG_ERR="/var/log/${LABEL}.err.log"
 APP_BUNDLE_ID="io.exo.EXO"
 
+# Resolve the invoking user's home, even when run via sudo.
+USER_HOME="$(eval echo "~${SUDO_USER:-$USER}")"
+EXO_DIR="$USER_HOME/.exo"
+
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@@ -69,11 +99,17 @@ else
   echo_warn "LaunchDaemon plist not found (already removed?)"
 fi
 
-# Remove the script and parent directory
-if [[ -f $SCRIPT_DEST ]]; then
-  rm -f "$SCRIPT_DEST"
-  echo_info "Removed network setup script"
-else
+# Remove the script (current and legacy filenames) — backwards-compatible:
+# tolerate either, both, or neither being present.
+removed_any_script=0
+for script in "$CURRENT_SCRIPT_DEST" "$LEGACY_SCRIPT_DEST"; do
+  if [[ -f $script ]]; then
+    rm -f "$script"
+    echo_info "Removed network setup script: $script"
+    removed_any_script=1
+  fi
+done
+if [[ $removed_any_script -eq 0 ]]; then
   echo_warn "Network setup script not found (already removed?)"
 fi
 
@@ -115,6 +151,22 @@ if networksetup -listnetworkservices 2>/dev/null | grep -q "Thunderbolt Bridge";
   echo_info "Re-enabled Thunderbolt Bridge"
 fi
 
+# Remove EXO data directory (~/.exo)
+EXO_DIR_REMOVED=""
+if [[ -d $EXO_DIR ]]; then
+  if [[ $KEEP_MODELS == "1" && -d "$EXO_DIR/models" ]]; then
+    find "$EXO_DIR" -mindepth 1 -maxdepth 1 ! -name models -exec rm -rf {} +
+    EXO_DIR_REMOVED="kept_models"
+    echo_info "Removed ~/.exo (preserved models/)"
+  else
+    rm -rf "$EXO_DIR"
+    EXO_DIR_REMOVED="full"
+    echo_info "Removed ~/.exo"
+  fi
+else
+  echo_warn "~/.exo not found (already removed?)"
+fi
+
 # Note about launch at login registration
 # SMAppService-based login items cannot be removed from a shell script.
 # They can only be unregistered from within the app itself or manually via System Settings.
@@ -144,6 +196,10 @@ echo "  • Network setup LaunchDaemon"
 echo "  • Network configuration script"
 echo "  • Log files"
 echo "  • 'exo' network location"
+case "$EXO_DIR_REMOVED" in
+full) echo "  • EXO data directory (~/.exo)" ;;
+kept_models) echo "  • EXO data directory (~/.exo, models preserved)" ;;
+esac
 echo ""
 echo "Your network has been restored to use the 'Automatic' location."
 echo "Thunderbolt Bridge has been re-enabled (if present)."
diff --git a/bench/METHODOLOGY.md b/bench/METHODOLOGY.md
index a899d02a7..d42624c06 100644
--- a/bench/METHODOLOGY.md
+++ b/bench/METHODOLOGY.md
@@ -28,7 +28,7 @@ Chat template formatting means that it may be impossible to attain very small pp
 
 When a request reaches the server via the `/bench/chat/completions` endpoint, three things change compared to a normal chat completion:
 
-- **KV prefix cache is disabled**. Every request starts from a cold cache, ensuring prefill timing is not affected by prior requests.
+- **KV prefix cache is disabled by default**. Every request starts from a cold cache, ensuring prefill timing is not affected by prior requests. See [Prefix Cache Mode](#prefix-cache-mode) for the `--use-prefix-cache` option.
 - **EOS tokens are banned**. A logits processor suppresses all end-of-sequence tokens, forcing the model to generate exactly `max_tokens` tokens. This guarantees consistent generation length for fair TPS comparison — the model cannot short-circuit a run by stopping early.
 - **No model output parsing**. The bench collection path concatenates raw token text without any model-specific post-processing (thinking tag extraction, structured output handling, etc.). This is to avoid model outputs such as tool parsing or any structural mistakes from breaking the benchmark - we are testing for speed; see Exo-Eval for performance metrics.
 
@@ -94,6 +94,22 @@ agg_gen_tps = per_req_tps * concurrency
 
 ---
 
+## Prefix Cache Mode
+
+When `--use-prefix-cache` is passed, the KV prefix cache remains active during benchmarking. This speeds up repeated runs by skipping redundant prefill work, which is useful when prompt processing is not the focus of the benchmark (e.g. when measuring generation throughput or power consumption across many configurations).
+
+Each response includes a `prefix_cache_hit` field (`"none"`, `"partial"`, or `"exact"`):
+
+- **none**: Cold prefill — no cached KV state was available. The reported `prompt_tps` is the real prefill throughput.
+- **partial**: A prefix of the prompt was found in cache. Only the remaining tokens were prefilled. The reported `prompt_tps` reflects the real throughput on the uncached portion. This occurs when multiple ascending `--pp` values share a common prefix (e.g. `--pp 1000,5000` — the 5000-token prompt reuses the 1000-token cache entry and prefills the remaining 4000 tokens).
+- **exact**: The entire prompt was found in cache (e.g. same `--pp` value on a `--repeat`). No prefill work was done. The reported `prompt_tps` is the TPS from when the cache entry was originally created, not a new measurement.
+
+**Prompt TPS is approximate in this mode.** Exact-hit runs report the stored TPS from the original cold/partial prefill rather than a freshly measured value. For accurate cold prefill numbers, run without `--use-prefix-cache`.
+
+Ascending `--pp` order (e.g. `--pp 1000,5000,10000`) gives the most useful data: each size gets a meaningful partial hit, except the first, which is cold. Descending order produces exact hits with approximate TPS from a longer prompt's original run.
+
+---
+
 ## Warmup
 
 Before measurement begins, `--warmup N` (default: 0) discarded requests are sent using the first pp/tg pair. Warmup results are not included in the output.
diff --git a/bench/eval_configs/models.toml b/bench/eval_configs/models.toml
index 477e96cd9..d33698fe1 100644
--- a/bench/eval_configs/models.toml
+++ b/bench/eval_configs/models.toml
@@ -7,7 +7,7 @@
 #   name, patterns, reasoning
 #
 # Optional per-model overrides (CLI flags take priority over these):
-#   temperature, top_p, max_tokens, reasoning_effort
+#   temperature, top_p, max_tokens, reasoning_effort, enable_thinking
 #
 # Fallback defaults (when no per-model config):
 #   reasoning:     temperature=1.0, max_tokens=131072, reasoning_effort="high"
@@ -18,10 +18,9 @@
 
 # ─── Qwen3.5 (Feb 2026) ─────────────────────────────────────────────
 # Source: HuggingFace model cards (Qwen/Qwen3.5-*)
-# 35B-A3B thinking general: temp=1.0, top_p=0.95, top_k=20
-# 397B thinking: temp=0.6, top_p=0.95, top_k=20
-# Non-thinking: temp=0.7, top_p=0.8, top_k=20
-# max_tokens: 32768 general, 81920 for complex math/code
+# Model card recommends: temp=0.6, top_p=0.95, top_k=20
+# We omit top_k to match vllm eval (which doesn't set it).
+# max_tokens=121072 to match vllm eval (131072 context - 10000 safety margin).
 
 [[model]]
 name = "Qwen3.5 2B"
@@ -29,7 +28,8 @@ patterns = ["Qwen3.5-2B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
-max_tokens = 81920
+enable_thinking = true
+max_tokens = 121072
 
 [[model]]
 name = "Qwen3.5 9B"
@@ -37,7 +37,8 @@ patterns = ["Qwen3.5-9B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
-max_tokens = 81920
+enable_thinking = true
+max_tokens = 121072
 
 [[model]]
 name = "Qwen3.5 27B"
@@ -45,15 +46,17 @@ patterns = ["Qwen3.5-27B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
-max_tokens = 81920
+enable_thinking = true
+max_tokens = 121072
 
 [[model]]
 name = "Qwen3.5 35B A3B"
 patterns = ["Qwen3.5-35B-A3B"]
 reasoning = true
-temperature = 1.0
+temperature = 0.6
 top_p = 0.95
-max_tokens = 81920
+enable_thinking = true
+max_tokens = 121072
 
 [[model]]
 name = "Qwen3.5 122B A10B"
@@ -61,7 +64,8 @@ patterns = ["Qwen3.5-122B-A10B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
-max_tokens = 81920
+enable_thinking = true
+max_tokens = 121072
 
 [[model]]
 name = "Qwen3.5 397B A17B"
@@ -69,12 +73,14 @@ patterns = ["Qwen3.5-397B-A17B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
-max_tokens = 81920
+enable_thinking = true
+max_tokens = 121072
 
 # ─── Qwen3 (Apr 2025) ───────────────────────────────────────────────
 # Source: HuggingFace model cards (Qwen/Qwen3-*)
-# Thinking: temp=0.6, top_p=0.95, top_k=20
-# Non-thinking: temp=0.7, top_p=0.8, top_k=20
+# Model card recommends: temp=0.6, top_p=0.95, top_k=20
+# We omit top_k to match vllm eval (which doesn't set it).
+# Non-thinking: temp=0.7, top_p=0.8
 # max_tokens: 32768 general, 38912 for complex math/code
 
 [[model]]
@@ -83,6 +89,7 @@ patterns = ["Qwen3-0.6B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
+enable_thinking = true
 max_tokens = 38912
 
 [[model]]
@@ -91,6 +98,7 @@ patterns = ["Qwen3-30B-A3B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
+enable_thinking = true
 max_tokens = 38912
 
 [[model]]
@@ -99,6 +107,7 @@ patterns = ["Qwen3-235B-A22B"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
+enable_thinking = true
 max_tokens = 38912
 
 [[model]]
@@ -107,6 +116,7 @@ patterns = ["Qwen3-Next-80B-A3B-Thinking"]
 reasoning = true
 temperature = 0.6
 top_p = 0.95
+enable_thinking = true
 max_tokens = 38912
 
 [[model]]
@@ -129,9 +139,9 @@ max_tokens = 16384
 name = "Qwen3 Coder Next"
 patterns = ["Qwen3-Coder-Next"]
 reasoning = false
-temperature = 0.7
-top_p = 0.8
-max_tokens = 16384
+temperature = 1.0
+top_p = 0.95
+max_tokens = 121072
 
 # ─── GPT-OSS (OpenAI) ───────────────────────────────────────────────
 # Source: OpenAI GitHub README + HuggingFace discussion #21
@@ -165,10 +175,38 @@ patterns = ["DeepSeek-V3.1"]
 reasoning = true
 temperature = 0.0
 
+[[model]]
+name = "DeepSeek V3.2"
+patterns = ["DeepSeek-V3.2"]
+reasoning = true
+temperature = 1.0
+top_p = 0.95
+enable_thinking = true
+
+# ─── NVIDIA Nemotron ───────────────────────────────────────────────────
+# Source: HuggingFace model cards
+# All variants: temp=1.0, top_p=0.95, enable_thinking=true
+
+[[model]]
+name = "Nemotron Cascade 2 30B A3B"
+patterns = ["Nemotron-Cascade-2-30B-A3B"]
+reasoning = true
+temperature = 1.0
+top_p = 0.95
+enable_thinking = true
+
+[[model]]
+name = "Nemotron 3 Super 120B A12B"
+patterns = ["Nemotron-3-Super-120B-A12B", "NVIDIA-Nemotron-3-Super-120B-A12B"]
+reasoning = true
+temperature = 1.0
+top_p = 0.95
+enable_thinking = true
+
 # ─── GLM (ZhipuAI / THUDM) ──────────────────────────────────────────
 # Source: HuggingFace model cards + generation_config.json + docs.z.ai
 # GLM 4.5+: temp=1.0, top_p=0.95
-# Reasoning tasks: 131072 max_tokens; coding/SWE tasks: temp=0.7
+# max_tokens=121072 to match vllm eval (131072 context - 10000 safety margin)
 
 [[model]]
 name = "GLM-5"
@@ -176,7 +214,8 @@ patterns = ["GLM-5"]
 reasoning = true
 temperature = 1.0
 top_p = 0.95
-max_tokens = 131072
+enable_thinking = true
+max_tokens = 121072
 
 [[model]]
 name = "GLM 4.5 Air"
@@ -191,7 +230,8 @@ patterns = ["GLM-4.7-"]
 reasoning = true
 temperature = 1.0
 top_p = 0.95
-max_tokens = 131072
+enable_thinking = true
+max_tokens = 121072
 # Note: matches both GLM-4.7 and GLM-4.7-Flash
 
 # ─── Kimi (Moonshot AI) ─────────────────────────────────────────────
@@ -213,7 +253,8 @@ patterns = ["Kimi-K2.5"]
 reasoning = true
 temperature = 1.0
 top_p = 0.95
-max_tokens = 131072
+enable_thinking = true
+max_tokens = 121072
 
 [[model]]
 name = "Kimi K2 Instruct"
@@ -223,7 +264,17 @@ temperature = 0.6
 
 # ─── MiniMax ─────────────────────────────────────────────────────────
 # Source: HuggingFace model cards + generation_config.json
-# All models: temp=1.0, top_p=0.95, top_k=40
+# All models: temp=1.0, top_p=0.95
+# max_tokens=90000 to match vllm eval (100000 context - 10000 safety margin)
+
+[[model]]
+name = "MiniMax M2.7"
+patterns = ["MiniMax-M2.7"]
+reasoning = true
+temperature = 1.0
+top_p = 0.95
+enable_thinking = true
+max_tokens = 90000
 
 [[model]]
 name = "MiniMax M2.5"
@@ -231,6 +282,8 @@ patterns = ["MiniMax-M2.5"]
 reasoning = true
 temperature = 1.0
 top_p = 0.95
+enable_thinking = true
+max_tokens = 90000
 
 [[model]]
 name = "MiniMax M2.1"
@@ -251,6 +304,8 @@ patterns = ["Step-3.5-Flash"]
 reasoning = true
 temperature = 1.0
 top_p = 0.95
+enable_thinking = true
+max_tokens = 121072
 
 # ─── Llama (Meta) ───────────────────────────────────────────────────
 # Source: generation_config.json + meta-llama/llama-models generation.py
diff --git a/bench/eval_tool_calls.py b/bench/eval_tool_calls.py
index 18071473a..7b219bc92 100644
--- a/bench/eval_tool_calls.py
+++ b/bench/eval_tool_calls.py
@@ -3,19 +3,20 @@ from __future__ import annotations
 
 import argparse
 import contextlib
+import io
 import json
 import os
 import sys
 import time
 import tomllib
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Literal
 
 import httpx
-from harness import (
-    ExoClient,
-    ExoHttpError,
+from exo_tools.client import ExoClient, ExoHttpError
+from exo_tools.harness import (
     add_common_instance_args,
     capture_cluster_snapshot,
     instance_id_from_instance,
@@ -209,7 +210,7 @@ def _openai_build_request(
         "model": model,
         "messages": messages,
         "tools": tools,
-        "max_tokens": 16384,
+        "max_tokens": 4096,
         "temperature": 0.0,
     }
     return "/v1/chat/completions", body
@@ -276,7 +277,7 @@ def _openai_build_followup(
         "model": model,
         "messages": followup_messages,
         "tools": tools,
-        "max_tokens": 16384,
+        "max_tokens": 4096,
         "temperature": 0.0,
     }
     return "/v1/chat/completions", body
@@ -379,7 +380,7 @@ def _claude_build_request(
         "model": model,
         "messages": claude_messages,
         "tools": claude_tools,
-        "max_tokens": 16384,
+        "max_tokens": 4096,
         "temperature": 0.0,
     }
     if system_content is not None:
@@ -489,7 +490,7 @@ def _claude_build_followup(
         "model": model,
         "messages": claude_messages,
         "tools": claude_tools,
-        "max_tokens": 16384,
+        "max_tokens": 4096,
         "temperature": 0.0,
     }
     if system_content is not None:
@@ -913,6 +914,12 @@ Examples:
         default=1,
         help="Repeat each scenario N times (default: 1)",
     )
+    parser.add_argument(
+        "--concurrency",
+        type=int,
+        default=1,
+        help="Run up to N scenarios in parallel against the same instance (default: 1)",
+    )
     parser.add_argument(
         "--scenarios",
         nargs="*",
@@ -935,6 +942,13 @@ Examples:
     )
     args = parser.parse_args()
 
+    if args.concurrency < 1:
+        print(
+            f"--concurrency must be >= 1 (got {args.concurrency})",
+            file=sys.stderr,
+        )
+        sys.exit(2)
+
     all_scenarios = load_scenarios(SCENARIOS_PATH)
     if args.scenarios:
         scenarios = [s for s in all_scenarios if s.name in args.scenarios]
@@ -1010,42 +1024,72 @@ Examples:
     cluster_snapshot = capture_cluster_snapshot(exo)
     all_results: list[ScenarioResult] = []
 
+    tasks: list[tuple[int, Scenario, ApiName]] = [
+        (run_idx, scenario, api_name)
+        for run_idx in range(args.repeat)
+        for scenario in scenarios
+        for api_name in api_names
+    ]
+
+    def _run_one(
+        http_client: httpx.Client,
+        task: tuple[int, Scenario, ApiName],
+    ) -> tuple[tuple[int, Scenario, ApiName], list[ScenarioResult], str]:
+        run_idx, scenario, api_name = task
+        buf = io.StringIO()
+        run_tag = f"[run {run_idx + 1}/{args.repeat}]" if args.repeat > 1 else ""
+        print(
+            f"\n  {run_tag}[{api_name:>9}] {scenario.name}: {scenario.description}",
+            file=buf,
+        )
+        scenario_results = run_scenario(
+            http_client,
+            args.host,
+            args.port,
+            full_model_id,
+            scenario,
+            api_name,
+            args.timeout,
+            args.verbose,
+        )
+        for r in scenario_results:
+            status = "PASS" if r.passed else "FAIL"
+            print(
+                f"    [{r.phase:>10}] {status}  ({r.latency_ms:.0f}ms)",
+                file=buf,
+            )
+            for check_name, check_ok in r.checks.items():
+                mark = "+" if check_ok else "-"
+                print(f"      {mark} {check_name}", file=buf)
+            if r.error:
+                print(f"      ! {r.error}", file=buf)
+        return task, scenario_results, buf.getvalue()
+
     try:
         with httpx.Client() as http_client:
-            for run_idx in range(args.repeat):
-                if args.repeat > 1:
-                    print(f"\n--- Run {run_idx + 1}/{args.repeat} ---", file=log)
-
-                for scenario in scenarios:
-                    for api_name in api_names:
-                        print(
-                            f"\n  [{api_name:>9}] {scenario.name}: {scenario.description}",
-                            file=log,
-                        )
-
-                        scenario_results = run_scenario(
-                            http_client,
-                            args.host,
-                            args.port,
-                            full_model_id,
-                            scenario,
-                            api_name,
-                            args.timeout,
-                            args.verbose,
-                        )
+            if args.concurrency == 1:
+                current_run = -1
+                for task in tasks:
+                    run_idx = task[0]
+                    if args.repeat > 1 and run_idx != current_run:
+                        print(f"\n--- Run {run_idx + 1}/{args.repeat} ---", file=log)
+                        current_run = run_idx
+                    _, scenario_results, buffered = _run_one(http_client, task)
+                    all_results.extend(scenario_results)
+                    log.write(buffered)
+                    log.flush()
+            else:
+                print(
+                    f"Running {len(tasks)} tasks with concurrency={args.concurrency}",
+                    file=log,
+                )
+                with ThreadPoolExecutor(max_workers=args.concurrency) as pool:
+                    futures = [pool.submit(_run_one, http_client, t) for t in tasks]
+                    for fut in as_completed(futures):
+                        _, scenario_results, buffered = fut.result()
                         all_results.extend(scenario_results)
-
-                        for r in scenario_results:
-                            status = "PASS" if r.passed else "FAIL"
-                            print(
-                                f"    [{r.phase:>10}] {status}  ({r.latency_ms:.0f}ms)",
-                                file=log,
-                            )
-                            for check_name, check_ok in r.checks.items():
-                                mark = "+" if check_ok else "-"
-                                print(f"      {mark} {check_name}", file=log)
-                            if r.error:
-                                print(f"      ! {r.error}", file=log)
+                        log.write(buffered)
+                        log.flush()
     finally:
         try:
             exo.request_json("DELETE", f"/instance/{instance_id}")
diff --git a/bench/exo_bench.py b/bench/exo_bench.py
index 3e16079d7..3322402b5 100644
--- a/bench/exo_bench.py
+++ b/bench/exo_bench.py
@@ -30,11 +30,11 @@ from pathlib import Path
 from statistics import mean
 from typing import Any
 
-from harness import (
-    ExoClient,
-    ExoHttpError,
+from exo_tools.client import ExoClient, ExoHttpError
+from exo_tools.harness import (
     add_common_instance_args,
     capture_cluster_snapshot,
+    find_existing_instance,
     instance_id_from_instance,
     node_ids_from_instance,
     nodes_used_in_instance,
@@ -79,7 +79,7 @@ def load_tokenizer_for_bench(model_id: str) -> Any:
         model_path = Path(
             snapshot_download(
                 model_id,
-                allow_patterns=["*.json", "*.py", "*.tiktoken", "*.model"],
+                allow_patterns=["*.json", "*.py", "*.tiktoken", "*.model", "*.jinja"],
             )
         )
 
@@ -122,8 +122,48 @@ def load_tokenizer_for_bench(model_id: str) -> Any:
 
         return hf_tokenizer
 
-    # Default: use AutoTokenizer
-    return AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+    # TODO: Change back to using only transformers
+    try:
+        return AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+    except (AttributeError, ValueError):
+        from huggingface_hub import snapshot_download
+        from transformers import PretrainedConfig
+
+        model_path = Path(
+            snapshot_download(
+                model_id,
+                allow_patterns=[
+                    "*.json",
+                    "*.py",
+                    "tokenizer.model",
+                    "*.tiktoken",
+                    "tiktoken.model",
+                    "*.txt",
+                    "*.jsonl",
+                    "*.jinja",
+                ],
+            )
+        )
+        stub_kwargs: dict[str, Any] = {}
+        config_file = model_path / "config.json"
+        if config_file.exists():
+            with open(config_file) as f:
+                raw = json.load(f)
+            for key in (
+                "model_type",
+                "max_position_embeddings",
+                "vocab_size",
+                "bos_token_id",
+                "eos_token_id",
+                "pad_token_id",
+            ):
+                if key in raw:
+                    stub_kwargs[key] = raw[key]
+        return AutoTokenizer.from_pretrained(
+            str(model_path),
+            config=PretrainedConfig(**stub_kwargs),
+            trust_remote_code=True,
+        )
 
 
 def format_peak_memory(b: float) -> str:
@@ -230,28 +270,79 @@ def parse_int_list(values: list[str]) -> list[int]:
 
 
 def run_one_completion(
-    client: ExoClient, model_id: str, pp_hint: int, tg: int, prompt_sizer: PromptSizer
+    client: ExoClient,
+    model_id: str,
+    pp_hint: int,
+    tg: int,
+    prompt_sizer: PromptSizer,
+    *,
+    use_prefix_cache: bool = False,
+    stream: bool = False,
 ) -> tuple[dict[str, Any], int]:
     content, pp_tokens = prompt_sizer.build(pp_hint)
     payload: dict[str, Any] = {
         "model": model_id,
         "messages": [{"role": "user", "content": content}],
-        "stream": False,
         "max_tokens": tg,
         "logprobs": False,
+        "use_prefix_cache": use_prefix_cache,
     }
 
-    t0 = time.perf_counter()
-    out = client.post_bench_chat_completions(payload)
-    elapsed = time.perf_counter() - t0
+    if not stream:
+        payload["stream"] = False
+        t0 = time.perf_counter()
+        out = client.post_bench_chat_completions(payload)
+        elapsed = time.perf_counter() - t0
 
-    stats = out.get("generation_stats")
+        stats = out.get("generation_stats")
+        choices = out.get("choices") or [{}]
+        message = choices[0].get("message", {}) if choices else {}
+        content = message.get("content") or ""
+        preview = content[:200] if content else ""
+    else:
+        tokens = 0
+        first_token_time = None
+        t0 = time.perf_counter()
+        text_parts: list[str] = []
+        stats = None
 
-    # Extract preview, handling None content (common for thinking models)
-    choices = out.get("choices") or [{}]
-    message = choices[0].get("message", {}) if choices else {}
-    content = message.get("content") or ""
-    preview = content[:200] if content else ""
+        for raw_line in client.stream_bench_chat_completions(payload):
+            line = raw_line.strip()
+            if line.startswith(": generation_stats "):
+                with contextlib.suppress(json.JSONDecodeError):
+                    stats = json.loads(line[len(": generation_stats ") :])
+                continue
+            if not line.startswith("data: "):
+                continue
+            data = line[6:]
+            if data == "[DONE]":
+                break
+            chunk: dict[str, Any] = {}
+            with contextlib.suppress(json.JSONDecodeError):
+                chunk = json.loads(data)
+            # Guard against "choices": [] (IndexError) and a null "delta".
+            delta = (chunk.get("choices") or [{}])[0].get("delta") or {}
+            if delta.get("content"):
+                if first_token_time is None:
+                    first_token_time = time.perf_counter()
+                tokens += 1
+                text_parts.append(delta["content"])
+
+        elapsed = time.perf_counter() - t0
+        preview = "".join(text_parts)[:200]
+
+        if not stats:
+            ttft = (first_token_time - t0) if first_token_time else elapsed
+            gen_time = elapsed - ttft if tokens > 1 else elapsed
+            gen_tps = (tokens - 1) / gen_time if tokens > 1 and gen_time > 0 else 0.0
+            prompt_tps = pp_tokens / ttft if ttft > 0 else 0.0
+            stats = {
+                "prompt_tokens": pp_tokens,
+                "generation_tokens": tokens,
+                "prompt_tps": round(prompt_tps, 2),
+                "generation_tps": round(gen_tps, 2),
+                "peak_memory_usage": {"inBytes": 0},
+            }
 
     return {
         "elapsed_s": elapsed,
@@ -271,9 +362,19 @@ class PromptSizer:
     def _make_counter(tokenizer: Any) -> Callable[[str], int]:
         def count_fn(user_content: str) -> int:
             messages = [{"role": "user", "content": user_content}]
-            ids = tokenizer.apply_chat_template(
-                messages, tokenize=True, add_generation_prompt=True
-            )
+            try:
+                ids = tokenizer.apply_chat_template(
+                    messages, tokenize=True, add_generation_prompt=True
+                )
+            except ValueError:
+                # Models without a Jinja chat template (e.g. DeepSeek V4 which
+                # ships its own Python encoder). Use the exo-side V4 encoder.
+                from exo.worker.engines.mlx.deepseek_v4_encoding import (
+                    encode_messages as encode_v4,
+                )
+
+                prompt = encode_v4(messages, thinking_mode="thinking")
+                ids = tokenizer.encode(prompt, add_special_tokens=False)
             # Fix for transformers 5.x
             if hasattr(ids, "input_ids"):
                 ids = ids.input_ids
@@ -368,6 +469,11 @@ def main() -> int:
         action="store_true",
         help="Force all pp×tg combinations (cartesian product) even when lists have equal length.",
     )
+    ap.add_argument(
+        "--stream",
+        action="store_true",
+        help="Use /bench/chat/completions with streaming SSE response (bench=True still applies: no EOS detection, no KV cache).",
+    )
     ap.add_argument(
         "--no-system-metrics",
         action="store_true",
@@ -379,6 +485,11 @@ def main() -> int:
         default=1.0,
         help="System metrics polling interval in seconds (default: 1.0).",
     )
+    ap.add_argument(
+        "--use-prefix-cache",
+        action="store_true",
+        help="Enable KV prefix cache during bench (default: disabled for cold-cache measurements).",
+    )
     args = ap.parse_args()
 
     pp_list = parse_int_list(args.pp)
@@ -394,6 +505,15 @@ def main() -> int:
         logger.error("--concurrency values must be >= 1")
         return 2
 
+    if args.use_prefix_cache:
+        logger.warning(
+            "--use-prefix-cache: prompt TPS will be approximate. See METHODOLOGY.md for details."
+        )
+        if pp_list != sorted(pp_list):
+            logger.warning(
+                "--pp values are not in ascending order: prompt TPS will be less accurate. Use ascending --pp for best results."
+            )
+
     # Log pairing mode
     use_combinations = args.all_combinations or len(pp_list) != len(tg_list)
     if use_combinations:
@@ -419,81 +539,124 @@ def main() -> int:
         logger.error("[exo-bench] tokenizer usable but prompt sizing failed")
         raise
 
-    selected = settle_and_fetch_placements(
-        client, full_model_id, args, settle_timeout=args.settle_timeout
-    )
+    # Optionally reuse a running instance for this model
+    reused_instance_id: str | None = None
+    if args.reuse_instance:
+        existing = find_existing_instance(client, full_model_id)
+        if existing:
+            reused_instance_id = existing
+            logger.info(f"Reusing existing instance {reused_instance_id}")
+        else:
+            logger.warning(
+                "--reuse-instance: no existing instance found, creating a new one"
+            )
 
-    if not selected:
-        logger.error("No valid placements matched your filters.")
-        return 1
-
-    selected.sort(
-        key=lambda p: (
-            str(p.get("instance_meta", "")),
-            str(p.get("sharding", "")),
-            -nodes_used_in_instance(p["instance"]),
-        ),
-        reverse=True,
-    )
-
-    logger.debug(f"exo-bench model: short_id={short_id} full_id={full_model_id}")
-    logger.info(f"placements: {len(selected)}")
-    for p in selected:
-        logger.info(
-            f"  - {p['sharding']} / {p['instance_meta']} / nodes={nodes_used_in_instance(p['instance'])}"
+    if reused_instance_id is not None:
+        if args.dry_run:  # --dry-run must not benchmark, even on a reused instance
+            return 0
+        selected, download_duration_s = [], None  # no placements, no download
+    else:
+        selected = settle_and_fetch_placements(
+            client, full_model_id, args, settle_timeout=args.settle_timeout
         )
 
-    if args.dry_run:
-        return 0
+        if not selected:
+            logger.error("No valid placements matched your filters.")
+            return 1
 
-    settle_deadline = (
-        time.monotonic() + args.settle_timeout if args.settle_timeout > 0 else None
-    )
+        selected.sort(
+            key=lambda p: (
+                str(p.get("instance_meta", "")),
+                str(p.get("sharding", "")),
+                nodes_used_in_instance(p["instance"]),
+            ),
+            reverse=True,
+        )
 
-    logger.info("Planning phase: checking downloads...")
-    download_duration_s = run_planning_phase(
-        client,
-        full_model_id,
-        selected[0],
-        args.danger_delete_downloads,
-        args.timeout,
-        settle_deadline,
-    )
-    if download_duration_s is not None:
-        logger.info(f"Download: {download_duration_s:.1f}s (freshly downloaded)")
-    else:
-        logger.info("Download: model already cached")
+        logger.debug(f"exo-bench model: short_id={short_id} full_id={full_model_id}")
+        logger.info(f"placements: {len(selected)}")
+        for p in selected:
+            logger.info(
+                f"  - {p['sharding']} / {p['instance_meta']} / nodes={nodes_used_in_instance(p['instance'])}"
+            )
+
+        if args.dry_run:
+            return 0
+
+        settle_deadline = (
+            time.monotonic() + args.settle_timeout if args.settle_timeout > 0 else None
+        )
+
+        logger.info("Planning phase: checking downloads...")
+        download_duration_s = run_planning_phase(
+            client,
+            full_model_id,
+            selected[0],
+            args.danger_delete_downloads,
+            args.timeout,
+            settle_deadline,
+        )
+        if download_duration_s is not None:
+            logger.info(f"Download: {download_duration_s:.1f}s (freshly downloaded)")
+        else:
+            logger.info("Download: model already cached")
 
     cluster_snapshot = capture_cluster_snapshot(client)
     all_rows: list[dict[str, Any]] = []
     all_system_metrics: dict[str, dict[str, dict[str, float]]] = {}
 
+    # If reusing an existing instance, run a single benchmark pass against it
+    if reused_instance_id is not None:
+        selected = [None]
+
     for preview in selected:
-        instance = preview["instance"]
-        instance_id = instance_id_from_instance(instance)
+        created_instance = False
+        if preview is not None:
+            instance = preview["instance"]
+            instance_id = instance_id_from_instance(instance)
 
-        sharding = str(preview["sharding"])
-        instance_meta = str(preview["instance_meta"])
-        n_nodes = nodes_used_in_instance(instance)
+            sharding = str(preview["sharding"])
+            instance_meta = str(preview["instance_meta"])
+            n_nodes = nodes_used_in_instance(instance)
 
-        logger.info("=" * 80)
-        logger.info(
-            f"PLACEMENT: {sharding} / {instance_meta} / nodes={n_nodes} / instance_id={instance_id}"
-        )
+            logger.info("=" * 80)
+            logger.info(
+                f"PLACEMENT: {sharding} / {instance_meta} / nodes={n_nodes} / instance_id={instance_id}"
+            )
 
-        client.request_json("POST", "/instance", body={"instance": instance})
-        try:
-            wait_for_instance_ready(client, instance_id)
-        except (RuntimeError, TimeoutError) as e:
-            logger.error(f"Failed to initialize placement: {e}")
-            with contextlib.suppress(ExoHttpError):
-                client.request_json("DELETE", f"/instance/{instance_id}")
-            continue
+            # Delete any existing instances to free resources before placing
+            try:
+                state = client.request_json("GET", "/state")
+                for old_id in list(state.get("instances", {}).keys()):
+                    logger.info(f"Deleting stale instance {old_id}")
+                    with contextlib.suppress(ExoHttpError):
+                        client.request_json("DELETE", f"/instance/{old_id}")
+                if state.get("instances"):
+                    time.sleep(2)
+            except Exception as e:
+                logger.warning(f"Failed to clean up stale instances: {e}")
 
-        time.sleep(1)
+            client.request_json("POST", "/instance", body={"instance": instance})
+            try:
+                wait_for_instance_ready(client, instance_id)
+            except (RuntimeError, TimeoutError) as e:
+                logger.error(f"Failed to initialize placement: {e}")
+                with contextlib.suppress(ExoHttpError):
+                    client.request_json("DELETE", f"/instance/{instance_id}")
+                continue
+
+            time.sleep(1)
+            created_instance = True
+        else:
+            instance_id = reused_instance_id
+            sharding = "reused"
+            instance_meta = "reused"
+            n_nodes = 0
+            logger.info("=" * 80)
+            logger.info(f"Using existing instance {instance_id}")
 
         sampler: SystemMetricsSampler | None = None
-        if not args.no_system_metrics:
+        if not args.no_system_metrics and preview is not None:
             nids = node_ids_from_instance(instance)
             sampler = SystemMetricsSampler(
                 ExoClient(args.host, args.port, timeout_s=30),
@@ -502,11 +665,20 @@ def main() -> int:
             )
             sampler.start()
 
+        def _do_one(c: ExoClient, pp: int, tg: int) -> tuple[dict[str, Any], int]:
+            return run_one_completion(
+                c,
+                full_model_id,
+                pp,
+                tg,
+                prompt_sizer,
+                use_prefix_cache=args.use_prefix_cache,
+                stream=args.stream,
+            )
+
         try:
             for i in range(args.warmup):
-                run_one_completion(
-                    client, full_model_id, pp_list[0], tg_list[0], prompt_sizer
-                )
+                _do_one(client, pp_list[0], tg_list[0])
                 logger.debug(f"  warmup {i + 1}/{args.warmup} done")
 
             # If pp and tg lists have same length, run in tandem (zip)
@@ -528,9 +700,7 @@ def main() -> int:
                             # Sequential: single request
                             try:
                                 inf_t0 = time.monotonic()
-                                row, actual_pp_tokens = run_one_completion(
-                                    client, full_model_id, pp, tg, prompt_sizer
-                                )
+                                row, actual_pp_tokens = _do_one(client, pp, tg)
                                 inference_windows.append((inf_t0, time.monotonic()))
                             except Exception as e:
                                 logger.error(e)
@@ -566,6 +736,7 @@ def main() -> int:
                                 "stream": False,
                                 "max_tokens": tg,
                                 "logprobs": False,
+                                "use_prefix_cache": args.use_prefix_cache,
                             }
                             barrier = threading.Barrier(concurrency)
                             batch_start = threading.Event()
@@ -678,10 +849,12 @@ def main() -> int:
                         gen_tps = per_req_tps * concurrency
                         ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
                         gtok = mean(x["stats"]["generation_tokens"] for x in runs)
-                        peak = mean(
-                            x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
-                        )
 
+                        def _peak_bytes(s: dict[str, Any]) -> float:
+                            pm = s["peak_memory_usage"]
+                            return pm.get("inBytes") or pm.get("in_bytes", 0)
+
+                        peak = mean(_peak_bytes(x["stats"]) for x in runs)
                         summary = (
                             f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
                             f"prompt_tokens={ptok} gen_tokens={gtok}    "
@@ -706,15 +879,16 @@ def main() -> int:
                 if placement_metrics:
                     all_system_metrics.update(placement_metrics)
 
-            try:
-                client.request_json("DELETE", f"/instance/{instance_id}")
-            except ExoHttpError as e:
-                if e.status != 404:
-                    raise
-            wait_for_instance_gone(client, instance_id)
-            logger.debug(f"Deleted instance {instance_id}")
+            if created_instance and instance_id is not None:
+                try:
+                    client.request_json("DELETE", f"/instance/{instance_id}")
+                except ExoHttpError as e:
+                    if e.status != 404:
+                        raise
+                wait_for_instance_gone(client, instance_id)
+                logger.debug(f"Deleted instance {instance_id}")
 
-            time.sleep(5)
+                time.sleep(5)
 
     output: dict[str, Any] = {"runs": all_rows}
     if cluster_snapshot:
diff --git a/bench/exo_eval.py b/bench/exo_eval.py
index fb4d55f3b..04b14e209 100644
--- a/bench/exo_eval.py
+++ b/bench/exo_eval.py
@@ -42,11 +42,11 @@ from pathlib import Path
 from typing import Any
 
 import httpx
-from harness import (
-    ExoClient,
-    ExoHttpError,
+from exo_tools.client import ExoClient, ExoHttpError
+from exo_tools.harness import (
     add_common_instance_args,
     capture_cluster_snapshot,
+    find_existing_instance,
     instance_id_from_instance,
     nodes_used_in_instance,
     resolve_model_short_id,
@@ -62,6 +62,15 @@ from loguru import logger
 # ---------------------------------------------------------------------------
 
 MAX_RETRIES = 30
+INSTANCE_HEALTH_CHECK_AFTER = (
+    3  # 0-based retry index from which a connection error triggers a health probe
+)
+
+
+class InstanceFailedError(RuntimeError):
+    """Raised when the exo instance is detected as failed/gone."""
+
+
 DEFAULT_MAX_TOKENS = 16_384
 REASONING_MAX_TOKENS = 131_072
 TEMPERATURE_NON_REASONING = 0.0
@@ -271,7 +280,7 @@ def run_humaneval_test(
 
 @dataclass
 class QuestionResult:
-    question_id: int
+    question_id: int | str
     prompt: str
     response: str
     extracted_answer: str | None
@@ -281,7 +290,11 @@ class QuestionResult:
     prompt_tokens: int = 0
     completion_tokens: int = 0
     reasoning_tokens: int = 0
+    reasoning_content: str = ""
+    finish_reason: str = ""
     elapsed_s: float = 0.0
+    power_watts: float = 0.0
+    energy_joules: float = 0.0
 
 
 @dataclass
@@ -517,6 +530,10 @@ class ApiResult:
     prompt_tokens: int
     completion_tokens: int
     reasoning_tokens: int
+    reasoning_content: str = ""
+    finish_reason: str = ""
+    power_watts: float = 0.0
+    energy_joules: float = 0.0
 
 
 async def _call_api(
@@ -530,6 +547,9 @@ async def _call_api(
     system_message: str | None = None,
     reasoning_effort: str | None = None,
     top_p: float | None = None,
+    top_k: int | None = None,
+    min_p: float | None = None,
+    enable_thinking: bool | None = None,
 ) -> ApiResult:
     messages = []
     if system_message:
@@ -546,6 +566,12 @@ async def _call_api(
         body["reasoning_effort"] = reasoning_effort
     if top_p is not None:
         body["top_p"] = top_p
+    if top_k is not None:
+        body["top_k"] = top_k
+    if min_p is not None:
+        body["min_p"] = min_p
+    if enable_thinking is not None:
+        body["enable_thinking"] = enable_thinking
 
     resp = await client.post(
         f"{base_url}/v1/chat/completions",
@@ -554,19 +580,40 @@ async def _call_api(
     )
     resp.raise_for_status()
     data = resp.json()
-    content = data["choices"][0]["message"]["content"]
-    if not content or not content.strip():
+    choice = data["choices"][0]
+    message = choice["message"]
+    content = message.get("content") or ""
+    reasoning_content = message.get("reasoning_content") or ""
+    finish_reason = choice.get("finish_reason") or ""
+
+    # For thinking models, empty content is expected when finish_reason is "length"
+    if not content.strip() and finish_reason != "length" and not reasoning_content:
         raise ValueError("Empty response from model")
     usage = data.get("usage", {})
     details = usage.get("completion_tokens_details", {})
+    power = data.get("power_usage") or {}
     return ApiResult(
         content=content,
         prompt_tokens=usage.get("prompt_tokens", 0),
         completion_tokens=usage.get("completion_tokens", 0),
         reasoning_tokens=details.get("reasoning_tokens", 0) if details else 0,
+        reasoning_content=reasoning_content,
+        finish_reason=finish_reason,
+        power_watts=power.get("total_avg_sys_power_watts", 0.0),
+        energy_joules=power.get("total_energy_joules", 0.0),
     )
 
 
+async def _check_instance_health(base_url: str) -> bool:
+    """Probe GET {base_url}/models (5 s timeout); True only on HTTP 200, False on any error."""
+    try:
+        async with httpx.AsyncClient() as probe:
+            reply = await probe.get(f"{base_url}/models", timeout=5.0)
+        return reply.status_code == 200
+    except Exception:
+        return False
+
+
 async def call_with_retries(
     client: httpx.AsyncClient,
     base_url: str,
@@ -578,8 +625,14 @@ async def call_with_retries(
     system_message: str | None = None,
     reasoning_effort: str | None = None,
     top_p: float | None = None,
+    top_k: int | None = None,
+    min_p: float | None = None,
+    enable_thinking: bool | None = None,
+    instance_failed: asyncio.Event | None = None,
 ) -> ApiResult | None:
     for attempt in range(MAX_RETRIES):
+        if instance_failed and instance_failed.is_set():
+            raise InstanceFailedError("Instance already marked as failed")
         try:
             return await _call_api(
                 client,
@@ -592,8 +645,30 @@ async def call_with_retries(
                 system_message,
                 reasoning_effort,
                 top_p,
+                top_k,
+                min_p,
+                enable_thinking,
             )
         except Exception as e:
+            is_conn_error = isinstance(
+                e,
+                (
+                    httpx.ConnectError,
+                    httpx.RemoteProtocolError,
+                    ConnectionRefusedError,
+                    OSError,
+                ),
+            )
+            if (
+                is_conn_error
+                and attempt >= INSTANCE_HEALTH_CHECK_AFTER
+                and not await _check_instance_health(base_url)
+            ):
+                if instance_failed:
+                    instance_failed.set()
+                raise InstanceFailedError(
+                    f"Instance is down after {attempt + 1} failures: {e}"
+                ) from e
             if attempt < MAX_RETRIES - 1:
                 wait = min(2**attempt, 60)
                 logger.warning(
@@ -618,10 +693,16 @@ async def evaluate_benchmark(
     max_tokens: int,
     concurrency: int = 1,
     limit: int | None = None,
+    offset: int = 0,
     timeout: float | None = None,
     reasoning_effort: str | None = None,
     top_p: float | None = None,
+    top_k: int | None = None,
+    min_p: float | None = None,
+    enable_thinking: bool | None = None,
     difficulty: str | None = None,
+    checkpoint_path: Path | None = None,
+    release_version: str | None = None,
 ) -> list[QuestionResult]:
     """Run a benchmark. Returns per-question results."""
     import datasets
@@ -652,7 +733,21 @@ async def evaluate_benchmark(
         ds = ds.filter(lambda x: x["difficulty"] == difficulty)
         logger.info(f"Filtered to {len(ds)} {difficulty} problems")
 
+    if release_version and "release_version" in ds.column_names:
+        ds = ds.filter(lambda x: x["release_version"] == release_version)
+        logger.info(
+            f"Filtered to {len(ds)} problems with release_version={release_version}"
+        )
+
+    # Sort by question_id to match LCB runner ordering (scenario_router.py:60).
+    # This ensures [offset:offset+limit] slices select the same problems as vllm.
+    if "question_id" in ds.column_names:
+        ds = ds.sort("question_id")
+
     total = len(ds)
+    if offset > 0:
+        ds = ds.select(range(min(offset, total), total))
+        total = len(ds)
     if limit and limit < total:
         ds = ds.select(range(limit))
         total = limit
@@ -660,6 +755,13 @@ async def evaluate_benchmark(
     logger.info(
         f"Evaluating {benchmark_name}: {total} questions, concurrency={concurrency}, "
         f"temperature={temperature}, max_tokens={max_tokens}"
+        + (f", top_k={top_k}" if top_k is not None else "")
+        + (f", min_p={min_p}" if min_p is not None else "")
+        + (
+            f", enable_thinking={enable_thinking}"
+            if enable_thinking is not None
+            else ""
+        )
     )
 
     if config.kind == "code":
@@ -667,16 +769,64 @@ async def evaluate_benchmark(
             "Code benchmarks execute model-generated code. Use a sandboxed environment."
         )
 
+    # Load checkpoint for resume
+    checkpoint_data: dict[str | int, dict[str, Any]] = {}
+    if checkpoint_path and checkpoint_path.exists():
+        with open(checkpoint_path) as f:
+            # Whitespace-only lines are not JSONL records; skip them, don't crash.
+            for entry in map(json.loads, filter(str.strip, f)):
+                checkpoint_data[entry["question_id"]] = entry
+        logger.info(f"Loaded {len(checkpoint_data)} checkpointed results")
+
     semaphore = asyncio.Semaphore(concurrency)
+    instance_failed = asyncio.Event()
     results: list[QuestionResult | None] = [None] * total
     completed = 0
     lock = asyncio.Lock()
 
+    def _get_question_id(idx: int, doc: dict) -> str | int:
+        """Return the checkpoint key for a row: the dataset's own ID, else ``idx``."""
+        id_field = {"livecodebench": "question_id", "humaneval": "task_id"}.get(
+            benchmark_name
+        )
+        # Benchmarks with no mapped ID field are keyed by their row index.
+        return idx if id_field is None else doc.get(id_field, idx)
+
     async def process_question(
         idx: int, doc: dict, http_client: httpx.AsyncClient
     ) -> None:
         nonlocal completed
         system_msg = None
+        question_id = _get_question_id(idx, doc)
+
+        # Bail out early if instance is already dead
+        if instance_failed.is_set():
+            return
+
+        # Check checkpoint
+        if question_id in checkpoint_data:
+            cached = checkpoint_data[question_id]
+            results[idx] = QuestionResult(
+                question_id=question_id,
+                prompt=cached.get("prompt", ""),
+                response=cached.get("response", ""),
+                extracted_answer=cached.get("extracted_answer"),
+                gold_answer=cached.get("gold_answer", ""),
+                correct=cached.get("correct", False),
+                error=cached.get("error"),
+                prompt_tokens=cached.get("prompt_tokens", 0),
+                completion_tokens=cached.get("completion_tokens", 0),
+                reasoning_tokens=cached.get("reasoning_tokens", 0),
+                reasoning_content=cached.get("reasoning_content", ""),
+                finish_reason=cached.get("finish_reason", ""),
+                elapsed_s=cached.get("elapsed_s", 0.0),
+                power_watts=cached.get("power_watts", 0.0),
+                energy_joules=cached.get("energy_joules", 0.0),
+            )
+            async with lock:
+                completed += 1
+            logger.info(f"  [{completed}/{total}] {question_id} (cached)")
+            return
 
         if benchmark_name == "gpqa_diamond":
             prompt, gold = format_gpqa_question(doc, idx)
@@ -697,24 +847,50 @@ async def evaluate_benchmark(
             raise ValueError(f"Unknown benchmark: {benchmark_name}")
 
         async with semaphore:
+            if instance_failed.is_set():
+                return
             t0 = time.monotonic()
-            api_result = await call_with_retries(
-                http_client,
-                base_url,
-                model,
-                prompt,
-                temperature,
-                max_tokens,
-                timeout,
-                system_message=system_msg,
-                reasoning_effort=reasoning_effort,
-                top_p=top_p,
-            )
+            try:
+                # Race the API call against the instance_failed event
+                api_task = asyncio.create_task(
+                    call_with_retries(
+                        http_client,
+                        base_url,
+                        model,
+                        prompt,
+                        temperature,
+                        max_tokens,
+                        timeout,
+                        system_message=system_msg,
+                        reasoning_effort=reasoning_effort,
+                        top_p=top_p,
+                        top_k=top_k,
+                        min_p=min_p,
+                        enable_thinking=enable_thinking,
+                        instance_failed=instance_failed,
+                    )
+                )
+                failed_waiter = asyncio.create_task(instance_failed.wait())
+                done, pending = await asyncio.wait(
+                    [api_task, failed_waiter],
+                    return_when=asyncio.FIRST_COMPLETED,
+                )
+                for p in pending:
+                    p.cancel()
+                    with contextlib.suppress(asyncio.CancelledError):
+                        await p
+                if instance_failed.is_set() and api_task not in done:
+                    logger.error(f"Instance failed, aborting {question_id}")
+                    return
+                api_result = api_task.result()
+            except InstanceFailedError:
+                logger.error(f"Instance failed, skipping {question_id}")
+                return
             elapsed = time.monotonic() - t0
 
         if api_result is None:
             result = QuestionResult(
-                question_id=idx,
+                question_id=question_id,
                 prompt=prompt,
                 response="",
                 extracted_answer=None,
@@ -729,13 +905,17 @@ async def evaluate_benchmark(
                 "prompt_tokens": api_result.prompt_tokens,
                 "completion_tokens": api_result.completion_tokens,
                 "reasoning_tokens": api_result.reasoning_tokens,
+                "reasoning_content": api_result.reasoning_content,
+                "finish_reason": api_result.finish_reason,
                 "elapsed_s": elapsed,
+                "power_watts": api_result.power_watts,
+                "energy_joules": api_result.energy_joules,
             }
 
             if config.kind == "mc":
                 extracted = extract_mc_answer(response, valid_letters)
                 result = QuestionResult(
-                    question_id=idx,
+                    question_id=question_id,
                     prompt=prompt,
                     response=response,
                     extracted_answer=extracted,
@@ -749,7 +929,7 @@ async def evaluate_benchmark(
                     check_aime_answer(extracted, int(gold)) if extracted else False
                 )
                 result = QuestionResult(
-                    question_id=idx,
+                    question_id=question_id,
                     prompt=prompt,
                     response=response,
                     extracted_answer=extracted,
@@ -763,7 +943,7 @@ async def evaluate_benchmark(
                 code = extract_code_block(response, preserve_indent=keep_indent)
                 if code is None:
                     result = QuestionResult(
-                        question_id=idx,
+                        question_id=question_id,
                         prompt=prompt,
                         response=response,
                         extracted_answer=None,
@@ -778,7 +958,7 @@ async def evaluate_benchmark(
                         code,
                     )
                     result = QuestionResult(
-                        question_id=idx,
+                        question_id=question_id,
                         prompt=prompt,
                         response=response,
                         extracted_answer="pass" if passed else "fail",
@@ -793,7 +973,7 @@ async def evaluate_benchmark(
                         exec_meta["sample"],
                     )
                     result = QuestionResult(
-                        question_id=idx,
+                        question_id=question_id,
                         prompt=prompt,
                         response=response,
                         extracted_answer="pass" if passed else "fail",
@@ -804,7 +984,7 @@ async def evaluate_benchmark(
                     )
                 else:
                     result = QuestionResult(
-                        question_id=idx,
+                        question_id=question_id,
                         prompt=prompt,
                         response=response,
                         extracted_answer=None,
@@ -815,7 +995,7 @@ async def evaluate_benchmark(
                     )
             else:
                 result = QuestionResult(
-                    question_id=idx,
+                    question_id=question_id,
                     prompt=prompt,
                     response=response,
                     extracted_answer=None,
@@ -827,24 +1007,82 @@ async def evaluate_benchmark(
 
         results[idx] = result
 
+        # Write checkpoint (skip infra failures so they get retried on resume,
+        # but keep wrong answers — they are legitimate results)
+        if checkpoint_path is not None and result.response:
+            _write_checkpoint(checkpoint_path, result)
+
         async with lock:
             completed += 1
             n = completed
-        if n % max(1, total // 20) == 0 or n == total:
-            correct_so_far = sum(1 for r in results if r is not None and r.correct)
-            answered = sum(1 for r in results if r is not None)
-            logger.info(
-                f"  [{n}/{total}] {correct_so_far}/{answered} correct "
-                f"({correct_so_far / max(answered, 1):.1%})"
-            )
+
+        # Log progress
+        thinking_info = ""
+        if result.reasoning_content:
+            thinking_info = f", {len(result.reasoning_content)} chars thinking"
+        logger.info(
+            f"  [{n}/{total}] {question_id}: {len(result.response)} chars{thinking_info}, "
+            f"tokens: {result.prompt_tokens}+{result.completion_tokens} "
+            f"[{result.finish_reason}]"
+            + (f" {result.extracted_answer}" if result.extracted_answer else "")
+        )
+
+    async def _health_monitor() -> None:
+        """Poll the instance; flag ``instance_failed`` after two failed probes."""
+        await asyncio.sleep(10)  # grace period before the first probe
+        while not instance_failed.is_set():
+            alive = await _check_instance_health(base_url)
+            if not alive:
+                await asyncio.sleep(2)  # re-probe once to rule out a blip
+                alive = await _check_instance_health(base_url)
+            if not alive:
+                logger.error("Health monitor: instance is down!")
+                instance_failed.set()
+                return
+            await asyncio.sleep(5)
 
     async with httpx.AsyncClient() as http_client:
+        monitor = asyncio.create_task(_health_monitor())
         tasks = [process_question(i, doc, http_client) for i, doc in enumerate(ds)]
         await asyncio.gather(*tasks)
+        monitor.cancel()
+        with contextlib.suppress(asyncio.CancelledError):
+            await monitor
+
+    if instance_failed.is_set():
+        completed_count = sum(1 for r in results if r is not None)
+        logger.error(
+            f"Instance failed! Completed {completed_count}/{total} problems. "
+            f"Checkpoint saved — restart to resume remaining problems."
+        )
+        raise InstanceFailedError("Instance failed during evaluation")
 
     return [r for r in results if r is not None]
 
 
+def _write_checkpoint(path: Path, result: QuestionResult) -> None:
+    """Serialise ``result`` as one JSON line and append it to ``path``.
+
+    ``elapsed_s``, ``power_watts`` and ``energy_joules`` are rounded to two
+    decimals; every other field is written unchanged.
+    """
+    verbatim = (
+        "question_id", "prompt", "response", "extracted_answer",
+        "gold_answer", "correct", "error", "prompt_tokens",
+        "completion_tokens", "reasoning_tokens", "reasoning_content",
+        "finish_reason",
+    )
+    rounded = ("elapsed_s", "power_watts", "energy_joules")
+
+    # Insertion order below is the key order of the emitted JSON object.
+    record = {name: getattr(result, name) for name in verbatim}
+    for name in rounded:
+        record[name] = round(getattr(result, name), 2)
+
+    with open(path, "a") as checkpoint:
+        checkpoint.write(json.dumps(record) + "\n")
+
+
 # ---------------------------------------------------------------------------
 # Results display
 # ---------------------------------------------------------------------------
@@ -867,6 +1105,8 @@ def print_results(
     total_elapsed = sum(r.elapsed_s for r in results)
     wall_clock = max(r.elapsed_s for r in results) if results else 0.0
     avg_gen_tps = total_completion_tokens / total_elapsed if total_elapsed > 0 else 0.0
+    total_energy = sum(r.energy_joules for r in results)
+    avg_power = sum(r.power_watts for r in results) / max(total, 1)
 
     label = f"[c={concurrency}] " if concurrency is not None else ""
     print(f"\n{label}{benchmark_name}: {correct}/{total} ({accuracy:.1%})")
@@ -878,6 +1118,10 @@ def print_results(
         f"  |  total time: {total_elapsed:.1f}s  wall clock: {wall_clock:.1f}s"
     )
     print(tok_line)
+    if total_energy > 0:
+        print(
+            f"  power: avg {avg_power:.1f}W  |  total energy: {total_energy:.1f}J ({total_energy / 3600:.2f}Wh)"
+        )
     if errors:
         print(f"  API errors: {errors}")
     if no_extract:
@@ -896,6 +1140,8 @@ def print_results(
         "total_elapsed_s": total_elapsed,
         "wall_clock_s": wall_clock,
         "avg_gen_tps": avg_gen_tps,
+        "avg_power_watts": avg_power,
+        "total_energy_joules": total_energy,
     }
 
 
@@ -1053,7 +1299,11 @@ def save_results(
                 "prompt_tokens": r.prompt_tokens,
                 "completion_tokens": r.completion_tokens,
                 "reasoning_tokens": r.reasoning_tokens,
+                "reasoning_content": r.reasoning_content,
+                "finish_reason": r.finish_reason,
                 "elapsed_s": round(r.elapsed_s, 2),
+                "power_watts": round(r.power_watts, 2),
+                "energy_joules": round(r.energy_joules, 2),
             }
             for r in results
         ],
@@ -1069,6 +1319,15 @@ def save_results(
 # ---------------------------------------------------------------------------
 
 
+def _checkpoint_path(
+    results_dir: str, benchmark: str, model: str, concurrency: int
+) -> Path:
+    """Ensure the per-model/benchmark directory exists; return its checkpoint file."""
+    run_dir = Path(results_dir, "_".join(model.split("/")), benchmark)
+    run_dir.mkdir(parents=True, exist_ok=True)
+    return run_dir.joinpath(f"c{concurrency}.checkpoint.jsonl")
+
+
 def parse_int_list(values: list[str]) -> list[int]:
     items: list[int] = []
     for v in values:
@@ -1096,6 +1355,12 @@ def main() -> int:
         default=None,
         help="Max questions per benchmark (for fast iteration).",
     )
+    ap.add_argument(
+        "--offset",
+        type=int,
+        default=0,
+        help="Skip first N questions (0-based).",
+    )
 
     reasoning_group = ap.add_mutually_exclusive_group()
     reasoning_group.add_argument(
@@ -1115,6 +1380,8 @@ def main() -> int:
         "--temperature", type=float, default=None, help="Override temperature."
     )
     ap.add_argument("--top-p", type=float, default=None, help="Override top_p.")
+    ap.add_argument("--top-k", type=int, default=None, help="Override top_k.")
+    ap.add_argument("--min-p", type=float, default=None, help="Override min_p.")
     ap.add_argument(
         "--max-tokens", type=int, default=None, help="Override max output tokens."
     )
@@ -1148,15 +1415,31 @@ def main() -> int:
         choices=["easy", "medium", "hard"],
         help="Filter by difficulty (livecodebench only). E.g. --difficulty hard",
     )
+    ap.add_argument(
+        "--release-version",
+        default=None,
+        help="LCB dataset release version (livecodebench only). E.g. release_v5",
+    )
     ap.add_argument(
         "--results-dir",
         default="eval_results",
         help="Directory for result JSON files (default: eval_results).",
     )
     ap.add_argument(
-        "--skip-instance-setup",
+        "--enable-thinking",
+        type=lambda v: v.lower() in ("true", "1", "yes"),
+        default=None,
+        help="Enable thinking mode for models that support it.",
+    )
+    ap.add_argument(
+        "--force",
         action="store_true",
-        help="Skip exo instance management (assumes model is already running).",
+        help="Discard any existing checkpoint and run from scratch.",
+    )
+    ap.add_argument(
+        "--keep-instance",
+        action="store_true",
+        help="Skip deleting the instance after eval (for chaining runs).",
     )
 
     args, _ = ap.parse_known_args()
@@ -1177,13 +1460,26 @@ def main() -> int:
     # Instance management
     client = ExoClient(args.host, args.port, timeout_s=args.timeout)
     instance_id: str | None = None
+    created_instance = False
 
-    if not args.skip_instance_setup:
-        short_id, full_model_id = resolve_model_short_id(
-            client,
-            args.model,
-            force_download=args.force_download,
-        )
+    _short_id, full_model_id = resolve_model_short_id(
+        client,
+        args.model,
+        force_download=args.force_download,
+    )
+
+    # Optionally reuse a running instance for this model
+    if args.reuse_instance:
+        existing = find_existing_instance(client, full_model_id)
+        if existing:
+            instance_id = existing
+            logger.info(f"Reusing existing instance {instance_id}")
+        else:
+            logger.warning(
+                "--reuse-instance: no existing instance found, creating a new one"
+            )
+
+    if instance_id is None:
         selected = settle_and_fetch_placements(
             client,
             full_model_id,
@@ -1198,7 +1494,7 @@ def main() -> int:
             key=lambda p: (
                 str(p.get("instance_meta", "")),
                 str(p.get("sharding", "")),
-                -nodes_used_in_instance(p["instance"]),
+                nodes_used_in_instance(p["instance"]),
             ),
             reverse=True,
         )
@@ -1225,6 +1521,18 @@ def main() -> int:
         if download_duration is not None:
             logger.info(f"Download: {download_duration:.1f}s")
 
+        # Delete any existing instances to free resources before placing
+        try:
+            state = client.request_json("GET", "/state")
+            for old_id in list(state.get("instances", {}).keys()):
+                logger.info(f"Deleting stale instance {old_id}")
+                with contextlib.suppress(ExoHttpError):
+                    client.request_json("DELETE", f"/instance/{old_id}")
+            if state.get("instances"):
+                time.sleep(2)
+        except Exception as e:
+            logger.warning(f"Failed to clean up stale instances: {e}")
+
         client.request_json("POST", "/instance", body={"instance": instance})
         try:
             wait_for_instance_ready(client, instance_id)
@@ -1234,10 +1542,9 @@ def main() -> int:
                 client.request_json("DELETE", f"/instance/{instance_id}")
             return 1
         time.sleep(1)
-        cluster_snapshot = capture_cluster_snapshot(client)
-    else:
-        full_model_id = args.model
-        cluster_snapshot = None
+        created_instance = True
+
+    cluster_snapshot = capture_cluster_snapshot(client)
 
     # Auto-detect reasoning from model config
     model_config = load_model_config(full_model_id)
@@ -1291,16 +1598,57 @@ def main() -> int:
         reasoning_effort = str(cfg["reasoning_effort"])
     else:
         reasoning_effort = "high" if is_reasoning else None
+
+    if args.top_k is not None:
+        top_k: int | None = args.top_k
+    elif "top_k" in cfg:
+        top_k = int(cfg["top_k"])
+    else:
+        top_k = None
+
+    if args.min_p is not None:
+        min_p: float | None = args.min_p
+    elif "min_p" in cfg:
+        min_p = float(cfg["min_p"])
+    else:
+        min_p = None
+
+    if args.enable_thinking is not None:
+        enable_thinking: bool | None = args.enable_thinking
+    elif "enable_thinking" in cfg:
+        enable_thinking = bool(cfg["enable_thinking"])
+    else:
+        enable_thinking = None
+
     base_url = f"http://{args.host}:{args.port}"
 
     logger.info(f"Model: {full_model_id}")
     logger.info(
         f"Settings: temperature={temperature}, max_tokens={max_tokens}, "
         + (f"top_p={top_p}, " if top_p is not None else "")
+        + (f"top_k={top_k}, " if top_k is not None else "")
+        + (f"min_p={min_p}, " if min_p is not None else "")
         + f"reasoning={'yes' if is_reasoning else 'no'}"
         + (f", reasoning_effort={reasoning_effort}" if reasoning_effort else "")
+        + (
+            f", enable_thinking={enable_thinking}"
+            if enable_thinking is not None
+            else ""
+        )
     )
 
+    # Common kwargs for evaluate_benchmark
+    eval_kwargs: dict[str, Any] = {
+        "reasoning_effort": reasoning_effort,
+        "top_p": top_p,
+        "top_k": top_k,
+        "min_p": min_p,
+        "enable_thinking": enable_thinking,
+        "difficulty": args.difficulty,
+        "offset": args.offset,
+        "release_version": args.release_version,
+    }
+
     try:
         if args.compare_concurrency:
             concurrency_levels = parse_int_list(args.compare_concurrency)
@@ -1309,6 +1657,11 @@ def main() -> int:
                 for c in concurrency_levels:
                     logger.info(f"\n{'=' * 50}")
                     logger.info(f"Running {task_name} at concurrency={c}")
+                    checkpoint_path = _checkpoint_path(
+                        args.results_dir, task_name, full_model_id, c
+                    )
+                    if args.force and checkpoint_path.exists():
+                        checkpoint_path.unlink()
                     results = asyncio.run(
                         evaluate_benchmark(
                             task_name,
@@ -1319,9 +1672,8 @@ def main() -> int:
                             concurrency=c,
                             limit=args.limit,
                             timeout=args.request_timeout,
-                            reasoning_effort=reasoning_effort,
-                            top_p=top_p,
-                            difficulty=args.difficulty,
+                            checkpoint_path=checkpoint_path,
+                            **eval_kwargs,
                         )
                     )
                     if results:
@@ -1336,10 +1688,18 @@ def main() -> int:
                             cluster=cluster_snapshot,
                         )
                         results_by_c[c] = results
+                    # Clean up checkpoint on success
+                    if checkpoint_path.exists():
+                        checkpoint_path.unlink()
                 if len(results_by_c) >= 2:
                     print_comparison(task_name, results_by_c)
         else:
             for task_name in task_names:
+                checkpoint_path = _checkpoint_path(
+                    args.results_dir, task_name, full_model_id, args.num_concurrent
+                )
+                if args.force and checkpoint_path.exists():
+                    checkpoint_path.unlink()
                 results = asyncio.run(
                     evaluate_benchmark(
                         task_name,
@@ -1350,9 +1710,8 @@ def main() -> int:
                         concurrency=args.num_concurrent,
                         limit=args.limit,
                         timeout=args.request_timeout,
-                        reasoning_effort=reasoning_effort,
-                        top_p=top_p,
-                        difficulty=args.difficulty,
+                        checkpoint_path=checkpoint_path,
+                        **eval_kwargs,
                     )
                 )
                 if results:
@@ -1366,14 +1725,25 @@ def main() -> int:
                         scores,
                         cluster=cluster_snapshot,
                     )
+                # Clean up checkpoint on success
+                if checkpoint_path.exists():
+                    checkpoint_path.unlink()
     finally:
-        if instance_id is not None:
-            try:
-                client.request_json("DELETE", f"/instance/{instance_id}")
-            except ExoHttpError as e:
-                if e.status != 404:
-                    raise
-            wait_for_instance_gone(client, instance_id)
+        if created_instance and instance_id is not None:
+            if args.keep_instance:
+                logger.info(f"Keeping instance {instance_id} (--keep-instance)")
+            else:
+                try:
+                    client.request_json("DELETE", f"/instance/{instance_id}")
+                except ExoHttpError as e:
+                    if e.status != 404:
+                        raise
+                try:
+                    wait_for_instance_gone(client, instance_id)
+                except TimeoutError:
+                    logger.warning(
+                        f"Timed out waiting for instance {instance_id} to be deleted"
+                    )
 
     return 0
 
diff --git a/bench/prefill-decode.toml b/bench/prefill-decode.toml
new file mode 100644
index 000000000..a26d02007
--- /dev/null
+++ b/bench/prefill-decode.toml
@@ -0,0 +1,36 @@
+# Prefill/Decode disaggregation benchmark config.
+#
+# Top-level keys are bench-wide. [prefill] and [decode] sections set per-side
+# placement filters and (optionally) per-side model.
+#
+# Example:
+#   uv run python bench/prefill_decode_bench.py --config bench/prefill-decode.toml
+
+host = "james"
+port = 52415
+timeout = 7200.0
+settle_timeout = 60.0
+
+# Workload
+pp = [4096]
+tg = [512]
+repeat = 1
+warmup = 0
+
+json_out = "bench/prefill_decode_results.json"
+
+[prefill]
+model = "mlx-community/gpt-oss-20b-MXFP4-Q8"
+node = "mike"
+instance_meta = "ring"
+sharding = "pipeline"
+min_nodes = 1
+max_nodes = 1
+
+[decode]
+model = "mlx-community/gpt-oss-20b-MXFP4-Q8"
+node = "james"
+instance_meta = "ring"
+sharding = "pipeline"
+min_nodes = 1
+max_nodes = 1
diff --git a/bench/prefill_decode_bench.py b/bench/prefill_decode_bench.py
new file mode 100644
index 000000000..588ddb063
--- /dev/null
+++ b/bench/prefill_decode_bench.py
@@ -0,0 +1,784 @@
+#!/usr/bin/env python3
+# type: ignore
+"""Disaggregated prefill-decode benchmark for exo (MLX → MLX).
+
+Spins up two MLX instances on the cluster, marks one as Prefill source and
+the other as Decode target via /v1/instance-links, then sends chat
+completions to the API. The master routes the request to the decode
+instance and stamps `prefill_endpoint` pointing at the prefill instance —
+the worker decides per-request whether to ship prefill remotely
+(uncached_count > REMOTE_PREFILL_MIN_TOKENS).
+
+Usage:
+    uv run python bench/prefill_decode_bench.py --model <id> --pp 2048,8192 --tg 128
+    uv run python bench/prefill_decode_bench.py --model <id> --pp 4096 --tg 128 --repeat 3
+    uv run python bench/prefill_decode_bench.py --model <id> --pp 2048 --tg 128 --dry-run
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import copy
+import itertools
+import json
+import sys
+import time
+import tomllib
+from pathlib import Path
+from statistics import mean
+from typing import Any
+
+from exo_bench import (
+    PromptSizer,
+    format_peak_memory,
+    load_tokenizer_for_bench,
+    parse_int_list,
+)
+from exo_tools.client import ExoClient, ExoHttpError
+from exo_tools.harness import (
+    add_common_instance_args,
+    instance_id_from_instance,
+    node_ids_from_instance,
+    nodes_used_in_instance,
+    resolve_model_short_id,
+    run_planning_phase,
+    settle_and_fetch_placements,
+    unwrap_instance,
+    wait_for_instance_gone,
+    wait_for_instance_ready,
+)
+from loguru import logger
+
+
+def _node_id_to_friendly(client: ExoClient) -> dict[str, str]:
+    """Map node id -> friendly name for every node identity that reports one."""
+    friendly: dict[str, str] = {}
+    for raw_id, info in (client.get_node_identities() or {}).items():
+        fields = info if isinstance(info, dict) else {}
+        label = fields.get("friendlyName") or fields.get("friendly_name")
+        if isinstance(label, str):
+            friendly[str(raw_id)] = label
+    return friendly
+
+
+def _placement_node_friendly_names(
+    placement: dict[str, Any], id_to_friendly: dict[str, str]
+) -> list[str]:
+    """Friendly names (raw id when unmapped) of the placement's instance nodes."""
+    return [id_to_friendly.get(n, n) for n in node_ids_from_instance(placement["instance"])]
+
+
+def _filter_by_node(
+    placements: list[dict[str, Any]],
+    friendly_name: str,
+    id_to_friendly: dict[str, str],
+) -> list[dict[str, Any]]:
+    """Keep placements having a node whose name contains ``friendly_name`` (any case)."""
+    needle = friendly_name.lower()
+    def _uses_node(placement: dict[str, Any]) -> bool:
+        names = _placement_node_friendly_names(placement, id_to_friendly)
+        return any(needle in label for label in [n.lower() for n in names])
+
+    return [p for p in placements if _uses_node(p)]
+
+
+def _node_id_by_friendly(id_to_friendly: dict[str, str], target: str) -> str | None:
+    """Case-insensitively resolve a name to a node id: exact match, else substring."""
+    wanted = target.lower()
+    exact = [nid for nid, name in id_to_friendly.items() if name.lower() == wanted]
+    partial = [nid for nid, name in id_to_friendly.items() if wanted in name.lower()]
+    return next(iter(exact + partial), None)
+
+
+def _load_toml(path: str) -> dict[str, Any]:
+    """Read the file at ``path`` and parse it as TOML."""
+    return tomllib.loads(Path(path).read_bytes().decode())
+
+
+_TOP_LEVEL_TOML_KEYS = {  # top-level TOML keys that _merge_toml_into_args may copy onto args
+    "host",
+    "port",
+    "timeout",
+    "settle_timeout",
+    "model",
+    "pp",
+    "tg",
+    "repeat",
+    "warmup",
+    "json_out",
+    "instance_meta",
+    "sharding",
+    "min_nodes",
+    "max_nodes",
+    "force_download",
+    "danger_delete_downloads",
+    "all_combinations",
+}
+
+
+def _inject_toml_into_argv() -> None:
+    """Pre-load the ``--config`` TOML and append missing required CLI args.
+
+    argparse enforces ``required=True`` before the config can be merged, so
+    ``--model``, ``--pp`` and ``--tg`` are appended to ``sys.argv`` (in place)
+    from the config whenever the command line does not already supply them.
+    Both ``--config PATH`` and ``--config=PATH`` are recognised.
+
+    ``--model`` is taken from the top-level ``model`` key, else ``[decode].model``;
+    ``--pp`` / ``--tg`` come from the top-level ``pp`` / ``tg`` keys.
+    """
+    argv = sys.argv
+    cfg_path: str | None = None
+    # Locate the config path in either spelling that argparse accepts.
+    for pos, arg in enumerate(argv):
+        if arg == "--config" and pos + 1 < len(argv):
+            cfg_path = argv[pos + 1]
+            break
+        if arg.startswith("--config="):
+            cfg_path = arg.split("=", 1)[1]
+            break
+    if cfg_path is None:
+        return
+
+    cfg = _load_toml(cfg_path)
+    decode = cfg.get("decode", {})
+
+    def _has(flag: str) -> bool:
+        return any(a == flag or a.startswith(flag + "=") for a in argv)
+
+    # --model: prefer top-level, then [decode].model
+    if not _has("--model"):
+        model = cfg.get("model") or decode.get("model")
+        if model:
+            argv += ["--model", str(model)]
+
+    # --pp / --tg: a list contributes one CLI value per element, a scalar one.
+    for flag, key in (("--pp", "pp"), ("--tg", "tg")):
+        value = cfg.get(key)
+        if _has(flag) or not value:
+            continue
+        values = value if isinstance(value, list) else [value]
+        argv += [flag, *(str(v) for v in values)]
+
+
+def _merge_toml_into_args(args: argparse.Namespace, cfg: dict[str, Any]) -> None:
+    """Copy whitelisted top-level TOML values onto ``args`` where ``args`` is unset.
+
+    An attribute counts as unset when it is missing or compares equal to
+    ``None``, ``[]`` or ``False``; the ``prefill``/``decode`` tables are skipped.
+    """
+    unset_markers = (None, [], False)
+    for name, toml_value in cfg.items():
+        mergeable = name not in {"prefill", "decode"} and name in _TOP_LEVEL_TOML_KEYS
+        if mergeable and getattr(args, name, None) in unset_markers:
+            setattr(args, name, toml_value)
+
+
+def _side_args(
+    base: argparse.Namespace, overrides: dict[str, Any]
+) -> argparse.Namespace:
+    """Return a shallow copy of ``base`` with the whitelisted ``overrides`` applied."""
+    overridable = (
+        "instance_meta",
+        "sharding",
+        "min_nodes",
+        "max_nodes",
+        "skip_pipeline_jaccl",
+        "skip_tensor_ring",
+    )
+    side = copy.copy(base)
+    vars(side).update({k: overrides[k] for k in overridable if k in overrides})
+    return side
+
+
+def _pick_two_distinct_placements(
+    placements: list[dict[str, Any]],
+) -> tuple[dict[str, Any], dict[str, Any]] | None:
+    """First two placements with differing sorted ``"nodes"`` lists, else ``None``."""
+    # NOTE(review): sibling helpers read nodes via node_ids_from_instance(p["instance"]);
+    # confirm placements really carry a top-level "nodes" key.
+    if len(placements) < 2:
+        return None
+    by_nodes: dict[tuple[str, ...], dict[str, Any]] = {}
+    for item in placements:
+        by_nodes.setdefault(tuple(sorted(map(str, item.get("nodes", [])))), item)
+        if len(by_nodes) == 2:
+            first, second = by_nodes.values()
+            return first, second
+    return None
+
+
+def _create_instance_link(
+    client: ExoClient,
+    prefill_instance_id: str,
+    decode_instance_id: str,
+) -> str:
+    """POST a prefill/decode link; return its ``commandId`` (empty string if absent)."""
+    # One prefill instance is linked to one decode instance per call.
+    link_request = {
+        "prefill_instances": [prefill_instance_id],
+        "decode_instances": [decode_instance_id],
+    }
+
+    reply = client.request_json("POST", "/v1/instance-links", body=link_request)
+    return str(reply.get("commandId", ""))
+
+
+def _list_instance_links(client: ExoClient) -> list[dict[str, Any]]:
+    out = client.request_json("GET", "/v1/instance-links")
+    return out if isinstance(out, list) else []  # non-list payload -> no links
+
+
+def _delete_instance_link(client: ExoClient, link_id: str) -> None:
+    client.request_json("DELETE", f"/v1/instance-links/{link_id}")  # response discarded
+
+
+def run_one(
+    client: ExoClient,
+    model_id: str,
+    pp_hint: int,
+    tg: int,
+    prompt_sizer: PromptSizer,
+) -> tuple[dict[str, Any], int]:
+    """Time one non-streaming bench chat completion built from ``pp_hint``.
+
+    Returns ``(row, pp_tokens)``: ``row`` holds ``elapsed_s``, the first 200
+    characters of the reply and the response's ``generation_stats``;
+    ``pp_tokens`` is the second value returned by ``prompt_sizer.build``.
+    """
+    content, pp_tokens = prompt_sizer.build(pp_hint)
+    request_body: dict[str, Any] = {
+        "model": model_id,
+        "messages": [{"role": "user", "content": content}],
+        "stream": False,
+        "max_tokens": tg,
+    }
+    started = time.perf_counter()
+    response = client.post_bench_chat_completions(request_body)
+    elapsed = time.perf_counter() - started
+    first_choice = (response.get("choices") or [{}])[0]
+    reply_text = first_choice.get("message", {}).get("content") or ""
+    return {
+        "elapsed_s": elapsed,
+        "output_text_preview": reply_text[:200],
+        "stats": response.get("generation_stats"),
+    }, pp_tokens
+
+
+def _run_phase(
+    *,
+    client: ExoClient,
+    label: str,
+    pp_tg_pairs: list[tuple[int, int]],
+    model_id: str,
+    prompt_sizer: PromptSizer,
+    warmup: int,
+    repeat: int,
+    common_meta: dict[str, Any],
+) -> list[dict[str, Any]]:
+    """Run warmups, then ``repeat`` timed requests per (pp, tg); return all rows."""
+    logger.info(f"=== phase: {label} (model={model_id}) ===")
+    rows: list[dict[str, Any]] = []
+    # Warm up on the first pair only; with no pairs there is nothing to run.
+    for i in range(warmup if pp_tg_pairs else 0):
+        run_one(client, model_id, pp_tg_pairs[0][0], pp_tg_pairs[0][1], prompt_sizer)
+        logger.debug(f"  warmup {i + 1}/{warmup} done")
+
+    for pp, tg in pp_tg_pairs:
+        logger.info(f"--- {label}: pp={pp} tg={tg} ---")
+        runs: list[dict[str, Any]] = []
+        for r in range(repeat):
+            time.sleep(2)
+            try:
+                row, actual_pp_tokens = run_one(client, model_id, pp, tg, prompt_sizer)
+            except Exception as e:
+                logger.error(e)
+                continue
+            row.update(common_meta)
+            row.update(
+                phase=label, phase_model_id=model_id,
+                pp_tokens=actual_pp_tokens, tg=tg, repeat_index=r,
+            )
+            runs.append(row)
+            rows.append(row)
+
+        # ``run_one`` stores ``stats=None`` when the API omits generation_stats;
+        # averaging such a row would raise and abort the whole phase.
+        stats = [x["stats"] for x in runs if x.get("stats")]
+        if stats:
+            prompt_tps = mean(s["prompt_tps"] for s in stats)
+            gen_tps = mean(s["generation_tps"] for s in stats)
+            ptok = mean(s["prompt_tokens"] for s in stats)
+            gtok = mean(s["generation_tokens"] for s in stats)
+            peak = mean(s["peak_memory_usage"]["inBytes"] for s in stats)
+            avg_elapsed = mean(x["elapsed_s"] for x in runs)
+            logger.info(
+                f"[{label}] prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f}    "
+                f"prompt_tokens={ptok} gen_tokens={gtok}    "
+                f"peak_memory={format_peak_memory(peak)}    "
+                f"avg_elapsed={avg_elapsed:.2f}s"
+            )
+        time.sleep(2)
+    return rows
+
+
+def _summarise(rows: list[dict[str, Any]]) -> dict[tuple[int, int], dict[str, float]]:
+    """Average tps/elapsed per (pp_tokens, tg), skipping rows that have no stats."""
+    grouped: dict[tuple[int, int], list[dict[str, Any]]] = {}
+    for r in (x for x in rows if x.get("stats")):  # stats is None when the API omits it
+        grouped.setdefault((int(r["pp_tokens"]), int(r["tg"])), []).append(r)
+    return {
+        key: {
+            "prompt_tps": mean(x["stats"]["prompt_tps"] for x in runs),
+            "gen_tps": mean(x["stats"]["generation_tps"] for x in runs),
+            "elapsed_s": mean(x["elapsed_s"] for x in runs),
+        }
+        for key, runs in grouped.items()
+    }
+
+
+def _print_diff(
+    disagg_rows: list[dict[str, Any]],
+    decode_alone_rows: list[dict[str, Any]],
+    prefill_alone_rows: list[dict[str, Any]],
+) -> None:
+    """Log a per-(pp, tg) comparison table of the three phases, plus speedups."""
+    summaries = {
+        "disaggregated": _summarise(disagg_rows),
+        "decode_alone": _summarise(decode_alone_rows),
+        "prefill_alone": _summarise(prefill_alone_rows),
+    }
+    rule = "─" * 64
+
+    for key in sorted(set().union(*summaries.values())):
+        pp, tg = key
+        logger.info(rule)
+        logger.info(f"  pp={pp}  tg={tg}")
+        logger.info(rule)
+        logger.info(
+            f"  {'phase':<16} {'elapsed':>10}  {'prompt_tps':>11}  {'gen_tps':>9}"
+        )
+        for label, per_key in summaries.items():
+            summary = per_key.get(key)
+            if summary is None:
+                logger.info(f"  {label:<16} {'—':>10}  {'—':>11}  {'—':>9}")
+            else:
+                logger.info(
+                    f"  {label:<16} "
+                    f"{summary['elapsed_s']:>9.2f}s  "
+                    f"{summary['prompt_tps']:>11.1f}  "
+                    f"{summary['gen_tps']:>9.2f}"
+                )
+
+        # Speedup = baseline elapsed / disaggregated elapsed (needs a positive divisor).
+        d = summaries["disaggregated"].get(key)
+        if not d or not d["elapsed_s"] > 0:
+            continue
+        if da := summaries["decode_alone"].get(key):
+            logger.info(
+                f"  speedup vs decode_alone:  {da['elapsed_s'] / d['elapsed_s']:.2f}x"
+            )
+        if pa := summaries["prefill_alone"].get(key):
+            logger.info(
+                f"  speedup vs prefill_alone: {pa['elapsed_s'] / d['elapsed_s']:.2f}x"
+            )
+    logger.info(rule)
+
+
+def main() -> int:
+    """Run the prefill/decode benchmark CLI and return its exit code
+    (0 = success or --dry-run, 1 = node/placement/link failure, 2 = bad arguments)."""
+    _inject_toml_into_argv()
+    ap = argparse.ArgumentParser(
+        prog="prefill-decode-bench",
+        description="Benchmark MLX-MLX disaggregated prefill/decode via instance links.",
+    )
+    add_common_instance_args(ap)
+    ap.add_argument(
+        "--pp",
+        nargs="+",
+        required=True,
+        help="Prompt-size hints (ints, must be >1000). Accepts commas.",
+    )
+    ap.add_argument(
+        "--tg",
+        nargs="+",
+        required=True,
+        help="Generation lengths (ints). Accepts commas.",
+    )
+    ap.add_argument(
+        "--repeat", type=int, default=1, help="Repetitions per (pp,tg) pair."
+    )
+    ap.add_argument(
+        "--warmup",
+        type=int,
+        default=0,
+        help="Warmup runs (uses first pp/tg).",
+    )
+    ap.add_argument(
+        "--json-out",
+        default="bench/prefill_decode_results.json",
+        help="Write raw per-run results JSON to this path.",
+    )
+    ap.add_argument("--stdout", action="store_true", help="Write results to stdout")
+    ap.add_argument(
+        "--dry-run", action="store_true", help="List selected placements and exit."
+    )
+    ap.add_argument(
+        "--all-combinations",
+        action="store_true",
+        help="Force all pp×tg combinations even when lists have equal length.",
+    )
+    ap.add_argument(
+        "--prefill-model",
+        default=None,
+        help="Model id for the prefill instance. Defaults to --model.",
+    )
+    ap.add_argument(
+        "--prefill-node",
+        default=None,
+        help="friendly_name of the node hosting the prefill instance.",
+    )
+    ap.add_argument(
+        "--decode-node",
+        default=None,
+        help="friendly_name of the node hosting the decode instance.",
+    )
+    ap.add_argument(
+        "--config",
+        default=None,
+        help="TOML config file. CLI flags override toml values.",
+    )
+    ap.add_argument(
+        "--compare-baseline",
+        action="store_true",
+        help="Also run each (pp,tg) pair without the prefill/decode link "
+        "(decode instance does its own prefill) and report the diff.",
+    )
+    args = ap.parse_args()
+    cfg = _load_toml(args.config) if args.config else {}
+    _merge_toml_into_args(args, cfg)
+    prefill_overrides = cfg.get("prefill", {}) if cfg else {}
+    decode_overrides = cfg.get("decode", {}) if cfg else {}
+    if args.prefill_model is None and "model" in prefill_overrides:
+        args.prefill_model = prefill_overrides["model"]
+    if args.prefill_node is None and "node" in prefill_overrides:
+        args.prefill_node = prefill_overrides["node"]
+    if args.decode_node is None and "node" in decode_overrides:
+        args.decode_node = decode_overrides["node"]
+    if "model" in decode_overrides and not args.model:
+        args.model = decode_overrides["model"]
+    # Validate --pp/--tg/--repeat before the ExoClient is created; bad values exit with 2.
+    pp_list = parse_int_list(args.pp)
+    tg_list = parse_int_list(args.tg)
+    if not pp_list or not tg_list:
+        logger.error("pp and tg lists must be non-empty")
+        return 2
+    for pp in pp_list:
+        if pp <= 1000:
+            logger.error(
+                f"pp={pp} must be >1000 (remote prefill triggers when uncached >1000)"
+            )
+            return 2
+    if args.repeat <= 0:
+        logger.error("--repeat must be >= 1")
+        return 2
+    # Unequal list lengths or --all-combinations: pp×tg product; else zipped pairs.
+    use_combinations = args.all_combinations or len(pp_list) != len(tg_list)
+    if use_combinations:
+        logger.info(
+            f"pp/tg mode: combinations (product) — {len(pp_list) * len(tg_list)} pairs"
+        )
+    else:
+        logger.info(f"pp/tg mode: tandem (zip) — {len(pp_list)} pairs")
+
+    client = ExoClient(args.host, args.port, timeout_s=args.timeout)
+    # Without --prefill-model the prefill side reuses the decode model ids.
+    decode_short_id, decode_full_id = resolve_model_short_id(
+        client, args.model, force_download=args.force_download
+    )
+    if args.prefill_model:
+        prefill_short_id, prefill_full_id = resolve_model_short_id(
+            client, args.prefill_model, force_download=args.force_download
+        )
+    else:
+        prefill_short_id, prefill_full_id = decode_short_id, decode_full_id
+    # Prompt sizers are built per model; a failed tokenizer load raises RuntimeError.
+    tokenizer = load_tokenizer_for_bench(decode_full_id)
+    if tokenizer is None:
+        raise RuntimeError("[prefill-decode-bench] decode tokenizer load failed")
+    try:
+        decode_prompt_sizer = PromptSizer(tokenizer)
+    except Exception:
+        logger.error("[prefill-decode-bench] decode prompt sizing failed")
+        raise
+
+    if prefill_full_id == decode_full_id:
+        prefill_prompt_sizer = decode_prompt_sizer
+    else:
+        prefill_tokenizer = load_tokenizer_for_bench(prefill_full_id)
+        if prefill_tokenizer is None:
+            raise RuntimeError("[prefill-decode-bench] prefill tokenizer load failed")
+        prefill_prompt_sizer = PromptSizer(prefill_tokenizer)
+
+    id_to_friendly = _node_id_to_friendly(client)
+    prefill_args = _side_args(args, prefill_overrides)
+    decode_args = _side_args(args, decode_overrides)
+    # Same model and equal overrides: both sides are picked from one placement list.
+    if prefill_full_id == decode_full_id and prefill_overrides == decode_overrides:
+        placements = settle_and_fetch_placements(
+            client, decode_full_id, args, settle_timeout=args.settle_timeout
+        )
+        prefill_candidates = (
+            _filter_by_node(placements, args.prefill_node, id_to_friendly)
+            if args.prefill_node
+            else placements
+        )
+        decode_candidates = (
+            _filter_by_node(placements, args.decode_node, id_to_friendly)
+            if args.decode_node
+            else placements
+        )
+        if args.prefill_node and not prefill_candidates:
+            logger.error(f"No placement on prefill node {args.prefill_node!r}.")
+            return 1
+        if args.decode_node and not decode_candidates:
+            logger.error(f"No placement on decode node {args.decode_node!r}.")
+            return 1
+        if args.prefill_node and args.decode_node:
+            prefill_p = prefill_candidates[0]
+            decode_p = decode_candidates[0]
+        else:
+            pair = _pick_two_distinct_placements(placements)
+            if pair is None:
+                logger.error(
+                    "Need at least two distinct-node MLX placements for the same model."
+                )
+                return 1
+            prefill_p, decode_p = pair
+            if args.prefill_node:  # NOTE(review): not re-checked to differ from decode_p
+                prefill_p = prefill_candidates[0]
+            if args.decode_node:
+                decode_p = decode_candidates[0]
+    else:
+        prefill_node_id = (
+            _node_id_by_friendly(id_to_friendly, args.prefill_node)
+            if args.prefill_node
+            else None
+        )
+        decode_node_id = (
+            _node_id_by_friendly(id_to_friendly, args.decode_node)
+            if args.decode_node
+            else None
+        )
+        if args.prefill_node and prefill_node_id is None:
+            logger.error(f"Unknown node {args.prefill_node!r}.")
+            return 1
+        if args.decode_node and decode_node_id is None:
+            logger.error(f"Unknown node {args.decode_node!r}.")
+            return 1
+        prefill_placements = settle_and_fetch_placements(
+            client,
+            prefill_full_id,
+            prefill_args,
+            settle_timeout=args.settle_timeout,
+            node_id=prefill_node_id,
+        )
+        decode_placements = settle_and_fetch_placements(
+            client,
+            decode_full_id,
+            decode_args,
+            settle_timeout=args.settle_timeout,
+            node_id=decode_node_id,
+        )
+        if not prefill_placements:
+            logger.error(
+                f"No placement found for prefill model {prefill_full_id}"
+                f"{f' on node {args.prefill_node!r}' if args.prefill_node else ''}."
+            )
+            return 1
+        if not decode_placements:
+            logger.error(
+                f"No placement found for decode model {decode_full_id}"
+                f"{f' on node {args.decode_node!r}' if args.decode_node else ''}."
+            )
+            return 1
+        prefill_p = prefill_placements[0]
+        decode_p = decode_placements[0]
+
+    prefill_node_names = _placement_node_friendly_names(prefill_p, id_to_friendly)
+    decode_node_names = _placement_node_friendly_names(decode_p, id_to_friendly)
+    _ = unwrap_instance  # NOTE(review): no-op; presumably keeps the import referenced
+    prefill_instance = prefill_p["instance"]
+    decode_instance = decode_p["instance"]
+    prefill_id = instance_id_from_instance(prefill_instance)
+    decode_id = instance_id_from_instance(decode_instance)
+    prefill_meta = str(prefill_p.get("instance_meta", ""))
+    decode_meta = str(decode_p.get("instance_meta", ""))
+    prefill_nodes = nodes_used_in_instance(prefill_instance)
+    decode_nodes = nodes_used_in_instance(decode_instance)
+
+    logger.info("=" * 80)
+    logger.info(
+        f"PREFILL: {prefill_meta} / nodes={prefill_nodes} ({','.join(prefill_node_names)}) "
+        f"/ {prefill_short_id} ({prefill_full_id}) / instance_id={prefill_id}"
+    )
+    logger.info(
+        f"DECODE:  {decode_meta} / nodes={decode_nodes} ({','.join(decode_node_names)}) "
+        f"/ {decode_short_id} ({decode_full_id}) / instance_id={decode_id}"
+    )
+    # --dry-run: stop after logging the selected placements.
+    if args.dry_run:
+        return 0
+    # One settle deadline is shared by both planning phases; None when settle_timeout <= 0.
+    settle_deadline = (
+        time.monotonic() + args.settle_timeout if args.settle_timeout > 0 else None
+    )
+
+    logger.info("Planning phase: prefill...")
+    run_planning_phase(
+        client,
+        prefill_full_id,
+        prefill_p,
+        args.danger_delete_downloads,
+        args.timeout,
+        settle_deadline,
+    )
+    logger.info("Planning phase: decode...")
+    run_planning_phase(
+        client,
+        decode_full_id,
+        decode_p,
+        args.danger_delete_downloads,
+        args.timeout,
+        settle_deadline,
+    )
+
+    if use_combinations:
+        pp_tg_pairs = list(itertools.product(pp_list, tg_list))
+    else:
+        pp_tg_pairs = list(zip(pp_list, tg_list, strict=True))
+    # Passed to every _run_phase call as common_meta.
+    common_meta = {
+        "decode_model_short_id": decode_short_id,
+        "decode_model_id": decode_full_id,
+        "prefill_model_short_id": prefill_short_id,
+        "prefill_model_id": prefill_full_id,
+        "prefill_instance_id": prefill_id,
+        "prefill_instance_meta": prefill_meta,
+        "prefill_nodes": prefill_nodes,
+        "decode_instance_id": decode_id,
+        "decode_instance_meta": decode_meta,
+        "decode_nodes": decode_nodes,
+    }
+
+    all_rows: list[dict[str, Any]] = []
+    disagg_rows: list[dict[str, Any]] = []
+    decode_alone_rows: list[dict[str, Any]] = []
+    prefill_alone_rows: list[dict[str, Any]] = []
+    link_id = ""
+    prefill_alive = False
+    decode_alive = False
+    try:
+        logger.info("Creating prefill instance...")
+        client.request_json("POST", "/instance", body={"instance": prefill_instance})
+        wait_for_instance_ready(client, prefill_id)
+        prefill_alive = True
+        logger.info("Prefill instance ready")
+        # Optional baseline: run against the prefill model before decode is created.
+        if args.compare_baseline:
+            time.sleep(2)
+            prefill_alone_rows = _run_phase(
+                client=client,
+                label="prefill_alone",
+                pp_tg_pairs=pp_tg_pairs,
+                model_id=prefill_full_id,
+                prompt_sizer=prefill_prompt_sizer,
+                warmup=args.warmup,
+                repeat=args.repeat,
+                common_meta=common_meta,
+            )
+            all_rows.extend(prefill_alone_rows)
+
+        logger.info("Creating decode instance...")
+        client.request_json("POST", "/instance", body={"instance": decode_instance})
+        wait_for_instance_ready(client, decode_id)
+        decode_alive = True
+        logger.info("Decode instance ready")
+        # NOTE(review): link_id comes from the last listed link — confirm it is ours.
+        logger.info("Linking instances (prefill → decode)...")
+        _create_instance_link(client, prefill_id, decode_id)
+        time.sleep(1)
+        links = _list_instance_links(client)
+        if not links:
+            logger.error("Link did not appear in state.")
+            return 1
+        link_id = str(links[-1].get("linkId") or links[-1].get("link_id") or "")
+        logger.info(f"Link created: {link_id}")
+        time.sleep(2)
+        # Main run: requests go to the decode model while the link exists.
+        disagg_rows = _run_phase(
+            client=client,
+            label="disaggregated",
+            pp_tg_pairs=pp_tg_pairs,
+            model_id=decode_full_id,
+            prompt_sizer=decode_prompt_sizer,
+            warmup=args.warmup,
+            repeat=args.repeat,
+            common_meta=common_meta,
+        )
+        all_rows.extend(disagg_rows)
+        # Optional baseline: drop the link and prefill instance, rerun on decode alone.
+        if args.compare_baseline:
+            logger.info("Removing link and prefill instance to isolate decode_alone.")
+            with contextlib.suppress(ExoHttpError):
+                if link_id:
+                    _delete_instance_link(client, link_id)
+                    link_id = ""
+            with contextlib.suppress(ExoHttpError):
+                client.request_json("DELETE", f"/instance/{prefill_id}")
+            wait_for_instance_gone(client, prefill_id)
+            prefill_alive = False
+            time.sleep(2)
+
+            decode_alone_rows = _run_phase(
+                client=client,
+                label="decode_alone",
+                pp_tg_pairs=pp_tg_pairs,
+                model_id=decode_full_id,
+                prompt_sizer=decode_prompt_sizer,
+                warmup=args.warmup,
+                repeat=args.repeat,
+                common_meta=common_meta,
+            )
+            all_rows.extend(decode_alone_rows)
+
+            _print_diff(disagg_rows, decode_alone_rows, prefill_alone_rows)
+    finally:  # NOTE(review): if wait_for_instance_gone raises, later cleanup is skipped
+        with contextlib.suppress(ExoHttpError):
+            if link_id:
+                _delete_instance_link(client, link_id)
+        if decode_alive:
+            with contextlib.suppress(ExoHttpError):
+                client.request_json("DELETE", f"/instance/{decode_id}")
+            wait_for_instance_gone(client, decode_id)
+        if prefill_alive:
+            with contextlib.suppress(ExoHttpError):
+                client.request_json("DELETE", f"/instance/{prefill_id}")
+            wait_for_instance_gone(client, prefill_id)
+        logger.debug("Deleted both instances")
+    # --stdout takes precedence over --json-out.
+    if args.stdout:
+        json.dump(all_rows, sys.stdout, indent=2, ensure_ascii=False)
+    elif args.json_out:
+        with open(args.json_out, "w", encoding="utf-8") as f:
+            json.dump(all_rows, f, indent=2, ensure_ascii=False)
+        logger.debug(f"\nWrote results JSON: {args.json_out}")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # main()'s return value becomes the process exit status
diff --git a/bench/test_mlx_bandwidth.py b/bench/test_mlx_bandwidth.py
deleted file mode 100644
index 6b165b8b0..000000000
--- a/bench/test_mlx_bandwidth.py
+++ /dev/null
@@ -1,377 +0,0 @@
-# type: ignore
-import argparse
-import json
-import os
-import statistics
-import sys
-import tempfile
-import time
-
-import mlx.core as mx
-
-DTYPE_MAP = {
-    "float32": (mx.float32, 4),
-    "float16": (mx.float16, 2),
-    "bfloat16": (mx.bfloat16, 2),
-}
-
-SIZES = [
-    1 * 1024,
-    4 * 1024,
-    16 * 1024,
-    64 * 1024,
-    256 * 1024,
-    1 * 1024 * 1024,
-    4 * 1024 * 1024,
-    16 * 1024 * 1024,
-    64 * 1024 * 1024,
-    256 * 1024 * 1024,
-    1 * 1024 * 1024 * 1024,
-    2 * 1024 * 1024 * 1024,
-    4 * 1024 * 1024 * 1024,
-    8 * 1024 * 1024 * 1024,
-]
-
-
-def format_bytes(n: int) -> str:
-    if n >= 1024 * 1024 * 1024:
-        return f"{n / (1024 * 1024 * 1024):.0f} GB"
-    if n >= 1024 * 1024:
-        return f"{n / (1024 * 1024):.0f} MB"
-    if n >= 1024:
-        return f"{n / 1024:.0f} KB"
-    return f"{n} B"
-
-
-def format_time(seconds: float) -> str:
-    if seconds >= 1.0:
-        return f"{seconds:.3f} s"
-    if seconds >= 0.001:
-        return f"{seconds * 1000:.2f} ms"
-    return f"{seconds * 1_000_000:.1f} us"
-
-
-def format_bandwidth(bytes_per_sec: float) -> str:
-    if bytes_per_sec >= 1024 * 1024 * 1024:
-        return f"{bytes_per_sec / (1024 * 1024 * 1024):.2f} GB/s"
-    if bytes_per_sec >= 1024 * 1024:
-        return f"{bytes_per_sec / (1024 * 1024):.1f} MB/s"
-    return f"{bytes_per_sec / 1024:.1f} KB/s"
-
-
-def barrier(group: mx.distributed.Group) -> None:
-    mx.eval(mx.distributed.all_sum(mx.array(1.0), group=group))
-
-
-def init_ring(
-    rank: int, self_ip: str, peer_ip: str, port: int, tmpdir: str
-) -> mx.distributed.Group:
-    if rank == 0:
-        hosts = [f"{self_ip}:{port}", f"{peer_ip}:{port}"]
-    else:
-        hosts = [f"{peer_ip}:{port}", f"{self_ip}:{port}"]
-
-    hostfile = os.path.join(tmpdir, "hosts.json")
-    with open(hostfile, "w") as f:
-        json.dump(hosts, f)
-
-    for var in ("MLX_HOSTFILE", "MLX_RANK", "MLX_IBV_DEVICES", "MLX_JACCL_COORDINATOR"):
-        os.environ.pop(var, None)
-
-    os.environ["MLX_HOSTFILE"] = hostfile
-    os.environ["MLX_RANK"] = str(rank)
-    return mx.distributed.init(backend="ring", strict=True)
-
-
-def init_jaccl(
-    rank: int, interface: str, coordinator: str, port: int, tmpdir: str
-) -> mx.distributed.Group:
-    devices = [[None, interface], [interface, None]]
-    devfile = os.path.join(tmpdir, "devices.json")
-    with open(devfile, "w") as f:
-        json.dump(devices, f)
-
-    for var in ("MLX_HOSTFILE", "MLX_RANK", "MLX_IBV_DEVICES", "MLX_JACCL_COORDINATOR"):
-        os.environ.pop(var, None)
-
-    os.environ["MLX_IBV_DEVICES"] = devfile
-    os.environ["MLX_RANK"] = str(rank)
-    if rank == 0:
-        os.environ["MLX_JACCL_COORDINATOR"] = f"0.0.0.0:{port}"
-    else:
-        os.environ["MLX_JACCL_COORDINATOR"] = coordinator
-
-    return mx.distributed.init(backend="jaccl", strict=True)
-
-
-def bench_unidirectional(
-    group: mx.distributed.Group,
-    rank: int,
-    size_bytes: int,
-    dtype: mx.Dtype,
-    element_size: int,
-    warmup: int,
-    iterations: int,
-) -> list[float]:
-    n_elements = size_bytes // element_size
-    tensor = mx.random.normal(shape=(n_elements,)).astype(dtype)
-    mx.eval(tensor)
-
-    for _ in range(warmup):
-        if rank == 0:
-            sent = mx.distributed.send(tensor, dst=1, group=group)
-            mx.eval(sent)
-        else:
-            received = mx.distributed.recv_like(tensor, src=0, group=group)
-            mx.eval(received)
-        barrier(group)
-
-    times: list[float] = []
-    for _ in range(iterations):
-        barrier(group)
-        t0 = time.perf_counter()
-        if rank == 0:
-            sent = mx.distributed.send(tensor, dst=1, group=group)
-            mx.eval(sent)
-        else:
-            received = mx.distributed.recv_like(tensor, src=0, group=group)
-            mx.eval(received)
-        barrier(group)
-        t1 = time.perf_counter()
-        times.append(t1 - t0)
-
-    return times
-
-
-def bench_rtt(
-    group: mx.distributed.Group,
-    rank: int,
-    size_bytes: int,
-    dtype: mx.Dtype,
-    element_size: int,
-    warmup: int,
-    iterations: int,
-) -> list[float]:
-    n_elements = size_bytes // element_size
-    tensor = mx.random.normal(shape=(n_elements,)).astype(dtype)
-    mx.eval(tensor)
-
-    for _ in range(warmup):
-        if rank == 0:
-            sent = mx.distributed.send(tensor, dst=1, group=group)
-            mx.eval(sent)
-            received = mx.distributed.recv_like(tensor, src=1, group=group)
-            mx.eval(received)
-        else:
-            received = mx.distributed.recv_like(tensor, src=0, group=group)
-            mx.eval(received)
-            sent = mx.distributed.send(received, dst=0, group=group)
-            mx.eval(sent)
-        barrier(group)
-
-    times: list[float] = []
-    for _ in range(iterations):
-        barrier(group)
-        t0 = time.perf_counter()
-        if rank == 0:
-            sent = mx.distributed.send(tensor, dst=1, group=group)
-            mx.eval(sent)
-            received = mx.distributed.recv_like(tensor, src=1, group=group)
-            mx.eval(received)
-        else:
-            received = mx.distributed.recv_like(tensor, src=0, group=group)
-            mx.eval(received)
-            sent = mx.distributed.send(received, dst=0, group=group)
-            mx.eval(sent)
-        barrier(group)
-        t1 = time.perf_counter()
-        times.append(t1 - t0)
-
-    return times
-
-
-def bench_all_gather(
-    group: mx.distributed.Group,
-    rank: int,
-    size_bytes: int,
-    dtype: mx.Dtype,
-    element_size: int,
-    warmup: int,
-    iterations: int,
-) -> list[float]:
-    n_elements = (size_bytes // 2) // element_size
-    tensor = mx.random.normal(shape=(n_elements,)).astype(dtype)
-    mx.eval(tensor)
-
-    for _ in range(warmup):
-        gathered = mx.distributed.all_gather(tensor, group=group)
-        mx.eval(gathered)
-        barrier(group)
-
-    times: list[float] = []
-    for _ in range(iterations):
-        barrier(group)
-        t0 = time.perf_counter()
-        gathered = mx.distributed.all_gather(tensor, group=group)
-        mx.eval(gathered)
-        t1 = time.perf_counter()
-        times.append(t1 - t0)
-
-    return times
-
-
-def print_table(title: str, rows: list[dict[str, str]]) -> None:
-    print(f"\n=== {title} ===")
-    headers = ["Size", "Median", "Min", "Max", "Bandwidth"]
-    widths = [
-        max(len(h), max((len(r[h]) for r in rows), default=0)) + 2 for h in headers
-    ]
-    header_line = "".join(h.ljust(w) for h, w in zip(headers, widths, strict=True))
-    print(header_line)
-    print("-" * len(header_line))
-    for row in rows:
-        print("".join(row[h].ljust(w) for h, w in zip(headers, widths, strict=True)))
-
-
-def run_bench(
-    name: str,
-    bench_fn,
-    group: mx.distributed.Group,
-    rank: int,
-    dtype: mx.Dtype,
-    element_size: int,
-    warmup: int,
-    iterations: int,
-    bw_multiplier: int = 1,
-) -> None:
-    rows: list[dict[str, str]] = []
-    for size in SIZES:
-        if rank == 0:
-            print(f"  {name}: {format_bytes(size)}...", end="", flush=True)
-        times = bench_fn(group, rank, size, dtype, element_size, warmup, iterations)
-        if rank == 0:
-            med = statistics.median(times)
-            mn = min(times)
-            mx_ = max(times)
-            bw = (size * bw_multiplier) / med
-            rows.append(
-                {
-                    "Size": format_bytes(size),
-                    "Median": format_time(med),
-                    "Min": format_time(mn),
-                    "Max": format_time(mx_),
-                    "Bandwidth": format_bandwidth(bw),
-                }
-            )
-            print(f" {format_bandwidth(bw)}")
-    if rank == 0:
-        print_table(name, rows)
-
-
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        description="MLX Distributed Communication Benchmark"
-    )
-    subparsers = parser.add_subparsers(dest="backend", required=True)
-
-    ring_parser = subparsers.add_parser("ring")
-    ring_parser.add_argument("--rank", type=int, required=True, choices=[0, 1])
-    ring_parser.add_argument("--self-ip", required=True)
-    ring_parser.add_argument("--peer-ip", required=True)
-    ring_parser.add_argument("--port", type=int, default=5555)
-
-    jaccl_parser = subparsers.add_parser("jaccl")
-    jaccl_parser.add_argument("--rank", type=int, required=True, choices=[0, 1])
-    jaccl_parser.add_argument("--interface", required=True)
-    jaccl_parser.add_argument(
-        "--coordinator",
-        type=str,
-        default=None,
-        help="IP:PORT of rank 0 (required for rank 1)",
-    )
-    jaccl_parser.add_argument(
-        "--port", type=int, default=9999, help="Coordinator port (rank 0 only)"
-    )
-
-    for p in [ring_parser, jaccl_parser]:
-        p.add_argument("--warmup", type=int, default=3)
-        p.add_argument("--iterations", type=int, default=10)
-        p.add_argument("--dtype", choices=list(DTYPE_MAP.keys()), default="float32")
-
-    args = parser.parse_args()
-
-    if args.backend == "jaccl" and args.rank == 1 and args.coordinator is None:
-        jaccl_parser.error("--coordinator is required for rank 1")
-
-    return args
-
-
-def main() -> int:
-    args = parse_args()
-    dtype, element_size = DTYPE_MAP[args.dtype]
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        if args.backend == "ring":
-            print(f"Initializing ring backend (rank {args.rank})...")
-            group = init_ring(args.rank, args.self_ip, args.peer_ip, args.port, tmpdir)
-        else:
-            print(f"Initializing jaccl backend (rank {args.rank})...")
-            group = init_jaccl(
-                args.rank, args.interface, args.coordinator or "", args.port, tmpdir
-            )
-
-        print(f"Rank {group.rank()} of {group.size()} initialized")
-        barrier(group)
-
-        if args.rank == 0:
-            print("\nMLX Distributed Communication Benchmark")
-            print(
-                f"Backend: {args.backend} | Dtype: {args.dtype} | Warmup: {args.warmup} | Iterations: {args.iterations}"
-            )
-
-        run_bench(
-            "Unidirectional (rank 0 -> rank 1)",
-            bench_unidirectional,
-            group,
-            args.rank,
-            dtype,
-            element_size,
-            args.warmup,
-            args.iterations,
-        )
-        run_bench(
-            "Round-Trip (ping-pong)",
-            bench_rtt,
-            group,
-            args.rank,
-            dtype,
-            element_size,
-            args.warmup,
-            args.iterations,
-            bw_multiplier=2,
-        )
-        run_bench(
-            "All-Gather",
-            bench_all_gather,
-            group,
-            args.rank,
-            dtype,
-            element_size,
-            args.warmup,
-            args.iterations,
-        )
-
-        if args.rank == 0:
-            print("\nDone.")
-        else:
-            print("Rank 1 complete.")
-
-    return 0
-
-
-if __name__ == "__main__":
-    try:
-        sys.exit(main())
-    except KeyboardInterrupt:
-        print("\nInterrupted.")
-        sys.exit(1)
diff --git a/dashboard/src/lib/components/HeaderNav.svelte b/dashboard/src/lib/components/HeaderNav.svelte
index d7dab78d5..36e5b659e 100644
--- a/dashboard/src/lib/components/HeaderNav.svelte
+++ b/dashboard/src/lib/components/HeaderNav.svelte
@@ -1,5 +1,8 @@
 <script lang="ts">
   import { browser } from "$app/environment";
+  import { featureFlags } from "$lib/stores/app.svelte";
+
+  const showAdvanced = $derived(featureFlags()["disaggregation"] === true);
 
   interface Props {
     showHome?: boolean;
@@ -297,5 +300,28 @@
       </svg>
       <span class="hidden sm:inline">Integrations</span>
     </a>
+    {#if showAdvanced}
+      <a
+        href="/#/advanced"
+        class="text-xs md:text-sm text-white/70 hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-1.5 md:gap-2 cursor-pointer"
+        title="Advanced cluster settings"
+      >
+        <svg
+          class="w-4 h-4"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        >
+          <circle cx="12" cy="12" r="3" />
+          <path
+            d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-4 0v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1 0-4h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 4 0v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 0 4h-.09a1.65 1.65 0 0 0-1.51 1z"
+          />
+        </svg>
+        <span class="hidden sm:inline">Advanced</span>
+      </a>
+    {/if}
   </nav>
 </header>
diff --git a/dashboard/src/lib/components/IntegrationCard.svelte b/dashboard/src/lib/components/IntegrationCard.svelte
index 83978b4d3..2398b819e 100644
--- a/dashboard/src/lib/components/IntegrationCard.svelte
+++ b/dashboard/src/lib/components/IntegrationCard.svelte
@@ -1,4 +1,6 @@
 <script lang="ts">
+  import { copyText } from "$lib/utils/clipboard";
+
   interface Props {
     title: string;
     subtitle: string;
@@ -16,11 +18,17 @@
   }: Props = $props();
 
   let copied = $state(false);
+  let failed = $state(false);
 
   async function copyToClipboard() {
-    await navigator.clipboard.writeText(config);
-    copied = true;
-    setTimeout(() => (copied = false), 2000);
+    const ok = await copyText(config);
+    if (ok) {
+      copied = true;
+      setTimeout(() => (copied = false), 2000);
+    } else {
+      failed = true;
+      setTimeout(() => (failed = false), 2000);
+    }
   }
 </script>
 
@@ -37,9 +45,11 @@
       class="px-3 py-1.5 text-xs rounded border transition-all duration-200 cursor-pointer
         {copied
         ? 'border-green-500/50 text-green-400 bg-green-500/10'
-        : 'border-exo-light-gray/30 text-exo-light-gray hover:border-exo-yellow/50 hover:text-exo-yellow'}"
+        : failed
+          ? 'border-red-500/50 text-red-400 bg-red-500/10'
+          : 'border-exo-light-gray/30 text-exo-light-gray hover:border-exo-yellow/50 hover:text-exo-yellow'}"
     >
-      {copied ? "Copied!" : "Copy"}
+      {copied ? "Copied!" : failed ? "Copy failed" : "Copy"}
     </button>
   </div>
   {#if description}
diff --git a/dashboard/src/lib/components/PrefillDecodeDisaggregation.svelte b/dashboard/src/lib/components/PrefillDecodeDisaggregation.svelte
new file mode 100644
index 000000000..7b9c1f655
--- /dev/null
+++ b/dashboard/src/lib/components/PrefillDecodeDisaggregation.svelte
@@ -0,0 +1,573 @@
+<!--
+  @component
+  Prefill / decode disaggregation panel. Lists the instance links ("routes")
+  from the app store and lets the user create, edit and remove them. Mixed
+  model families and multi-node instances are flagged with warnings but do
+  not disable saving.
+-->
+<script lang="ts">
+  import { onMount, onDestroy } from "svelte";
+  import FamilyLogos from "$lib/components/FamilyLogos.svelte";
+  import {
+    instances,
+    instanceLinks,
+    nodeIdentities,
+    refreshState,
+    createInstanceLink,
+    updateInstanceLink,
+    deleteInstanceLink,
+    type Instance,
+  } from "$lib/stores/app.svelte";
+  import { deriveBaseModel, deriveFamily } from "$lib/utils/model_family";
+
+  // Store entries are cast to this; the instance sits under one of the keys.
+  type InstanceWrapper = {
+    MlxRingInstance?: Instance;
+    MlxJacclInstance?: Instance;
+    VllmInstance?: Instance;
+  };
+
+  // Handle of this component's refresh timer; null until mounted.
+  let interval: ReturnType<typeof setInterval> | null = null;
+
+  // Refresh once on mount, then every 3 s until the component is destroyed.
+  onMount(() => {
+    refreshState();
+    interval = setInterval(refreshState, 3000);
+  });
+  onDestroy(() => {
+    if (interval) clearInterval(interval);
+  });
+
+  // Display-ready summary of one instance.
+  type InstanceRow = {
+    id: string;
+    modelId: string;
+    family: string;
+    baseModel: string;
+    nodeNames: string[];
+    nodeCount: number;
+  };
+
+  // One row per store instance, sorted by model id. An entry with none of the
+  // known wrapper keys still gets a row, with an empty model id and no nodes.
+  const instanceRows = $derived.by<InstanceRow[]>(() => {
+    const rows: InstanceRow[] = [];
+    const ids = nodeIdentities();
+    for (const [id, raw] of Object.entries(instances())) {
+      const wrapper = raw as InstanceWrapper;
+      const inst =
+        wrapper.MlxRingInstance ??
+        wrapper.MlxJacclInstance ??
+        wrapper.VllmInstance;
+      const modelId = inst?.shardAssignments?.modelId ?? "";
+      const nodeToRunner = inst?.shardAssignments?.nodeToRunner ?? {};
+      const nodeIds = Object.keys(nodeToRunner);
+      // Prefer the node's friendly name; otherwise show the first 6 id characters.
+      const nodeNames = nodeIds
+        .map((nodeId) => ids[nodeId]?.friendlyName ?? nodeId.slice(0, 6))
+        .filter((name) => !!name);
+      rows.push({
+        id,
+        modelId,
+        family: deriveFamily(modelId),
+        baseModel: deriveBaseModel(modelId),
+        nodeNames,
+        nodeCount: nodeIds.length,
+      });
+    }
+    rows.sort((a, b) => a.modelId.localeCompare(b.modelId));
+    return rows;
+  });
+
+  // Instance rows keyed by instance id.
+  const instanceById = $derived(
+    Object.fromEntries(instanceRows.map((r) => [r.id, r])),
+  );
+
+  // Display-ready summary of one link, with its warning flags precomputed.
+  type LinkRow = {
+    linkId: string;
+    prefill: string[];
+    decode: string[];
+    families: string[];
+    multiNode: boolean;
+  };
+
+  // One row per stored link. `families` holds the distinct lower-cased base
+  // models of its members; `multiNode` is set if any member spans >1 node.
+  const linkRows = $derived.by<LinkRow[]>(() => {
+    const rows: LinkRow[] = [];
+    for (const [, link] of Object.entries(instanceLinks())) {
+      const fams = new Set<string>();
+      let multiNode = false;
+      for (const id of [...link.prefillInstances, ...link.decodeInstances]) {
+        const r = instanceById[id];
+        if (r && r.baseModel) fams.add(r.baseModel.toLowerCase());
+        if (r && r.nodeCount > 1) multiNode = true;
+      }
+      rows.push({
+        linkId: link.linkId,
+        prefill: link.prefillInstances,
+        decode: link.decodeInstances,
+        families: Array.from(fams),
+        multiNode,
+      });
+    }
+    return rows;
+  });
+
+  // Editor state. `editingLinkId` is null when the editor is closed, "new" while
+  // creating, otherwise the id of the link being edited.
+  let editingLinkId = $state<string | null>(null);
+  let editingPrefill = $state<Set<string>>(new Set());
+  let editingDecode = $state<Set<string>>(new Set());
+  let saving = $state(false);
+  let errorMessage = $state<string | null>(null);
+
+  // Opens the editor on an empty draft for a new link.
+  function startCreate() {
+    editingLinkId = "new";
+    editingPrefill = new Set();
+    editingDecode = new Set();
+    errorMessage = null;
+  }
+
+  // Opens the editor on a copy of an existing link's role assignments.
+  function startEdit(row: LinkRow) {
+    editingLinkId = row.linkId;
+    editingPrefill = new Set(row.prefill);
+    editingDecode = new Set(row.decode);
+    errorMessage = null;
+  }
+
+  // Closes the editor and discards the draft and any error banner.
+  function cancelEdit() {
+    editingLinkId = null;
+    editingPrefill = new Set();
+    editingDecode = new Set();
+    errorMessage = null;
+  }
+
+  type Role = "prefill" | "decode" | "none";
+
+  // Role the draft currently assigns to an instance.
+  function roleOf(id: string): Role {
+    if (editingPrefill.has(id)) return "prefill";
+    if (editingDecode.has(id)) return "decode";
+    return "none";
+  }
+
+  // Assigns `role` to an instance in the draft. The sets are replaced, not
+  // mutated, and the id is removed from both first so it holds one role at most.
+  function setRole(id: string, role: Role) {
+    const p = new Set(editingPrefill);
+    const d = new Set(editingDecode);
+    p.delete(id);
+    d.delete(id);
+    if (role === "prefill") p.add(id);
+    if (role === "decode") d.add(id);
+    editingPrefill = p;
+    editingDecode = d;
+  }
+
+  // Distinct lower-cased base models among the instances in the draft.
+  const editingFamilies = $derived.by<string[]>(() => {
+    const fams = new Set<string>();
+    for (const id of [...editingPrefill, ...editingDecode]) {
+      const r = instanceById[id];
+      if (r && r.baseModel) fams.add(r.baseModel.toLowerCase());
+    }
+    return Array.from(fams);
+  });
+
+  // Display names of draft instances that span more than one node.
+  const editingMultiNode = $derived.by<string[]>(() => {
+    const names: string[] = [];
+    for (const id of [...editingPrefill, ...editingDecode]) {
+      const r = instanceById[id];
+      if (r && r.nodeCount > 1) {
+        names.push(r.baseModel || r.modelId);
+      }
+    }
+    return names;
+  });
+
+  const editingMismatch = $derived(editingFamilies.length > 1);
+  // Saving needs at least one instance on each side and no save in flight.
+  const canSave = $derived(
+    editingLinkId !== null &&
+      editingPrefill.size > 0 &&
+      editingDecode.size > 0 &&
+      !saving,
+  );
+
+  // Creates or updates the link from the draft, then closes the editor and
+  // refreshes. Any failure is shown in the error banner.
+  async function save() {
+    if (editingLinkId === null) return;
+    saving = true;
+    errorMessage = null;
+    try {
+      const prefill = Array.from(editingPrefill);
+      const decode = Array.from(editingDecode);
+      if (editingLinkId === "new") {
+        await createInstanceLink(prefill, decode);
+      } else {
+        await updateInstanceLink(editingLinkId, prefill, decode);
+      }
+      cancelEdit();
+      await refreshState();
+    } catch (err) {
+      errorMessage = err instanceof Error ? err.message : String(err);
+    } finally {
+      saving = false;
+    }
+  }
+
+  // Deletes a link after a confirm() prompt, then refreshes. Failures are shown
+  // in the error banner.
+  async function remove(linkId: string) {
+    if (!confirm("Remove this routing?")) return;
+    // Drop any banner left by an earlier failure so success shows no error.
+    errorMessage = null;
+    try {
+      await deleteInstanceLink(linkId);
+      if (editingLinkId === linkId) cancelEdit();
+      await refreshState();
+    } catch (err) {
+      errorMessage = err instanceof Error ? err.message : String(err);
+    }
+  }
+</script>
+
+<!-- Read-only list of instance cards for one side of a route; ids with no
+     matching instance row are skipped. -->
+{#snippet instanceList(ids: string[])}
+  <ul class="list-none p-0 m-0 flex flex-col gap-2">
+    {#each ids as id (id)}
+      {@const r = instanceById[id]}
+      {#if r}
+        <li
+          class="flex items-center gap-2 px-2.5 py-2 bg-exo-medium-gray/20 border border-exo-medium-gray/40"
+        >
+          <FamilyLogos family={r.family} />
+          <div class="min-w-0 flex-1">
+            <div class="text-exo-yellow text-xs font-mono truncate">
+              {r.baseModel || r.modelId}
+            </div>
+            <div class="text-exo-light-gray text-[11px] truncate">
+              {r.nodeNames.join(", ") || "?"}{r.nodeCount > 1
+                ? ` (${r.nodeCount} nodes)`
+                : ""}
+            </div>
+            <div
+              class="text-exo-light-gray/40 text-[10px] font-mono truncate"
+              title={r.id}
+            >
+              {r.id.slice(0, 8)}
+            </div>
+          </div>
+        </li>
+      {/if}
+    {/each}
+  </ul>
+{/snippet}
+
+<div class="font-mono text-foreground">
+  <div class="mb-6 space-y-4">
+    <details open class="group [&_summary::-webkit-details-marker]:hidden">
+      <summary
+        class="cursor-pointer list-none text-exo-yellow text-xs font-mono tracking-widest uppercase flex items-center gap-2 hover:opacity-80 transition-opacity"
+      >
+        <span
+          class="inline-block transition-transform group-open:rotate-90 text-exo-light-gray"
+          >▶</span
+        >
+        Prefill vs Decode
+      </summary>
+      <div class="mt-2 text-white/80 text-sm leading-relaxed">
+        Prefill is the compute-heavy pass that consumes the entire prompt and
+        builds a KV cache. Decode is the memory-bandwidth-bound loop that emits
+        tokens sequentially from that cache. The two phases have very different
+        bottlenecks, so running them on different hardware can be substantially
+        faster than doing both on one node.
+      </div>
+    </details>
+    <details class="group [&_summary::-webkit-details-marker]:hidden">
+      <summary
+        class="cursor-pointer list-none text-exo-yellow text-xs font-mono tracking-widest uppercase flex items-center gap-2 hover:opacity-80 transition-opacity"
+      >
+        <span
+          class="inline-block transition-transform group-open:rotate-90 text-exo-light-gray"
+          >▶</span
+        >
+        Linking Instances
+      </summary>
+      <div class="mt-2 text-white/80 text-sm leading-relaxed space-y-2">
+        <p>
+          A linked route here tells the cluster: when a request is sent to a
+          model in that cluster, the decode node (or the least active one if
+          there are multiple) will handle it. If it decides it must do a lot of
+          prefill not already cached in the prefix cache, it routes the request
+          to the prefill node over TCP/IP. The prefill node streams the KV cache
+          back to the decode node which picks up from there.
+        </p>
+        <p>
+          Linked instances must be running the same model family — KV layouts
+          differ across architectures. More on the <a
+            class="text-exo-yellow underline underline-offset-2 hover:text-exo-yellow-darker transition-colors"
+            href="https://blog.exolabs.net/nvidia-dgx-spark/"
+            target="_blank"
+            rel="noreferrer noopener">blog</a
+          >.
+        </p>
+      </div>
+    </details>
+  </div>
+
+  {#if errorMessage}
+    <div
+      class="mb-4 px-4 py-3 bg-red-500/10 border border-red-500/40 text-red-300 text-sm"
+    >
+      {errorMessage}
+    </div>
+  {/if}
+
+  <section class="mt-12">
+    <h2
+      class="text-exo-yellow text-xs font-mono tracking-widest uppercase m-0 mb-3"
+    >
+      Existing routes
+    </h2>
+
+    {#if linkRows.length === 0}
+      {#if editingLinkId === null}
+        <div class="flex items-center justify-between">
+          <p class="text-exo-light-gray italic text-sm m-0">
+            No routes yet. Create one to enable remote prefill.
+          </p>
+          <button
+            class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-yellow/15 border border-exo-yellow/50 text-exo-yellow hover:bg-exo-yellow/25 hover:border-exo-yellow/80 transition-colors"
+            onclick={startCreate}
+          >
+            + New route
+          </button>
+        </div>
+      {/if}
+    {:else}
+      {#if editingLinkId === null}
+        <div class="flex justify-end mb-3">
+          <button
+            class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-yellow/15 border border-exo-yellow/50 text-exo-yellow hover:bg-exo-yellow/25 hover:border-exo-yellow/80 transition-colors"
+            onclick={startCreate}
+          >
+            + New route
+          </button>
+        </div>
+      {/if}
+      <div
+        class="bg-exo-dark-gray/60 border border-exo-medium-gray/40 flex flex-col"
+      >
+        {#each linkRows as row (row.linkId)}
+          {#if editingLinkId !== row.linkId}
+            <article
+              class="p-4 border-b border-exo-light-gray/25 last:border-b-0"
+            >
+              {#if row.multiNode}
+                <div
+                  class="mb-3 px-3 py-2 bg-red-500/10 border border-red-500/40 text-red-300 text-xs tracking-wide"
+                >
+                  ⚠ Multi-node instance detected. Remote prefill currently only
+                  works on single-node (rank-0) instances. This route will not
+                  function until that's supported.
+                </div>
+              {/if}
+              {#if row.families.length > 1}
+                <div
+                  class="mb-3 px-3 py-2 bg-amber-500/10 border border-amber-500/40 text-amber-300 text-xs tracking-wide"
+                >
+                  ⚠ Mixed model families: {row.families.join(", ")}
+                </div>
+              {/if}
+              <div
+                class="grid grid-cols-[1fr_auto_1fr_auto] items-center gap-x-3 gap-y-2"
+              >
+                <span
+                  class="inline-block justify-self-start text-[10px] font-mono tracking-widest uppercase px-2 py-0.5 bg-exo-yellow/15 border border-exo-yellow/40 text-exo-yellow"
+                  >Prefill</span
+                >
+                <span></span>
+                <span
+                  class="inline-block justify-self-start text-[10px] font-mono tracking-widest uppercase px-2 py-0.5 bg-exo-medium-gray/40 border border-exo-medium-gray/60 text-foreground"
+                  >Decode</span
+                >
+                <span></span>
+                <div class="min-w-0">
+                  {@render instanceList(row.prefill)}
+                </div>
+                <div class="text-exo-yellow/60 text-xl px-2" aria-hidden="true">
+                  →
+                </div>
+                <div class="min-w-0">
+                  {@render instanceList(row.decode)}
+                </div>
+                <div class="flex gap-2 pl-3">
+                  <button
+                    class="px-2 py-0.5 text-[11px] font-mono tracking-wider uppercase bg-exo-medium-gray/30 border border-exo-medium-gray/60 rounded text-foreground hover:border-exo-yellow/60 hover:text-exo-yellow disabled:opacity-40 disabled:cursor-not-allowed transition-colors"
+                    onclick={() => startEdit(row)}
+                    disabled={editingLinkId !== null}
+                  >
+                    Edit
+                  </button>
+                  <button
+                    class="px-2 py-0.5 text-[11px] font-mono tracking-wider uppercase bg-red-500/15 border border-red-500/40 rounded text-red-300 hover:bg-red-500/25 transition-colors"
+                    onclick={() => remove(row.linkId)}
+                  >
+                    Remove
+                  </button>
+                </div>
+              </div>
+            </article>
+          {/if}
+        {/each}
+      </div>
+    {/if}
+  </section>
+
+  {#if editingLinkId !== null && instanceRows.length === 0}
+    <section
+      class="mt-6 bg-exo-dark-gray/60 border border-exo-yellow/30 px-4 py-2.5 flex items-center justify-between gap-3"
+    >
+      <span class="text-exo-light-gray italic text-sm font-mono"
+        >No instances available.</span
+      >
+      <button
+        class="px-3 py-1 text-xs font-mono tracking-wider uppercase bg-exo-medium-gray/30 border border-exo-medium-gray/60 rounded text-foreground hover:border-exo-yellow/60 transition-colors"
+        onclick={cancelEdit}
+      >
+        Cancel
+      </button>
+    </section>
+  {:else if editingLinkId !== null}
+    <section class="mt-6 bg-exo-dark-gray/60 border border-exo-yellow/30 p-5">
+      <h2
+        class="text-exo-yellow text-xs font-mono tracking-widest uppercase m-0 mb-3"
+      >
+        {editingLinkId === "new" ? "New route" : "Edit route"}
+      </h2>
+
+      {#if editingMismatch}
+        <div
+          class="mb-3 px-3 py-2 bg-amber-500/10 border border-amber-500/40 text-amber-300 text-xs tracking-wide"
+        >
+          ⚠ Selected instances span multiple model families: <strong
+            >{editingFamilies.join(", ")}</strong
+          >. Linking across families produces a corrupt KV cache.
+        </div>
+      {/if}
+
+      {#if editingMultiNode.length > 0}
+        <div
+          class="mb-3 px-3 py-2 bg-red-500/10 border border-red-500/40 text-red-300 text-xs tracking-wide"
+        >
+          ⚠ Multi-node instance(s) selected: <strong
+            >{editingMultiNode.join(", ")}</strong
+          >. Remote prefill currently only works on single-node instances. This
+          route will not function until multi-node support lands.
+        </div>
+      {/if}
+
+      <p class="text-exo-light-gray text-xs mb-4">
+        Pick a role for each instance:
+        <span class="text-exo-yellow">Prefill</span>
+        serves KV cache,
+        <span class="text-foreground">Decode</span> consumes it.
+      </p>
+      <div
+        class="grid gap-2.5"
+        style="grid-template-columns: repeat(auto-fill, minmax(360px, 1fr));"
+      >
+        {#each instanceRows as row (row.id)}
+          {@const role = roleOf(row.id)}
+          <div
+            class="border p-3 flex flex-col gap-2.5 transition-colors {role ===
+            'prefill'
+              ? 'border-exo-yellow/60 bg-exo-dark-gray/60'
+              : role === 'decode'
+                ? 'border-exo-light-gray/60 bg-exo-dark-gray/60'
+                : 'border-exo-medium-gray/40 bg-exo-dark-gray/40'}"
+          >
+            <div class="flex items-center gap-2">
+              <FamilyLogos family={row.family} />
+              <div class="min-w-0 flex-1">
+                <div class="text-exo-yellow text-xs font-mono truncate">
+                  {row.baseModel || row.modelId}
+                </div>
+                <div class="text-exo-light-gray text-[11px] truncate">
+                  {row.nodeNames.join(", ") || "?"}{row.nodeCount > 1
+                    ? ` (${row.nodeCount} nodes)`
+                    : ""}
+                </div>
+                <div
+                  class="text-exo-light-gray/40 text-[10px] font-mono truncate"
+                  title={row.id}
+                >
+                  {row.id.slice(0, 8)}
+                </div>
+              </div>
+              {#if row.nodeCount > 1}
+                <span
+                  class="text-[9px] font-mono tracking-widest uppercase px-1.5 py-0.5 bg-red-500/15 border border-red-500/40 text-red-300"
+                  title="Multi-node instances are not supported by remote prefill yet."
+                  >Unsupported</span
+                >
+              {/if}
+            </div>
+            <div
+              class="flex rounded-md overflow-hidden border border-exo-light-gray/40 divide-x divide-exo-light-gray/40"
+            >
+              <button
+                class="flex-1 px-2 py-1 text-[11px] font-mono tracking-wider uppercase transition-colors {role ===
+                'prefill'
+                  ? 'bg-exo-yellow/20 text-exo-yellow'
+                  : 'bg-transparent text-white/80 hover:text-exo-yellow'}"
+                aria-pressed={role === "prefill"}
+                onclick={() =>
+                  setRole(row.id, role === "prefill" ? "none" : "prefill")}
+                >Prefill</button
+              >
+              <button
+                class="flex-1 px-2 py-1 text-[11px] font-mono tracking-wider uppercase transition-colors {role ===
+                'decode'
+                  ? 'bg-exo-medium-gray/50 text-foreground'
+                  : 'bg-transparent text-white/80 hover:text-foreground'}"
+                aria-pressed={role === "decode"}
+                onclick={() =>
+                  setRole(row.id, role === "decode" ? "none" : "decode")}
+                >Decode</button
+              >
+            </div>
+          </div>
+        {/each}
+      </div>
+
+      <div class="flex gap-2 mt-5 justify-end">
+        <button
+          class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-yellow/15 border border-exo-yellow/50 text-exo-yellow hover:bg-exo-yellow/25 hover:border-exo-yellow/80 disabled:opacity-40 disabled:cursor-not-allowed transition-colors"
+          onclick={save}
+          disabled={!canSave}
+        >
+          {saving ? "Saving..." : "Save route"}
+        </button>
+        <button
+          class="px-3 py-1.5 text-xs font-mono tracking-wider uppercase bg-exo-medium-gray/30 border border-exo-medium-gray/60 text-foreground hover:border-exo-yellow/60 disabled:opacity-40 disabled:cursor-not-allowed transition-colors"
+          onclick={cancelEdit}
+          disabled={saving}
+        >
+          Cancel
+        </button>
+      </div>
+    </section>
+  {/if}
+</div>
diff --git a/dashboard/src/lib/stores/app.svelte.ts b/dashboard/src/lib/stores/app.svelte.ts
index c5a541047..5b28b3f02 100644
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
@@ -74,6 +74,12 @@ export interface Instance {
   };
 }
 
+export interface RawInstanceLink { // One entry of the `instanceLinks` map in the `/state` response.
+  linkId: string; // Id used in the `/v1/instance-links/{linkId}` update and delete URLs.
+  prefillInstances: string[]; // Ids of the instances given the prefill role.
+  decodeInstances: string[]; // Ids of the instances given the decode role.
+}
+
 // Granular node state types from the new state structure
 interface RawNodeIdentity {
   modelId?: string;
@@ -223,6 +229,7 @@ interface RawStateResponse {
     }
   >;
   runners?: Record<string, unknown>;
+  instanceLinks?: Record<string, RawInstanceLink>;
   downloads?: Record<string, unknown[]>;
   // New granular node state fields
   nodeIdentities?: Record<string, RawNodeIdentity>;
@@ -541,6 +548,8 @@ class AppStore {
   topologyData = $state<TopologyData | null>(null);
   instances = $state<Record<string, unknown>>({});
   runners = $state<Record<string, unknown>>({});
+  instanceLinks = $state<Record<string, RawInstanceLink>>({});
+  featureFlags = $state<Record<string, boolean>>({});
   downloads = $state<Record<string, unknown[]>>({});
   nodeDisk = $state<
     Record<
@@ -1274,6 +1283,7 @@ class AppStore {
 
   startPolling() {
     this.fetchState();
+    this.fetchFeatureFlags();
     this.fetchInterval = setInterval(() => this.fetchState(), 1000);
   }
 
@@ -1285,6 +1295,27 @@ class AppStore {
     this.stopPreviewsPolling();
   }
 
+  /**
+   * Loads feature flags from `/v1/feature-flags` into `featureFlags`.
+   * Best-effort: a non-OK response, a thrown fetch/parse error, or a body that
+   * is not a JSON object leaves the current flags unchanged.
+   */
+  async fetchFeatureFlags() {
+    try {
+      const response = await fetch("/v1/feature-flags");
+      if (!response.ok) return;
+      const body: unknown = await response.json();
+      // Readers index the flags by name, which would throw on `null`; only
+      // accept a plain JSON object.
+      if (body === null || typeof body !== "object" || Array.isArray(body)) {
+        return;
+      }
+      this.featureFlags = body as Record<string, boolean>;
+    } catch {
+      // Silently ignore — defaults to all-disabled.
+    }
+  }
+
   async fetchState() {
     try {
       const response = await fetch("/state");
@@ -1310,6 +1330,11 @@ class AppStore {
       if (data.runners) {
         this.runners = data.runners;
       }
+      if (data.instanceLinks) {
+        this.instanceLinks = data.instanceLinks;
+      } else {
+        this.instanceLinks = {};
+      }
       if (data.downloads) {
         this.downloads = data.downloads;
       }
@@ -1670,7 +1695,15 @@ class AppStore {
               }
             }
           }
-          return { role: m.role, content: msgContent };
+          const out: {
+            role: string;
+            content: string;
+            reasoning_content?: string;
+          } = { role: m.role, content: msgContent };
+          if (m.role === "assistant" && m.thinking) {
+            out.reasoning_content = m.thinking;
+          }
+          return out;
         }),
       ];
 
@@ -1877,7 +1910,15 @@ class AppStore {
       const apiMessages = [
         systemPrompt,
         ...targetConversation.messages.slice(0, -1).map((m) => {
-          return { role: m.role, content: m.content };
+          const out: {
+            role: string;
+            content: string;
+            reasoning_content?: string;
+          } = { role: m.role, content: m.content };
+          if (m.role === "assistant" && m.thinking) {
+            out.reasoning_content = m.thinking;
+          }
+          return out;
         }),
       ];
 
@@ -2408,10 +2449,15 @@ class AppStore {
               contentParts.push({ type: "text", text: textContent });
             }
 
-            return {
-              role: m.role,
-              content: contentParts,
-            };
+            const out: {
+              role: string;
+              content: typeof contentParts;
+              reasoning_content?: string;
+            } = { role: m.role, content: contentParts };
+            if (m.role === "assistant" && m.thinking) {
+              out.reasoning_content = m.thinking;
+            }
+            return out;
           }
 
           // Text-only message (original path)
@@ -2429,10 +2475,15 @@ class AppStore {
             }
           }
 
-          return {
-            role: m.role,
-            content: msgContent,
-          };
+          const out: {
+            role: string;
+            content: string;
+            reasoning_content?: string;
+          } = { role: m.role, content: msgContent };
+          if (m.role === "assistant" && m.thinking) {
+            out.reasoning_content = m.thinking;
+          }
+          return out;
         }),
       ];
 
@@ -3281,6 +3332,83 @@ class AppStore {
     }
   }
 
+  /**
+   * Creates an instance link via `POST /v1/instance-links`.
+   *
+   * @param prefillInstances - Instance ids sent as `prefill_instances`.
+   * @param decodeInstances - Instance ids sent as `decode_instances`.
+   * @throws Error carrying the status code and response body when the
+   *   response is not OK.
+   */
+  async createInstanceLink(
+    prefillInstances: string[],
+    decodeInstances: string[],
+  ): Promise<void> {
+    const payload = {
+      prefill_instances: prefillInstances,
+      decode_instances: decodeInstances,
+    };
+    const response = await fetch("/v1/instance-links", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (response.ok) return;
+    const detail = await response.text();
+    throw new Error(
+      `Failed to create instance link: ${response.status} ${detail}`,
+    );
+  }
+
+  /**
+   * Sends new prefill/decode instance lists for an existing link via
+   * `PUT /v1/instance-links/{linkId}`.
+   *
+   * @param linkId - Id of the link to update; URI-encoded into the path.
+   * @param prefillInstances - Instance ids sent as `prefill_instances`.
+   * @param decodeInstances - Instance ids sent as `decode_instances`.
+   * @throws Error carrying the status code and response body when the
+   *   response is not OK.
+   */
+  async updateInstanceLink(
+    linkId: string,
+    prefillInstances: string[],
+    decodeInstances: string[],
+  ): Promise<void> {
+    const url = `/v1/instance-links/${encodeURIComponent(linkId)}`;
+    const payload = {
+      prefill_instances: prefillInstances,
+      decode_instances: decodeInstances,
+    };
+    const response = await fetch(url, {
+      method: "PUT",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (response.ok) return;
+    const detail = await response.text();
+    throw new Error(
+      `Failed to update instance link: ${response.status} ${detail}`,
+    );
+  }
+
+  /**
+   * Deletes a link via `DELETE /v1/instance-links/{linkId}`.
+   *
+   * @param linkId - Id of the link to delete; URI-encoded into the path.
+   * @throws Error carrying the status code and response body when the
+   *   response is not OK.
+   */
+  async deleteInstanceLink(linkId: string): Promise<void> {
+    const url = `/v1/instance-links/${encodeURIComponent(linkId)}`;
+    const response = await fetch(url, { method: "DELETE" });
+    if (response.ok) return;
+    const detail = await response.text();
+    throw new Error(
+      `Failed to delete instance link: ${response.status} ${detail}`,
+    );
+  }
+
   /**
    * Delete a downloaded model from a specific node
    */
@@ -3379,6 +3484,19 @@ export const prefillProgress = () => appStore.prefillProgress;
 export const topologyData = () => appStore.topologyData;
 export const instances = () => appStore.instances;
 export const runners = () => appStore.runners;
+export const instanceLinks = () => appStore.instanceLinks;
+export const featureFlags = () => appStore.featureFlags;
+export const createInstanceLink = (
+  prefillInstances: string[],
+  decodeInstances: string[],
+) => appStore.createInstanceLink(prefillInstances, decodeInstances);
+export const updateInstanceLink = (
+  linkId: string,
+  prefillInstances: string[],
+  decodeInstances: string[],
+) => appStore.updateInstanceLink(linkId, prefillInstances, decodeInstances);
+export const deleteInstanceLink = (linkId: string) =>
+  appStore.deleteInstanceLink(linkId);
 export const downloads = () => appStore.downloads;
 export const nodeDisk = () => appStore.nodeDisk;
 export const placementPreviews = () => appStore.placementPreviews;
diff --git a/dashboard/src/lib/utils/clipboard.ts b/dashboard/src/lib/utils/clipboard.ts
new file mode 100644
index 000000000..02e6f4565
--- /dev/null
+++ b/dashboard/src/lib/utils/clipboard.ts
@@ -0,0 +1,78 @@
+/**
+ * Copies `text` to the clipboard and reports whether it worked.
+ *
+ * In a secure context with the async Clipboard API available, that API is
+ * tried first. If it is missing or rejects, the function falls back to
+ * selecting a temporary off-screen `<textarea>` and running
+ * `document.execCommand("copy")`.
+ *
+ * @param text - The string to copy.
+ * @returns `true` when either path reports success; `false` when there is no
+ *   `document` to fall back on or the fallback fails.
+ */
+export async function copyText(text: string): Promise<boolean> {
+  if (
+    typeof window !== "undefined" &&
+    window.isSecureContext &&
+    navigator.clipboard?.writeText
+  ) {
+    try {
+      await navigator.clipboard.writeText(text);
+      return true;
+    } catch {
+      // fall through to execCommand fallback
+    }
+  }
+
+  if (typeof document === "undefined") {
+    return false;
+  }
+
+  // The fallback has to move focus and selection into a scratch textarea, so
+  // capture both first and put them back afterwards.
+  const previousFocus =
+    document.activeElement instanceof HTMLElement
+      ? document.activeElement
+      : null;
+  const previousSelection = document.getSelection();
+  const previousRange =
+    previousSelection && previousSelection.rangeCount > 0
+      ? previousSelection.getRangeAt(0)
+      : null;
+
+  const textarea = document.createElement("textarea");
+  textarea.value = text;
+  textarea.setAttribute("readonly", "");
+  // Keep the scratch element invisible and pinned to the viewport corner.
+  textarea.style.position = "fixed";
+  textarea.style.top = "0";
+  textarea.style.left = "0";
+  textarea.style.width = "1px";
+  textarea.style.height = "1px";
+  textarea.style.padding = "0";
+  textarea.style.border = "none";
+  textarea.style.outline = "none";
+  textarea.style.boxShadow = "none";
+  textarea.style.background = "transparent";
+  textarea.style.opacity = "0";
+  document.body.appendChild(textarea);
+
+  try {
+    // preventScroll: focusing the scratch element must not move the page.
+    textarea.focus({ preventScroll: true });
+    textarea.select();
+    textarea.setSelectionRange(0, text.length);
+    return document.execCommand("copy");
+  } catch {
+    return false;
+  } finally {
+    document.body.removeChild(textarea);
+    if (previousRange && previousSelection) {
+      previousSelection.removeAllRanges();
+      previousSelection.addRange(previousRange);
+    }
+    // Removing the focused textarea drops focus to <body>; hand it back so
+    // keyboard users stay on the control that triggered the copy.
+    previousFocus?.focus({ preventScroll: true });
+  }
+}
diff --git a/dashboard/src/lib/utils/model_family.ts b/dashboard/src/lib/utils/model_family.ts
new file mode 100644
index 000000000..d5b1fd371
--- /dev/null
+++ b/dashboard/src/lib/utils/model_family.ts
@@ -0,0 +1,60 @@
+// Mirrors src/exo/shared/models/model_cards.py:derive_base_model
+// Matches a trailing quantization/format marker, plus any alphanumeric
+// segments after it, at the very end of a model name (case-insensitive).
+const QUANT_SUFFIXES = new RegExp(
+  "[-_ ](?:MLX|MXFP[0-9]+|NVFP[0-9]+|GPTQ|AWQ|GGUF|fp16|bf16|fp8|int[0-9]+|[0-9]+(?:\\.[0-9]+)?bit|Q[0-9]+(?:_[A-Z0-9]+)?|gs[0-9]+)" +
+    "(?:[-_ ](?:MLX|Q[0-9]+|Int[0-9]+|[A-Z0-9]+|gs[0-9]+))*$",
+  "i",
+);
+
+/** Returns the text after the last "/" in `modelId`, or all of it if there is none. */
+function lastPathSegment(modelId: string): string {
+  return modelId.slice(modelId.lastIndexOf("/") + 1);
+}
+
+/**
+ * Turns "-" and "_" into spaces and trims the ends. Doubled spaces are
+ * collapsed in a single pass only, so a run of three or more spaces is
+ * shortened rather than reduced to one.
+ */
+function normalize(s: string): string {
+  const spaced = s.replace(/[-_]/g, " ");
+  return spaced.replaceAll("  ", " ").trim();
+}
+
+/**
+ * Display name of the base model: everything up to the last "/" and any
+ * trailing quantization suffix are dropped, and separators become spaces.
+ */
+export function deriveBaseModel(modelId: string): string {
+  const withoutQuant = lastPathSegment(modelId).replace(QUANT_SUFFIXES, "");
+  return normalize(withoutQuant);
+}
+
+/** True when both ids derive the same base model name, ignoring case. */
+export function baseModelsCompatible(a: string, b: string): boolean {
+  const [left, right] = [a, b].map((id) => deriveBaseModel(id).toLowerCase());
+  return left === right;
+}
+
+// Mirrors src/exo/shared/models/model_cards.py:derive_family
+/**
+ * Lower-cased family prefix of a model id. The name (minus path prefix and
+ * quantization suffix) is split on "-" and "."; segments are kept up to, but
+ * not including, the first one that is digits with an optional "b"/"m".
+ * If nothing is kept, the whole cleaned name is returned instead.
+ */
+export function deriveFamily(modelId: string): string {
+  const cleaned = lastPathSegment(modelId)
+    .replace(QUANT_SUFFIXES, "")
+    .toLowerCase()
+    .replaceAll("_", "-");
+  const segments = cleaned.split(/[-.]/);
+  // A purely numeric segment is covered by the same pattern ("b"/"m" optional).
+  const stop = segments.findIndex((segment) => /^\d+[bm]?$/i.test(segment));
+  const family = stop === -1 ? segments : segments.slice(0, stop);
+  return family.length > 0 ? family.join("-") : cleaned;
+}
diff --git a/dashboard/src/routes/+page.svelte b/dashboard/src/routes/+page.svelte
index 965482e67..f69cdd151 100644
--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -3435,6 +3435,7 @@
             >
               <li>Connect nodes with TB5 cables</li>
               <li>Boot to Recovery (hold power 10s → Options)</li>
+              <li>Open Terminal from the Utilities menu</li>
               <li>
                 Run
                 <code class="text-yellow-300 bg-yellow-400/10 px-1 rounded"
@@ -4822,6 +4823,7 @@
                 >
                   <li>Connect nodes with TB5 cables</li>
                   <li>Boot to Recovery (hold power 10s → Options)</li>
+                  <li>Open Terminal from the Utilities menu</li>
                   <li>
                     Run
                     <code class="text-yellow-300 bg-yellow-400/10 px-1 rounded"
@@ -4968,6 +4970,7 @@
                   >
                     <li>Connect nodes with TB5 cables</li>
                     <li>Boot to Recovery (hold power 10s → Options)</li>
+                    <li>Open Terminal from the Utilities menu</li>
                     <li>
                       Run
                       <code
diff --git a/dashboard/src/routes/advanced/+page.svelte b/dashboard/src/routes/advanced/+page.svelte
new file mode 100644
index 000000000..3b45cf23b
--- /dev/null
+++ b/dashboard/src/routes/advanced/+page.svelte
@@ -0,0 +1,92 @@
+<script lang="ts">
+  // Advanced settings page: a tabbed view of cluster-level configuration that
+  // is only shown when the "disaggregation" feature flag is enabled.
+  import { browser } from "$app/environment";
+  import HeaderNav from "$lib/components/HeaderNav.svelte";
+  import PrefillDecodeDisaggregation from "$lib/components/PrefillDecodeDisaggregation.svelte";
+  import { featureFlags, refreshState } from "$lib/stores/app.svelte";
+  import { onMount } from "svelte";
+
+  type TabId = "prefill-decode";
+
+  // Tabs rendered in the tab bar; currently a single entry.
+  const tabs: { id: TabId; label: string }[] = [
+    { id: "prefill-decode", label: "Prefill / Decode" },
+  ];
+
+  let activeTab = $state<TabId>(tabs[0].id);
+  // False until the initial refreshState() call has settled; the template shows
+  // "Loading…" and the redirect below stays inactive while this is false.
+  let flagsLoaded = $state(false);
+
+  onMount(() => {
+    // finally(): mark flags as loaded whether the refresh resolved or rejected.
+    // NOTE(review): presumably refreshState() is what populates featureFlags() — confirm.
+    refreshState().finally(() => {
+      flagsLoaded = true;
+    });
+  });
+
+  const flags = $derived(featureFlags());
+  // The whole page is gated on this one flag being exactly `true`.
+  const enabled = $derived(flags["disaggregation"] === true);
+
+  $effect(() => {
+    if (browser && flagsLoaded && !enabled) {
+      // No advanced features enabled — bounce home.
+      // NOTE(review): the template's "No advanced features enabled" hint is
+      // rendered in this same state, so it is presumably only seen briefly
+      // before this navigation — confirm that is intended.
+      window.location.hash = "/";
+    }
+  });
+</script>
+
+<div class="min-h-screen bg-exo-dark-gray flex flex-col">
+  <HeaderNav />
+
+  <main class="flex-1 max-w-[1100px] mx-auto w-full px-4 md:px-6 py-8">
+    {#if !flagsLoaded}
+      <div class="text-exo-light-gray/60 text-sm">Loading…</div>
+    {:else if !enabled}
+      <div class="text-exo-light-gray/60 text-sm">
+        No advanced features enabled. Set <code
+          class="text-exo-yellow font-mono">ENABLE_DISAGGREGATION=true</code
+        > on the cluster to access prefill/decode disaggregation.
+      </div>
+    {:else}
+      <div class="mb-4">
+        <h1
+          class="text-white text-xl md:text-2xl font-semibold tracking-wide mb-2"
+        >
+          Advanced
+        </h1>
+        <p class="text-exo-light-gray/60 text-sm">
+          Cluster-level configuration. Most users don't need anything here.
+        </p>
+      </div>
+
+      <div
+        class="flex flex-wrap gap-2 mb-6 border-b border-exo-light-gray/10 pb-3"
+      >
+        {#each tabs as tab (tab.id)}
+          <button
+            onclick={() => (activeTab = tab.id)}
+            class="px-3 py-1.5 text-xs rounded-md transition-all cursor-pointer
+              {activeTab === tab.id
+              ? 'bg-exo-yellow/15 text-exo-yellow border border-exo-yellow/30'
+              : 'text-exo-light-gray/60 hover:text-white/80 border border-transparent hover:border-exo-light-gray/20'}"
+          >
+            {tab.label}
+          </button>
+        {/each}
+      </div>
+
+      <div class="space-y-4">
+        {#if activeTab === "prefill-decode"}
+          <PrefillDecodeDisaggregation />
+        {/if}
+      </div>
+    {/if}
+  </main>
+</div>
diff --git a/dashboard/src/routes/integrations/+page.svelte b/dashboard/src/routes/integrations/+page.svelte
index 924cfdf7b..b8e6665d4 100644
--- a/dashboard/src/routes/integrations/+page.svelte
+++ b/dashboard/src/routes/integrations/+page.svelte
@@ -14,6 +14,7 @@
 
   let modelCapabilities = $state<Record<string, string[]>>({});
   let modelContextLengths = $state<Record<string, number>>({});
+  let modelReasoningDialects = $state<Record<string, string>>({});
 
   const runningModels = $derived.by(() => {
     const models: string[] = [];
@@ -88,10 +89,12 @@
   let codexModel = $state("");
   let codexMcpPath = $state("/Users/username");
   let openClawModel = $state("");
+  let piModel = $state("");
   $effect(() => {
     const def = modelsBySize.length > 0 ? modelsBySize[0] : "your-model-id";
     codexModel = def;
     openClawModel = def;
+    piModel = def;
   });
 
   const claudeShellCommand = $derived(
@@ -130,6 +133,7 @@
     for (const modelId of runningModels) {
       const caps = modelCapabilities[modelId] || [];
       const ctxLen = modelContextLengths[modelId] || 0;
+      const dialect = modelReasoningDialects[modelId];
       const entry: Record<string, unknown> = { name: modelId };
       if (ctxLen > 0) {
         entry.limit = { context: ctxLen, output: Math.min(ctxLen, 16384) };
@@ -137,6 +141,27 @@
       if (caps.includes("vision")) {
         entry.modalities = { input: ["text", "image"], output: ["text"] };
       }
+      // Reasoning round-trip: opencode's `interleaved` field tells the
+      // openai-compatible adapter to send the assistant's prior
+      // reasoning_content back in subsequent turns. Emit it for dialects
+      // whose chat templates use prior reasoning:
+      //   - `tool_conditional` (DeepSeek V3.2 / V4): wrapper preserves all
+      //     reasoning when tools are present.
+      //   - `post_last_user` (Qwen3-Thinking, GLM 4.5+, MiniMax M2.x):
+      //     Jinja template reads reasoning_content for assistant turns since
+      //     the last user message — exactly the tool-chain window.
+      //   - `channel` (gpt-oss / Harmony): the model's Jinja template reads
+      //     `message.thinking` rather than `message.reasoning_content`, but
+      //     the server bridges `reasoning_content` → `thinking` before
+      //     rendering, so the round-trip works through the standard field.
+      // `suffix` (Kimi): reasoning lives in content; no separate field path.
+      if (
+        dialect === "tool_conditional" ||
+        dialect === "post_last_user" ||
+        dialect === "channel"
+      ) {
+        entry.interleaved = { field: "reasoning_content" };
+      }
       models[modelId] = entry;
     }
     if (Object.keys(models).length === 0) {
@@ -218,6 +243,55 @@
     ),
   );
 
+  const piModelsJson = $derived.by(() => {
+    const models: Record<string, unknown>[] = [];
+    for (const modelId of runningModels) {
+      const caps = modelCapabilities[modelId] || [];
+      const ctxLen = modelContextLengths[modelId] || 0;
+      const entry: Record<string, unknown> = { id: modelId };
+      if (caps.includes("vision")) {
+        entry.input = ["text", "image"];
+      }
+      // Mark thinking-capable models so pi surfaces its thinking-level selector
+      // for them. exo capability strings: "thinking" (model emits reasoning
+      // content) and "thinking_toggle" (user can turn it on/off).
+      if (caps.includes("thinking") || caps.includes("thinking_toggle")) {
+        entry.reasoning = true;
+      }
+      if (ctxLen > 0) {
+        entry.contextWindow = ctxLen;
+      }
+      models.push(entry);
+    }
+    if (models.length === 0) {
+      models.push({ id: "your-model-id" });
+    }
+    return JSON.stringify(
+      {
+        providers: {
+          exo: {
+            baseUrl: `${apiUrl}/v1`,
+            api: "openai-completions",
+            apiKey: "exo",
+            compat: {
+              supportsDeveloperRole: false,
+              // exo's OpenAI surface takes a boolean `enable_thinking` toggle,
+              // not graded effort levels, so disable pi's `reasoning_effort`
+              // parameter and use the matching top-level-boolean format.
+              supportsReasoningEffort: false,
+              thinkingFormat: "qwen",
+            },
+            models,
+          },
+        },
+      },
+      null,
+      2,
+    );
+  });
+
+  const piShellCommand = $derived(`pi --provider exo --model ${piModel}`);
+
   const ollamaCommand = $derived(
     `OLLAMA_HOST=${apiUrl}/ollama ollama run ${modelsBySize.length > 0 ? modelsBySize[0] : "your-model-id"}`,
   );
@@ -277,6 +351,7 @@
     "OpenCode",
     "Codex",
     "OpenClaw",
+    "Pi",
     "Open WebUI",
     "n8n",
     "Firefox",
@@ -298,16 +373,25 @@
     try {
       const resp = await fetch("/v1/models");
       const data = (await resp.json()) as {
-        data: { id: string; capabilities: string[]; context_length: number }[];
+        data: {
+          id: string;
+          capabilities: string[];
+          context_length: number;
+          reasoning_dialect?: string;
+        }[];
       };
       const caps: Record<string, string[]> = {};
       const ctxs: Record<string, number> = {};
+      const dialects: Record<string, string> = {};
       for (const model of data.data) {
         caps[model.id] = model.capabilities || [];
         if (model.context_length > 0) ctxs[model.id] = model.context_length;
+        if (model.reasoning_dialect)
+          dialects[model.id] = model.reasoning_dialect;
       }
       modelCapabilities = caps;
       modelContextLengths = ctxs;
+      modelReasoningDialects = dialects;
     } catch {
       /* ignore */
     }
@@ -515,6 +599,33 @@
           config={`openclaw doctor --fix${(modelCapabilities[openClawModel] || []).includes("vision") ? `\nopenclaw models set-image exo/${openClawModel}` : ""}\nopenclaw gateway &\nopenclaw dashboard`}
           language="bash"
         />
+      {:else if activeTab === "Pi"}
+        {#if runningModels.length > 1}
+          <div class="text-xs">
+            <span
+              class="text-exo-light-gray/50 text-[10px] uppercase tracking-wider block mb-1"
+              >Model</span
+            >
+            <select bind:value={piModel} class={selectClass}>
+              {#each runningModels as model}
+                <option value={model}>{model.split("/").pop()}</option>
+              {/each}
+            </select>
+          </div>
+        {/if}
+        <IntegrationCard
+          title="Models Config"
+          subtitle="~/.pi/agent/models.json"
+          description="Register exo as a custom provider in pi. Create or edit this file, then run pi and pick an exo model via /model. Install pi with: npm install -g @mariozechner/pi-coding-agent"
+          config={piModelsJson}
+        />
+        <IntegrationCard
+          title="Shell Command"
+          subtitle="Run in terminal"
+          description="Launch pi directly with the exo provider and model selected."
+          config={piShellCommand}
+          language="bash"
+        />
       {:else if activeTab === "Open WebUI"}
         <IntegrationCard
           title="1. Start Open WebUI"
diff --git a/docs/architecture.md b/docs/architecture.md
index daf5d3f25..55983168c 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -81,4 +81,4 @@ Whenever a device produces side effects, it captures those side effects in an `E
 
 ## Purity
 
-A significant goal of the current design is to make data flow explicit. Classes should either represent simple data (`CamelCaseModel`s typically, and `TaggedModel`s for unions) or active `System`s (Erlang `Actor`s), with all transformations of that data being "referentially transparent" - destructure and construct new data, don't mutate in place. We have had varying degrees of success with this, and are still exploring where purity makes sense.
+A significant goal of the current design is to make data flow explicit. Classes should either represent simple data (`FrozenModel`s typically, and `TaggedModel`s for unions) or active `System`s (Erlang `Actor`s), with all transformations of that data being "referentially transparent" - destructure and construct new data, don't mutate in place. We have had varying degrees of success with this, and are still exploring where purity makes sense.
diff --git a/flake.lock b/flake.lock
index cca42b7a7..498505e10 100644
--- a/flake.lock
+++ b/flake.lock
@@ -2,11 +2,11 @@
   "nodes": {
     "crane": {
       "locked": {
-        "lastModified": 1775790182,
-        "narHash": "sha256-pG2RWVQY0Pe+rmmXJx+Jpyi+JcgjWzS18m7fcD1B64Q=",
+        "lastModified": 1779130139,
+        "narHash": "sha256-BLrtr42azquO7MdGFU5a7KiMl3YpFlTeIXqy1fT5GlQ=",
         "owner": "ipetkov",
         "repo": "crane",
-        "rev": "534982f1c41834b101e381b07b1121a4f065a374",
+        "rev": "edb38893982a3338972bb4a2ec7ce7c29ba10fd9",
         "type": "github"
       },
       "original": {
@@ -47,11 +47,11 @@
         "rust-analyzer-src": "rust-analyzer-src"
       },
       "locked": {
-        "lastModified": 1775807984,
-        "narHash": "sha256-Redoe3D9zGN5I9QPHWL9vfMVQBehY1fKsMiRXQ83X3w=",
+        "lastModified": 1779185128,
+        "narHash": "sha256-Kl2bkmwZJD3n2KWDxuIlturZ7emqRK+anpD1LmDwpmY=",
         "owner": "nix-community",
         "repo": "fenix",
-        "rev": "fcf90c0c4d368b2ca917a7afa6d08e98a397e5fd",
+        "rev": "b7bd9323fe26a3b4f4bddbb2c2a1dacabced2f88",
         "type": "github"
       },
       "original": {
@@ -83,11 +83,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1775087534,
-        "narHash": "sha256-91qqW8lhL7TLwgQWijoGBbiD4t7/q75KTi8NxjVmSmA=",
+        "lastModified": 1778716662,
+        "narHash": "sha256-m1Yf0wZ8j1OHjTc2UwHwyQRSnNeSgLJOd7q5Y45hzi4=",
         "owner": "hercules-ci",
         "repo": "flake-parts",
-        "rev": "3107b77cd68437b9a76194f0f7f9c55f2329ca5b",
+        "rev": "f7c1a2d347e4c52d5fb8d10cb4d94b5884e546fb",
         "type": "github"
       },
       "original": {
@@ -96,13 +96,33 @@
         "type": "github"
       }
     },
+    "nixglhost": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1732211616,
+        "narHash": "sha256-QZCKJoypcwgS3tDNSWMjlxEBZtOYPW3eXV24rMzKsac=",
+        "owner": "numtide",
+        "repo": "nix-gl-host",
+        "rev": "5269b233f83880a0b433eafe026f0bc0d8f1a4a9",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "nix-gl-host",
+        "type": "github"
+      }
+    },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1775595990,
-        "narHash": "sha256-OEf7YqhF9IjJFYZJyuhAypgU+VsRB5lD4DuiMws5Ltc=",
+        "lastModified": 1779102034,
+        "narHash": "sha256-vZJZjLo513IeI8hjzHFc6TDezUd4uCE2Eq4SNO3DNNg=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "4e92bbcdb030f3b4782be4751dc08e6b6cb6ccf2",
+        "rev": "687f05a9184cad4eaf905c48b63649e3a86f5433",
         "type": "github"
       },
       "original": {
@@ -148,11 +168,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1773870109,
-        "narHash": "sha256-ZoTdqZP03DcdoyxvpFHCAek4bkPUTUPUF3oCCgc3dP4=",
+        "lastModified": 1776659114,
+        "narHash": "sha256-qapCOQmR++yZSY43dzrp3wCrkOTLpod+ONtJWBk6iKU=",
         "owner": "pyproject-nix",
         "repo": "build-system-pkgs",
-        "rev": "b6e74f433b02fa4b8a7965ee24680f4867e2926f",
+        "rev": "ffaa2161dd5d63e0e94591f86b54fc239660fb2e",
         "type": "github"
       },
       "original": {
@@ -168,11 +188,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1775439158,
-        "narHash": "sha256-NHY9SJNU019n+8NCabBDtmuzRFeE2gZlYKHowp9bV24=",
+        "lastModified": 1778901413,
+        "narHash": "sha256-GSKXTAnFqRAMlZkJrIPcQMYf+lpMr66K3i60mB9STvc=",
         "owner": "pyproject-nix",
         "repo": "pyproject.nix",
-        "rev": "fb6b728260f3f32761367e9fd1e1a25b4245bcd0",
+        "rev": "a228447c3e179d477c1b6246ef3efa8cfe3c469a",
         "type": "github"
       },
       "original": {
@@ -187,6 +207,7 @@
         "dream2nix": "dream2nix",
         "fenix": "fenix",
         "flake-parts": "flake-parts",
+        "nixglhost": "nixglhost",
         "nixpkgs": "nixpkgs",
         "pyproject-build-systems": "pyproject-build-systems",
         "pyproject-nix": "pyproject-nix",
@@ -197,11 +218,11 @@
     "rust-analyzer-src": {
       "flake": false,
       "locked": {
-        "lastModified": 1775745684,
-        "narHash": "sha256-8MbfLwd60FNa8dRFkjE+G3TT/x21G3Rsplm1bMBQUtU=",
+        "lastModified": 1779074864,
+        "narHash": "sha256-0M3WqsWmtXmv9Ev/vnFfCHosWvISDwiuuhQ104UO3CI=",
         "owner": "rust-lang",
         "repo": "rust-analyzer",
-        "rev": "64ddb549bc9a70d011328746fa46a8883f937b6b",
+        "rev": "cdfe408d4b436e806ff525cb3e67588a6a009ed1",
         "type": "github"
       },
       "original": {
@@ -263,11 +284,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1775706324,
-        "narHash": "sha256-BTb4sydzX2B5/oNbvCdQFeSbk97xEnbb8bk84CiKCOs=",
+        "lastModified": 1778664018,
+        "narHash": "sha256-ogNyNANNLo0SMFevIeUpbTMOL9uUDu/hXvp7JlOYbwQ=",
         "owner": "pyproject-nix",
         "repo": "uv2nix",
-        "rev": "5707df99097375896a3dda811d492a2fabe63500",
+        "rev": "b48abe99ef639cd100c224898529370e5d935294",
         "type": "github"
       },
       "original": {
diff --git a/flake.nix b/flake.nix
index 5c817181b..86cf22207 100644
--- a/flake.nix
+++ b/flake.nix
@@ -45,11 +45,16 @@
       inputs.uv2nix.follows = "uv2nix";
       inputs.nixpkgs.follows = "nixpkgs";
     };
+
+    nixglhost = {
+      url = "github:numtide/nix-gl-host";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
   };
 
   nixConfig = {
-    extra-trusted-public-keys = "exo.cachix.org-1:okq7hl624TBeAR3kV+g39dUFSiaZgLRkLsFBCuJ2NZI=";
-    extra-substituters = "https://exo.cachix.org";
+    extra-trusted-public-keys = "exo.cachix.org-1:okq7hl624TBeAR3kV+g39dUFSiaZgLRkLsFBCuJ2NZI= cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M=";
+    extra-substituters = "https://exo.cachix.org https://cache.nixos-cuda.org";
   };
 
   outputs = inputs:
@@ -70,12 +75,12 @@
       debug = true; # Enable options autocompletion
 
       perSystem = { config, self', pkgs, lib, system, ... }:
-        {
-          # Allow unfree for metal-toolchain (needed for Darwin Metal packages)
-          _module.args.pkgs = import inputs.nixpkgs {
+        let
+          pkgsArgs = {
             inherit system;
             config.allowUnfreePredicate = pkg: (pkg.pname or "") == "metal-toolchain";
             overlays = [
+              inputs.nixglhost.overlays.default
               (import ./nix/apple-sdk-overlay.nix)
               (final: _: {
                 macmon = final.rustPlatform.buildRustPackage {
@@ -92,6 +97,13 @@
               })
             ];
           };
+        in
+        {
+          # Allow unfree for metal-toolchain (needed for Darwin Metal packages)
+          _module.args = {
+            pkgs = import inputs.nixpkgs pkgsArgs;
+            unfreePkgs = import inputs.nixpkgs (pkgsArgs // { config.allowUnfree = true; });
+          };
           treefmt = {
             projectRootFile = "flake.nix";
             programs = {
@@ -119,23 +131,12 @@
           };
 
           packages = {
+            default = self'.packages.exo;
             babeld = pkgs.callPackage ./nix/babeld.nix { };
-          } // lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin (
-            let
-              uvLock = builtins.fromTOML (builtins.readFile ./uv.lock);
-              mlxPackage = builtins.head (builtins.filter (p: p.name == "mlx" && p.source ? git) uvLock.package);
-              uvLockMlxVersion = mlxPackage.version;
-              uvLockMlxRev = builtins.elemAt (builtins.split "#" mlxPackage.source.git) 2;
-            in
-            {
-              metal-toolchain = pkgs.callPackage ./nix/metal-toolchain.nix { };
-              mlx = pkgs.callPackage ./nix/mlx.nix {
-                inherit (self'.packages) metal-toolchain;
-                inherit uvLockMlxVersion uvLockMlxRev;
-              };
-              default = self'.packages.exo;
-            }
-          );
+          } //
+          lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin {
+            metal-toolchain = pkgs.callPackage ./nix/metal-toolchain.nix { };
+          };
 
           devShells.default = with pkgs; pkgs.mkShell {
             inputsFrom = [ self'.checks.cargo-build ];
@@ -146,10 +147,8 @@
                 config.treefmt.build.wrapper
 
                 # PYTHON
-                self'.packages.python
+                self'.packages.exo.passthru.evenv
                 uv
-                ruff
-                basedpyright
 
                 # RUST
                 config.rust.toolchain
@@ -173,7 +172,7 @@
             OPENSSL_NO_VENDOR = "1";
 
             shellHook = ''
-              export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${self'.packages.python}/lib"
+              export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${python313}/lib"
               ${lib.optionalString stdenv.isLinux ''
                 export LD_LIBRARY_PATH="${openssl.out}/lib:$LD_LIBRARY_PATH"
               ''}
diff --git a/justfile b/justfile
index ebaac8fca..96e26c84b 100644
--- a/justfile
+++ b/justfile
@@ -22,7 +22,7 @@ sync-clean:
     uv sync --all-packages --force-reinstall --no-cache
 
 rust-rebuild:
-    cargo run --bin stub_gen
+    PYO3_PYTHON="$(uv run python -c 'import sys; print(sys.executable)')" cargo run --bin stub_gen
     uv sync --reinstall-package exo_pyo3_bindings
 
 build-dashboard:
@@ -36,7 +36,7 @@ package: build-dashboard
     uv run pyinstaller packaging/pyinstaller/exo.spec
     rm -rf build
 
-build-app: package
+build-app: rust-rebuild sync-clean package
     xcodebuild build -project app/EXO/EXO.xcodeproj -scheme EXO -configuration Debug -derivedDataPath app/EXO/build
     @echo "\nBuild complete. Run with:\n  open {{justfile_directory()}}/app/EXO/build/Build/Products/Debug/EXO.app"
 
diff --git a/nix/babeld.nix b/nix/babeld.nix
index b228c33a2..92a1781c8 100644
--- a/nix/babeld.nix
+++ b/nix/babeld.nix
@@ -21,7 +21,6 @@ stdenv.mkDerivation {
   makeFlags = [
     "PREFIX=${placeholder "out"}"
     "ETCDIR=${placeholder "out"}/etc"
-  ]
-  ++ lib.optional stdenv.isDarwin "LDLIBS=''";
+  ];
 }
 
diff --git a/nix/mlx.nix b/nix/mlx.nix
deleted file mode 100644
index 93c4519ba..000000000
--- a/nix/mlx.nix
+++ /dev/null
@@ -1,158 +0,0 @@
-{ stdenv
-, lib
-, fetchFromGitHub
-, replaceVars
-, fetchzip
-, cmake
-, nlohmann_json
-, apple-sdk_26
-, metal-toolchain
-, runCommand
-, fmt
-, python313Packages
-, uvLockMlxVersion
-, uvLockMlxRev
-}:
-
-assert stdenv.isDarwin;
-
-let
-  python = python313Packages.python;
-
-  # Static dependencies included directly during compilation
-  gguf-tools = fetchFromGitHub {
-    owner = "antirez";
-    repo = "gguf-tools";
-    rev = "8fa6eb65236618e28fd7710a0fba565f7faa1848";
-    hash = "sha256-15FvyPOFqTOr5vdWQoPnZz+mYH919++EtghjozDlnSA=";
-  };
-
-  metal_cpp = fetchzip {
-    url = "https://developer.apple.com/metal/cpp/files/metal-cpp_26.zip";
-    hash = "sha256-7n2eI2lw/S+Us6l7YPAATKwcIbRRpaQ8VmES7S8ZjY8=";
-  };
-
-  nanobind = fetchFromGitHub {
-    owner = "wjakob";
-    repo = "nanobind";
-    rev = "v2.10.2";
-    hash = "sha256-io44YhN+VpfHFWyvvLWSanRgbzA0whK8WlDNRi3hahU=";
-    fetchSubmodules = true;
-  };
-
-  mlx = stdenv.mkDerivation rec {
-    pname = "mlx";
-    version = uvLockMlxVersion;
-    pyproject = true;
-
-    src = fetchFromGitHub {
-      owner = "rltakashige";
-      repo = "mlx-jaccl-fix-small-recv";
-      rev = uvLockMlxRev;
-      hash = "sha256-M9x9QBYxwHv2z47qGZNJ4FgJyqLSIZ/3G1fEFQ421Lo=";
-    };
-
-    patches = [
-      (replaceVars ./darwin-build-fixes.patch {
-        sdkVersion = apple-sdk_26.version;
-        metalVersion = metal-toolchain.metalVersion;
-      })
-    ];
-
-    postPatch = ''
-      substituteInPlace mlx/backend/cpu/jit_compiler.cpp \
-        --replace-fail "g++" "$CXX"
-    '';
-
-    dontUseCmakeConfigure = true;
-
-    enableParallelBuilding = true;
-
-    # Allows multiple cores to be used in Python builds.
-    postUnpack = ''
-      export MAKEFLAGS+="''${enableParallelBuilding:+-j$NIX_BUILD_CORES}"
-    '';
-
-    # Updates the wrong fetcher rev attribute
-    passthru.skipBulkUpdate = true;
-
-    env = {
-      DEV_RELEASE = 1;
-      CMAKE_ARGS = toString [
-        (lib.cmakeBool "USE_SYSTEM_FMT" true)
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_GGUFLIB" "${gguf-tools}")
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_JSON" "${nlohmann_json.src}")
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_NANOBIND" "${nanobind}")
-        (lib.cmakeBool "FETCHCONTENT_FULLY_DISCONNECTED" true)
-        (lib.cmakeBool "MLX_BUILD_CPU" true)
-        (lib.cmakeBool "MLX_BUILD_METAL" true)
-        (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_METAL_CPP" "${metal_cpp}")
-        (lib.cmakeOptionType "string" "CMAKE_OSX_DEPLOYMENT_TARGET" "${apple-sdk_26.version}")
-        (lib.cmakeOptionType "filepath" "CMAKE_OSX_SYSROOT" "${apple-sdk_26.passthru.sdkroot}")
-      ];
-      SDKROOT = apple-sdk_26.passthru.sdkroot;
-      MACOSX_DEPLOYMENT_TARGET = apple-sdk_26.version;
-    };
-
-    build-system = [
-      python313Packages.setuptools
-    ];
-
-    nativeBuildInputs = [
-      cmake
-      metal-toolchain
-      python313Packages.pypaBuildHook
-      python313Packages.pypaInstallHook
-      python313Packages.setuptools
-      python313Packages.typing-extensions
-      python313Packages.wheel
-      python313Packages.cmake
-      python313Packages.ninja
-    ];
-
-    buildInputs = [
-      fmt
-      gguf-tools
-      python313Packages.nanobind
-      python313Packages.pybind11
-      apple-sdk_26
-    ];
-
-    # Tests require Metal GPU access which isn't available in the Nix sandbox.
-    # To run tests, build with: nix build --option sandbox false .#mlx.passthru.tests.mlxTest
-    doCheck = false;
-
-    pythonImportsCheck = [ "mlx" ];
-
-    passthru.tests = {
-      # Runs example scripts to verify MLX works. Requires --option sandbox false
-      # since Metal GPU access is needed.
-      mlxTest =
-        runCommand "run-mlx-examples"
-          {
-            buildInputs = [ mlx ];
-            nativeBuildInputs = [ python ];
-          }
-          ''
-            cp ${src}/examples/python/logistic_regression.py .
-            ${python.interpreter} logistic_regression.py
-            rm logistic_regression.py
-
-            cp ${src}/examples/python/linear_regression.py .
-            ${python.interpreter} linear_regression.py
-            rm linear_regression.py
-
-            touch $out
-          '';
-    };
-
-    meta = {
-      homepage = "https://github.com/ml-explore/mlx";
-      description = "Array framework for Apple silicon";
-      changelog = "https://github.com/ml-explore/mlx/releases/tag/${src.tag}";
-      license = lib.licenses.mit;
-      platforms = [ "aarch64-darwin" ];
-    };
-  };
-in
-mlx
diff --git a/packaging/pyinstaller/exo.spec b/packaging/pyinstaller/exo.spec
index 492c9072b..f4d89d4b4 100644
--- a/packaging/pyinstaller/exo.spec
+++ b/packaging/pyinstaller/exo.spec
@@ -1,5 +1,6 @@
 # -*- mode: python ; coding: utf-8 -*-
 
+import sys
 import importlib.util
 import shutil
 from pathlib import Path
@@ -56,6 +57,7 @@ HIDDEN_IMPORTS = sorted(
     set(
         collect_submodules("mlx")
         + _safe_collect("mlx_lm")
+        + _safe_collect("mlx_vlm")
         + _safe_collect("transformers")
     )
 )
@@ -67,18 +69,19 @@ DATAS: list[tuple[str, str]] = [
     (str(EXO_SHARED_MODELS_DIR), "exo/shared/models"),
 ]
 
-MACMON_PATH = shutil.which("macmon")
-if MACMON_PATH is None:
-    raise SystemExit(
-        "macmon binary not found in PATH. "
-        "Install the pinned fork used by exo via: "
-        "cargo install --git https://github.com/vladkens/macmon "
-        "--rev a1cd06b6cc0d5e61db24fd8832e74cd992097a7d macmon --force"
-    )
+if sys.platform == "darwin":
+    MACMON_PATH = shutil.which("macmon")
+    if MACMON_PATH is None:
+        raise SystemExit(
+            "macmon binary not found in PATH. "
+            "Install the pinned fork used by exo via: "
+            "cargo install --git https://github.com/vladkens/macmon "
+            "--rev a1cd06b6cc0d5e61db24fd8832e74cd992097a7d macmon --force"
+        )
 
 BINARIES: list[tuple[str, str]] = [
     (MACMON_PATH, "."),
-]
+] if sys.platform == "darwin" else []
 
 a = Analysis(
     [str(ENTRYPOINT)],
diff --git a/pyproject.toml b/pyproject.toml
index ecb661338..9309a81f8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,9 @@
 [project]
 name = "exo"
-version = "0.3.69"
+version = "0.3.70"
 description = "Exo"
 readme = "README.md"
-requires-python = ">=3.13"
+requires-python = "==3.13.*"
 dependencies = [
   "aiofiles>=24.1.0",
   "aiohttp>=3.12.14",
@@ -15,22 +15,17 @@ dependencies = [
   "huggingface-hub>=1.8.0",
   "psutil>=7.0.0",
   "loguru>=0.7.3",
-  "exo_pyo3_bindings",                         # rust bindings
+  "exo-pyo3-bindings",               # rust bindings
   "anyio==4.11.0",
-  "mlx; sys_platform == 'darwin'",
-  "mlx[cpu]==0.30.6; sys_platform == 'linux'",
-  "mlx-lm",
-  "tiktoken>=0.12.0",                          # required for kimi k2 tokenizer
+  "tiktoken>=0.12.0",                # required for kimi k2 tokenizer
   "hypercorn>=0.18.0",
   "openai-harmony>=0.0.8",
   "httpx>=0.28.1",
   "tomlkit>=0.14.0",
-  "mflux==0.17.2",
   "python-multipart>=0.0.21",
   "msgspec>=0.19.0",
   "zstandard>=0.23.0",
-  "mlx-vlm>=0.3.11",
-  "transformers>=5.0.0,<5.4.0",
+  "transformers>=5.6.2",
 ]
 
 [project.scripts]
@@ -41,32 +36,95 @@ exo = "exo.main:main"
 dev = [
   "basedpyright>=1.29.0",
   "pyinstaller>=6.17.0",
+  "playwright>=1.52.0",
   "pytest>=8.4.0",
   "pytest-asyncio>=1.0.0",
   "pytest-env",
   "ruff>=0.11.13",
 ]
 
-# mlx[cuda] requires a newer version of mlx. the ideal on linux is: default to mlx[cpu] unless[cuda] specified.
 [project.optional-dependencies]
-# cuda = [
-#     "mlx[cuda]==0.26.3",
-# ]
+build = ["nanobind"]
+mlx-none = ["anyio"]
+mlx = [
+  "mlx==0.32.0",
+  "mlx-lm",
+  "mlx-vlm>=0.3.11",
+  "mflux==0.17.5",
+  "torch==2.10.0; sys_platform == 'darwin'",
+  "torch==2.10.0; sys_platform == 'linux'",
+  "torchaudio==2.10.0; sys_platform == 'darwin'",
+  "torchaudio==2.10.0; sys_platform == 'linux'",
+  "torchvision==0.25.0; sys_platform == 'darwin'",
+  "torchvision==0.25.0; sys_platform == 'linux'",
+]
+mlx-cpu = ["exo[mlx]", "mlx-cpu==0.31.2; sys_platform == 'linux'"]
+mlx-cuda12 = [
+  "exo[mlx]",
+  "mlx-cuda-12==0.32.0; sys_platform == 'linux'",
+  "nvidia-ml-py>=13.595.45",
+]
+mlx-cuda13 = [
+  "exo[mlx]",
+  "mlx-cuda-13==0.32.0; sys_platform == 'linux'",
+  "nvidia-ml-py>=13.595.45",
+]
 
 ###
 # workspace configuration
 ###
 
 [tool.uv.workspace]
-members = ["rust/exo_pyo3_bindings", "bench"]
+members = ["rust/exo_pyo3_bindings", "bench", "tools"]
 
 [tool.uv.sources]
-exo_pyo3_bindings = { workspace = true }
-mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" }
-mlx-lm = { git = "https://github.com/rltakashige/mlx-lm", branch = "leo/fix-arrayscache-leak" }
-# Uncomment to use local mlx/mlx-lm development versions:
-# mlx = { path = "/Users/Shared/mlx", editable=true }
-# mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }
+exo-pyo3-bindings = { workspace = true }
+mlx = [
+  { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", branch = "address-rdma-gpu-locks", marker = "sys_platform == 'darwin'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
+
+]
+mlx-lm = { git = "https://github.com/rltakashige/mlx-lm", branch = "leo/deepseek-v4" }
+mflux = { git = "https://github.com/evanev7/mflux", branch = "exo2" }
+torch = [ # one wheel index per accelerator extra (pytorch-cpu / cu128 / cu130); Linux only
+  { index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'" },
+  { index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
+]
+mlx-cuda-12 = [
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
+]
+mlx-cuda-13 = [
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64'" },
+  { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine != 'aarch64'" },
+]
+torchvision = [
+  { index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'" },
+  { index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
+]
+torchaudio = [
+  { index = "pytorch-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu' and extra != 'mlx-cuda13' and extra != 'mlx-cuda12'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'" },
+  { index = "pytorch-cu130", marker = "sys_platform == 'linux' and extra == 'mlx-cuda13'" },
+]
+
+[[tool.uv.index]]
+name = "pytorch-cu130"
+url = "https://download.pytorch.org/whl/cu130"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
 
 [build-system]
 requires = ["uv_build>=0.8.9,<0.9.0"]
@@ -77,7 +135,7 @@ build-backend = "uv_build"
 ###
 
 [tool.basedpyright]
-include = [".venv/lib/mlx", ".venv/lib/mlx_lm", "src", "bench"]
+include = ["src", "bench", "tools"]
 typeCheckingMode = "strict"
 failOnWarnings = true
 
@@ -102,11 +160,23 @@ exclude = [
   "**/rust",
   "**/.github",
 ]
-stubPath = ".mlx_typings"
+stubPath = ".typings"
+
+[[tool.basedpyright.executionEnvironments]]
+root = "src/exo/worker/engines/image"
+reportMissingModuleSource = false
 
 [[tool.basedpyright.executionEnvironments]]
 root = "src"
 
+[[tool.basedpyright.executionEnvironments]]
+root = "bench"
+extraPaths = ["tools/src"]
+
+[[tool.basedpyright.executionEnvironments]]
+root = "tools/src"
+
+
 ###
 # uv configuration
 ###
@@ -116,19 +186,61 @@ root = "src"
 required-version = ">=0.8.6"
 prerelease = "allow"
 environments = ["sys_platform == 'darwin'", "sys_platform == 'linux'"]
-extra-build-dependencies = { "miniaudio" = ["setuptools", "cffi", "pycparser"] }
+override-dependencies = ["opencv-python; python_version < '0'"]
+conflicts = [
+  [
+    { extra = "mlx-cuda13" },
+    { extra = "mlx-cuda12" },
+    { extra = "mlx-cpu" },
+    { extra = "mlx-none" },
+  ],
+]
+
+[tool.uv.extra-build-dependencies]
+miniaudio = ["setuptools", "cffi", "pycparser"]
+mlx = [
+  "setuptools",
+  "typing-extensions",
+  "nanobind",
+  "pybind11",
+  "wheel",
+  "cmake",
+  "ninja",
+]
+mlx-lm = ["setuptools"]
+mflux = ["uv_build"]
+xgrammar = [
+  "nanobind",
+  "setuptools",
+  "scikit-build-core",
+  "packaging",
+  "pathspec",
+]
+rouge-score = ["setuptools"]
+sacrebleu = ["setuptools"]
+sqlitedict = ["setuptools"]
+word2number = ["setuptools"]
+vllm = [
+  "setuptools",
+  "setuptools-scm",
+  "scikit-build-core",
+  "jinja2",
+  "wheel",
+  "markupsafe",
+  "typing-extensions",
+  "torch",
+]
+fastsafetensors = ["setuptools", "pybind11"]
+torch = ["typing-extensions"]
+torchvision = ["torch"]
+torchaudio = ["torch"]
 
 ###
 # ruff configuration
 ###
 
 [tool.ruff]
-extend-exclude = [
-  "shared/protobufs/**",
-  "*mlx_typings/**",
-  "rust/exo_pyo3_bindings/**",
-  "bench/vendor/**",
-]
+extend-exclude = [".typings/**", "rust/exo_pyo3_bindings/**", "bench/vendor/**"]
 
 [tool.ruff.lint]
 extend-select = ["I", "N", "B", "A", "PIE", "SIM"]
@@ -138,5 +250,5 @@ pythonpath = "."
 asyncio_mode = "auto"
 markers = ["slow: marks tests as slow (deselected by default)"]
 env = ["EXO_TESTS=1"]
-addopts = "-m 'not slow' --ignore=tests/start_distributed_test.py"
+addopts = "-m 'not slow' --ignore=tests --ignore=tmp"
 filterwarnings = ["ignore:builtin type Swig:DeprecationWarning"]
diff --git a/python/parts.nix b/python/parts.nix
index 4c2de0cf4..18c01e062 100644
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -1,18 +1,48 @@
 { inputs, ... }:
-{
-  perSystem =
-    { config, self', pkgs, lib, system, ... }:
+let
+  # Load workspace from uv.lock
+  workspace = inputs.uv2nix.lib.workspace.loadWorkspace {
+    workspaceRoot = ../.;
+  };
+
+  mkPythonSet = { pkgs, lib, self', members }:
     let
-      # Load workspace from uv.lock
-      workspace = inputs.uv2nix.lib.workspace.loadWorkspace {
-        workspaceRoot = inputs.self;
+      inherit (pkgs.stdenv.hostPlatform) isLinux isDarwin isx86_64;
+      inherit (pkgs.config) cudaSupport;
+      inherit (pkgs) cudaPackages;
+      libmlx_source =
+        if (builtins.elem "mlx-cuda13" members.exo or [ ]) then "mlx-cuda-13"
+        else if (builtins.elem "mlx-cuda12" members.exo or [ ]) then "mlx-cuda-12"
+        else "mlx-cpu";
+      python = pkgs.python313;
+
+      cuda_cccl_compat = pkgs.runCommand "cuda-cccl-compat" { } ''
+        mkdir -p $out/include
+        ln -s ${cudaPackages.cuda_cccl}/include $out/include/cccl
+      '';
+      cudaLibs = with cudaPackages; [
+        cuda_crt
+        cuda_cudart
+        cuda_cccl
+        cuda_cupti
+        cuda_nvrtc
+        cuda_nvtx
+        cudnn
+        libcufile
+        libcublas
+        libcufft
+        libcurand
+        libcusolver
+        libcusparse
+        libcusparse_lt
+        libnvjitlink
+        libnvshmem
+        nccl
+      ];
+      cudaRoot = pkgs.symlinkJoin {
+        name = "cuda-merged-exo";
+        paths = builtins.concatMap (p: [ (lib.getBin p) (lib.getLib p) (lib.getDev p) ]) (cudaLibs ++ [ cudaPackages.cuda_nvcc cuda_cccl_compat ]);
       };
-
-      # Create overlay from workspace
-      # Use wheels from PyPI for most packages; we override mlx with our pure Nix Metal build
-      overlay = workspace.mkPyprojectOverlay { sourcePreference = "wheel"; };
-
-      # Override overlay to inject Nix-built components
       exoOverlay = final: prev: {
         # Replace workspace exo_pyo3_bindings with Nix-built wheel.
         # Preserve passthru so mkVirtualEnv can resolve dependency groups.
@@ -32,187 +62,240 @@
           '';
         };
       };
+      buildSystemsOverlay = final: prev:
+        lib.optionalAttrs isDarwin
+          {
+            mlx = prev.mlx.overrideAttrs (old:
+              let
+                # Static dependencies included directly during compilation
+                gguf-tools = pkgs.fetchFromGitHub {
+                  owner = "antirez";
+                  repo = "gguf-tools";
+                  rev = "8fa6eb65236618e28fd7710a0fba565f7faa1848";
+                  hash = "sha256-15FvyPOFqTOr5vdWQoPnZz+mYH919++EtghjozDlnSA=";
+                };
 
-      python = pkgs.python313;
+                metal_cpp = pkgs.fetchzip {
+                  url = "https://developer.apple.com/metal/cpp/files/metal-cpp_26.zip";
+                  hash = "sha256-7n2eI2lw/S+Us6l7YPAATKwcIbRRpaQ8VmES7S8ZjY8=";
+                };
 
-      # Overlay to provide build systems and custom packages
-      buildSystemsOverlay = final: prev: {
-        # mlx-lm is a git dependency that needs setuptools
-        mlx-lm = prev.mlx-lm.overrideAttrs (old: {
-          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
-            final.setuptools
-          ];
-        });
-        # rouge-score and sacrebleu don't declare setuptools as a build dependency
-        rouge-score = prev.rouge-score.overrideAttrs (old: {
-          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
-            final.setuptools
-          ];
-        });
-        sacrebleu = prev.sacrebleu.overrideAttrs (old: {
-          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
-            final.setuptools
-          ];
-        });
-        sqlitedict = prev.sqlitedict.overrideAttrs (old: {
-          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
-            final.setuptools
-          ];
-        });
-        word2number = prev.word2number.overrideAttrs (old: {
-          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
-            final.setuptools
-          ];
-        });
-      } // lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin {
-        # Use our pure Nix-built MLX with Metal support (macOS only)
-        mlx = self'.packages.mlx;
+                nanobind = pkgs.fetchFromGitHub {
+                  owner = "wjakob";
+                  repo = "nanobind";
+                  rev = "v2.10.2";
+                  hash = "sha256-io44YhN+VpfHFWyvvLWSanRgbzA0whK8WlDNRi3hahU=";
+                  fetchSubmodules = true;
+                };
+              in
+              {
+                nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ pkgs.cmake self'.packages.metal-toolchain ];
+                # TODO: non-sdk_26 support
+                buildInputs = (old.buildInputs or [ ])
+                ++ [ gguf-tools pkgs.fmt pkgs.nlohmann_json pkgs.apple-sdk_26 ];
+                patches = [
+                  (pkgs.replaceVars ../nix/darwin-build-fixes.patch {
+                    sdkVersion = pkgs.apple-sdk_26.version;
+                    inherit (self'.packages.metal-toolchain) metalVersion;
+                  })
+                ];
+                postPatch = ''
+                  substituteInPlace mlx/backend/cpu/jit_compiler.cpp \
+                    --replace-fail "g++" "${lib.getExe' pkgs.stdenv.cc "c++"}"
+                '';
+
+                DEV_RELEASE = 1;
+                CMAKE_ARGS = toString ([
+                  (lib.cmakeBool "USE_SYSTEM_FMT" true)
+                  (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_GGUFLIB" "${gguf-tools}")
+                  (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_JSON" "${pkgs.nlohmann_json.src}")
+                  (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_NANOBIND" "${nanobind}")
+                  (lib.cmakeBool "FETCHCONTENT_FULLY_DISCONNECTED" true)
+                  (lib.cmakeBool "MLX_BUILD_CPU" true)
+                  (lib.cmakeBool "MLX_BUILD_METAL" true)
+                  (lib.cmakeOptionType "string" "CMAKE_INSTALL_LIBDIR" "lib")
+                  (lib.cmakeOptionType "filepath" "FETCHCONTENT_SOURCE_DIR_METAL_CPP" "${metal_cpp}")
+                  (lib.cmakeOptionType "string" "CMAKE_OSX_DEPLOYMENT_TARGET" "${pkgs.apple-sdk_26.version}")
+                  (lib.cmakeOptionType "filepath" "CMAKE_OSX_SYSROOT" "${pkgs.apple-sdk_26.passthru.sdkroot}")
+                ] ++ lib.optionals (isDarwin && isx86_64) [
+                  (lib.cmakeBool "MLX_ENABLE_X64_MAC" true)
+                ]);
+                SDKROOT = pkgs.apple-sdk_26.passthru.sdkroot;
+                MACOSX_DEPLOYMENT_TARGET = pkgs.apple-sdk_26.version;
+              });
+          } // lib.optionalAttrs isLinux {
+          mlx = prev.mlx.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ lib.optionals cudaSupport [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ lib.optionals cudaSupport cudaLibs;
+            postInstall = ''
+              cp -r "${final.${libmlx_source}}/${final.python.sitePackages}/mlx" "$out/${final.python.sitePackages}/mlx/"
+            '';
+            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
+          });
+        } // lib.optionalAttrs cudaSupport {
+          "${libmlx_source}" = prev."${libmlx_source}".overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ cudaLibs;
+            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
+          });
+          nvidia-cufile = prev.nvidia-cufile.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ [ pkgs.rdma-core ];
+          });
+          nvidia-cusolver = prev.nvidia-cusolver.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ cudaLibs;
+          });
+          nvidia-nvshmem-cu13 = prev.nvidia-nvshmem-cu13.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ [ pkgs.rdma-core pkgs.pmix pkgs.libfabric pkgs.ucx pkgs.openmpi ];
+          });
+          nvidia-cusparse = prev.nvidia-cusparse.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ cudaLibs;
+          });
+          torch = prev.torch.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ cudaLibs;
+            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
+          });
+          torchaudio = prev.torchaudio.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            buildInputs = old.buildInputs ++ [ cudaPackages.cuda_cudart ];
+            preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
+          });
+          torchvision = prev.torchvision.overrideAttrs (old: {
+            nativeBuildInputs = old.nativeBuildInputs ++ [ pkgs.autoAddDriverRunpath ];
+            preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
+          });
+
+          torch-c-dlpack-ext = prev.torch-c-dlpack-ext.overrideAttrs (old: {
+            buildInputs = old.buildInputs ++ cudaLibs;
+            autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
+            preFixup = "addAutoPatchelfSearchPath '${final.torch}'";
+          });
+
+        } // lib.optionalAttrs (cudaSupport && isx86_64) {
+          numba = prev.numba.overrideAttrs (old: {
+            buildInputs = (old.buildInputs or [ ]) ++ [ pkgs.tbb ];
+          });
+        };
+      pyprojectOverlay = workspace.mkPyprojectOverlay {
+        sourcePreference = "wheel";
+        dependencies = members;
+      };
+      editableOverlay = workspace.mkEditablePyprojectOverlay {
+        # Use environment variable pointing to editable root directory
+        root = "$REPO_ROOT";
+        members = [ "exo" "exo-bench" ];
       };
-
-      # Additional overlay for Linux-specific fixes (type checking env).
-      # Native wheels have shared lib dependencies we don't need at type-check time.
-      linuxOverlay = final: prev:
-        let
-          ignoreMissing = drv: drv.overrideAttrs { autoPatchelfIgnoreMissingDeps = [ "*" ]; };
-          nvidiaPackages = lib.filterAttrs (name: _: lib.hasPrefix "nvidia-" name) prev;
-        in
-        lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux (
-          (lib.mapAttrs (_: ignoreMissing) nvidiaPackages) // {
-            mlx = ignoreMissing prev.mlx;
-            mlx-cuda-13 = prev.mlx-cuda-13.overrideAttrs (old: {
-              buildInputs = (old.buildInputs or [ ]) ++ [
-                final.nvidia-cublas
-                final.nvidia-cuda-nvrtc
-                final.nvidia-cudnn-cu13
-                final.nvidia-nccl-cu13
-              ];
-              preFixup = ''
-                addAutoPatchelfSearchPath ${final.nvidia-cublas}
-                addAutoPatchelfSearchPath ${final.nvidia-cuda-nvrtc}
-                addAutoPatchelfSearchPath ${final.nvidia-cudnn-cu13}
-                addAutoPatchelfSearchPath ${final.nvidia-nccl-cu13}
-              '';
-              autoPatchelfIgnoreMissingDeps = [ "libcuda.so.1" ];
-            });
-            torch = ignoreMissing prev.torch;
-            triton = ignoreMissing prev.triton;
-          }
-        );
-
       pythonSet = (pkgs.callPackage inputs.pyproject-nix.build.packages {
         inherit python;
       }).overrideScope (
         lib.composeManyExtensions [
           inputs.pyproject-build-systems.overlays.default
-          overlay
+          pyprojectOverlay
           exoOverlay
           buildSystemsOverlay
-          linuxOverlay
         ]
       );
-      # mlx-cpu and mlx-cuda-13 both ship mlx/ site-packages files; keep first.
-      # mlx-cpu/mlx-cuda-13 and nvidia-cudnn-cu12/cu13 ship overlapping files.
-      venvCollisionPaths = lib.optionals pkgs.stdenv.hostPlatform.isLinux [
-        "lib/python3.13/site-packages/mlx*"
-        "lib/python3.13/site-packages/nvidia*"
-      ];
+      # mlx and mlx-cuda ship clashing cmake files - we don't need them at runtime anyway
+      venv = name: (pythonSet.mkVirtualEnv "${name}-venv" members).overrideAttrs (_: { venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" "lib/python${python.pythonVersion}/site-packages/build_backend.py" ]; });
+      mkApp =
+        let
+          libPath = lib.makeLibraryPath (
+            [ pkgs.stdenv.cc.cc.lib ] ++ lib.optionals cudaSupport [ cudaRoot ]
+          );
+        in
+        text: name: pkgs.writeShellApplication {
+          inherit name;
+          text = ''
+            LD_LIBRARY_PATH="${libPath}''${LD_LIBRARY_PATH:+:}''${LD_LIBRARY_PATH:-}" exec \
+              ${lib.optionalString cudaSupport "nixglhost "} ${text}
+          '';
+          runtimeEnv = {
+            EXO_DASHBOARD_DIR = self'.packages.dashboard;
+            EXO_RESOURCES_DIR = inputs.self + /resources;
+          };
+          runtimeInputs = [
+            (venv name)
+          ] ++ lib.optionals cudaSupport [ pkgs.nix-gl-host ]
+          ++ lib.optionals isDarwin [ pkgs.macmon ];
+          passthru = {
+            venv = venv name;
+            evenv = ((pythonSet.overrideScope editableOverlay).mkVirtualEnv "${name}-evenv" (members // { exo = (members.exo or [ ]) ++ [ "dev" ]; })).overrideAttrs (_: {
+              venvSkip = [ "lib/python${python.pythonVersion}/site-packages/mlx/share/cmake/*" "lib/python${python.pythonVersion}/site-packages/build_backend.py" ];
+            });
+          } // lib.optionalAttrs cudaSupport {
+            inherit cudaRoot;
+          };
+        };
 
-      # Exclude bench deps from main env (bench has its own benchVenv)
-      exoDeps = removeAttrs workspace.deps.default [ "exo-bench" ];
-
-      exoVenv = (pythonSet.mkVirtualEnv "exo-env" exoDeps).overrideAttrs {
-        venvIgnoreCollisions = venvCollisionPaths;
-      };
+    in
+    {
+      inherit venv;
+      mkPythonScript = path: mkApp ''python ${path} "$@"'';
+      exo = mkApp ''exo "$@"'' "exo";
+    };
+in
+{
+  perSystem =
+    { self', pkgs, unfreePkgs, lib, ... }:
+    let
+      inherit (pkgs.stdenv.hostPlatform) isLinux;
+      inherit (mkPythonSet { inherit self' pkgs lib; members = { exo = [ "mlx-cpu" ]; }; }) exo;
 
       # Virtual environment with dev dependencies for testing
-      testVenv = (pythonSet.mkVirtualEnv "exo-test-env" (
-        exoDeps // {
-          exo = [ "dev" ]; # Include pytest, pytest-asyncio, pytest-env
-        }
-      )).overrideAttrs {
-        venvIgnoreCollisions = venvCollisionPaths;
+      testVenv = (mkPythonSet {
+        inherit self' pkgs lib; members = {
+        exo = [ "dev" "mlx-cpu" ]; # Include pytest, pytest-asyncio, pytest-env
       };
+      }).venv "exo-test";
 
-      mkPythonScript = name: path: pkgs.writeShellApplication {
-        inherit name;
-        runtimeInputs = [ exoVenv ];
-        runtimeEnv = {
-          EXO_DASHBOARD_DIR = self'.packages.dashboard;
-          EXO_RESOURCES_DIR = inputs.self + /resources;
-        };
-        text = ''exec python ${path} "$@"'';
-      };
-
-      benchVenv = pythonSet.mkVirtualEnv "exo-bench-env" {
-        exo-bench = [ ];
-      };
-
-      mkBenchScript = name: path: pkgs.writeShellApplication {
-        inherit name;
-        runtimeInputs = [ benchVenv ];
-        text = ''exec python ${path} "$@"'';
+      mkBenchScript = (mkPythonSet {
+        inherit self' pkgs lib; members = {
+        exo = [ "mlx-cpu" ];
+        exo-bench = [ ]; # bench workspace member; empty list selects no extras
       };
+      }).mkPythonScript;
 
       mkSimplePythonScript = name: path: pkgs.writeShellApplication {
         inherit name;
         runtimeInputs = [ pkgs.python313 ];
         text = ''exec python ${path} "$@"'';
       };
-
-      exoPackage = pkgs.runCommand "exo"
-        {
-          nativeBuildInputs = [ pkgs.makeWrapper ];
-        }
-        ''
-          mkdir -p $out/bin
-
-          # Create wrapper script
-          makeWrapper ${exoVenv}/bin/exo $out/bin/exo \
-            --set EXO_DASHBOARD_DIR ${self'.packages.dashboard} \
-            --set EXO_RESOURCES_DIR ${inputs.self + /resources} \
-            ${lib.optionalString pkgs.stdenv.hostPlatform.isDarwin "--prefix PATH : ${pkgs.macmon}/bin"}
-        '';
+      # If someone is particularly interested in cuda12 support in nix, please open an issue.
+      # Until then, it's more hassle than it's worth.
+      #cuda12Set = mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_12) pkgs; members = { exo = [ "mlx-cuda12" ]; }; };
+      cuda13Set = mkPythonSet { inherit self' lib; inherit (unfreePkgs.pkgsCuda.cudaPackages_13) pkgs; members = { exo = [ "mlx-cuda13" ]; }; };
     in
     {
-      # Python package only available on macOS (requires MLX/Metal)
-      packages = lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin
-        {
-          exo = exoPackage;
-          # Test environment for running pytest outside of Nix sandbox (needs GPU access)
-          exo-test-env = testVenv;
-        } // {
-
-        inherit python;
-
+      packages = {
+        inherit exo;
+        # for running tests in ci
+        exo-test-env = testVenv;
         exo-bench = mkBenchScript "exo-bench" (inputs.self + /bench/exo_bench.py);
         exo-eval = mkBenchScript "exo-eval" (inputs.self + /bench/exo_eval.py);
         exo-eval-tool-calls = mkBenchScript "exo-eval-tool-calls" (inputs.self + /bench/eval_tool_calls.py);
+        # used by ./tests/run_exo_on.sh
         exo-get-all-models-on-cluster = mkSimplePythonScript "exo-get-all-models-on-cluster" (inputs.self + /tests/get_all_models_on_cluster.py);
-        exo-mlx-bandwidth-test = mkPythonScript "exo-mlx-bandwidth-test" (inputs.self + /bench/test_mlx_bandwidth.py);
+      } // lib.optionalAttrs isLinux {
+        #exo-cuda-12 = cuda12Set.exo;
+        exo-cuda-13 = cuda13Set.exo;
       };
 
       checks = {
-        # Ruff linting (works on all platforms)
         lint = pkgs.runCommand "ruff-lint" { } ''
           export RUFF_CACHE_DIR="$TMPDIR/ruff-cache"
           ${pkgs.ruff}/bin/ruff check ${inputs.self}
           touch $out
         '';
 
-        # Hermetic basedpyright type checking
-        typecheck = pkgs.runCommand "typecheck"
-          {
-            nativeBuildInputs = [
-              testVenv
-              pkgs.basedpyright
-            ];
-          }
-          ''
-            cd ${inputs.self}
-            export HOME=$TMPDIR
-            basedpyright --pythonpath ${testVenv}/bin/python
-            touch $out
-          '';
+        typecheck = pkgs.runCommand "typecheck" { nativeBuildInputs = [ testVenv ]; } ''
+          cd ${inputs.self}
+          basedpyright
+          touch $out
+        '';
       };
     };
 }
diff --git a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-4bit.toml b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-4bit.toml
index 1e0c0e10d..0848f027b 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-4bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "4bit"
 base_model = "FLUX.1 Kontext"
 capabilities = ["image_edit"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 15475325472
diff --git a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-8bit.toml b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-8bit.toml
index e20207189..0c142241b 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev-8bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "8bit"
 base_model = "FLUX.1 Kontext"
 capabilities = ["image_edit"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 21426029632
diff --git a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev.toml b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev.toml
index 14cdf2703..1087a2cf7 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Kontext-dev.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = ""
 base_model = "FLUX.1 Kontext"
 capabilities = ["image_edit"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 33327437952
diff --git a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
index fd9d86086..5ccd4a413 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-4bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "4bit"
 base_model = "FLUX.1 Krea"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 15475325472
diff --git a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
index 1d9221a70..288b3949b 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev-8bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "8bit"
 base_model = "FLUX.1 Krea"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 21426029632
diff --git a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
index 516fdb881..4b3e898d0 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-Krea-dev.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = ""
 base_model = "FLUX.1 Krea"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 33327437952
diff --git a/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml b/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
index 82b03badd..7bfc489cf 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-4bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "4bit"
 base_model = "FLUX.1 Dev"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 15475325472
diff --git a/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml b/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
index e35629d8d..52f6dba51 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev-8bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "8bit"
 base_model = "FLUX.1 Dev"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 21426029632
diff --git a/resources/image_model_cards/exolabs--FLUX.1-dev.toml b/resources/image_model_cards/exolabs--FLUX.1-dev.toml
index 44e5533dd..39953de09 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-dev.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-dev.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = ""
 base_model = "FLUX.1 Dev"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 33327437952
diff --git a/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml b/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
index 4d6f97b83..c51bb2d70 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-4bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "4bit"
 base_model = "FLUX.1 Schnell"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 15470210592
diff --git a/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml b/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
index 56b2faebe..634a94036 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell-8bit.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = "8bit"
 base_model = "FLUX.1 Schnell"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 21415799872
diff --git a/resources/image_model_cards/exolabs--FLUX.1-schnell.toml b/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
index efb08522e..4cd6a5251 100644
--- a/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
+++ b/resources/image_model_cards/exolabs--FLUX.1-schnell.toml
@@ -7,6 +7,7 @@ family = "flux"
 quantization = ""
 base_model = "FLUX.1 Schnell"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 33306978432
diff --git a/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml b/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
index b11765a1a..16175cc14 100644
--- a/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-4bit.toml
@@ -8,6 +8,7 @@ family = "qwen-image"
 quantization = "4bit"
 base_model = "Qwen Image"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 26799533856
diff --git a/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml b/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
index 64f2b55a7..76d107651 100644
--- a/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-8bit.toml
@@ -8,6 +8,7 @@ family = "qwen-image"
 quantization = "8bit"
 base_model = "Qwen Image"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 37014734400
diff --git a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
index 13992cbe8..80e9d8e5d 100644
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-4bit.toml
@@ -8,6 +8,7 @@ family = "qwen-image"
 quantization = "4bit"
 base_model = "Qwen Image Edit"
 capabilities = ["image_edit"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 26799533856
diff --git a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
index 60b7e3dd8..ce7d6e20c 100644
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509-8bit.toml
@@ -8,6 +8,7 @@ family = "qwen-image"
 quantization = "8bit"
 base_model = "Qwen Image Edit"
 capabilities = ["image_edit"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 37014734400
diff --git a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
index 9c68b2218..81fcb3ae6 100644
--- a/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image-Edit-2509.toml
@@ -8,6 +8,7 @@ family = "qwen-image"
 quantization = ""
 base_model = "Qwen Image Edit"
 capabilities = ["image_edit"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 57445135488
diff --git a/resources/image_model_cards/exolabs--Qwen-Image.toml b/resources/image_model_cards/exolabs--Qwen-Image.toml
index 5a86ccfef..734a8d0b3 100644
--- a/resources/image_model_cards/exolabs--Qwen-Image.toml
+++ b/resources/image_model_cards/exolabs--Qwen-Image.toml
@@ -8,6 +8,7 @@ family = "qwen-image"
 quantization = ""
 base_model = "Qwen Image"
 capabilities = ["image_gen"]
+backends = ["MlxMetal"]
 
 [storage_size]
 in_bytes = 57445135488
diff --git a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
index 011664823..39dba047e 100644
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-4bit.toml
@@ -8,8 +8,15 @@ family = "deepseek"
 quantization = "4bit"
 base_model = "DeepSeek V3.1"
 capabilities = ["text", "thinking", "thinking_toggle"]
+reasoning_dialect = "post_last_user"
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 405874409472
+
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V3.1/blob/main/generation_config.json
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V3.1/discussions/19
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
index 62313d3a8..372aabaf1 100644
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.1-8bit.toml
@@ -8,8 +8,15 @@ family = "deepseek"
 quantization = "8bit"
 base_model = "DeepSeek V3.1"
 capabilities = ["text", "thinking", "thinking_toggle"]
+reasoning_dialect = "post_last_user"
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 765577920512
+
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V3.1/blob/main/generation_config.json
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V3.1/discussions/19
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-4bit.toml b/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-4bit.toml
index 5288181ee..071d8e61b 100644
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-4bit.toml
@@ -8,8 +8,15 @@ family = "deepseek"
 quantization = "4bit"
 base_model = "DeepSeek V3.2"
 capabilities = ["text", "thinking", "thinking_toggle"]
+reasoning_dialect = "tool_conditional"
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 378086226621
+
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V3.2/blob/main/generation_config.json
+# Source: https://docs.vllm.ai/projects/recipes/en/latest/DeepSeek/DeepSeek-V3_2.html
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-8bit.toml b/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-8bit.toml
index 87f81738e..6f7337237 100644
--- a/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V3.2-8bit.toml
@@ -8,8 +8,15 @@ family = "deepseek"
 quantization = "8bit"
 base_model = "DeepSeek V3.2"
 capabilities = ["text", "thinking", "thinking_toggle"]
+reasoning_dialect = "tool_conditional"
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 755957120916
+
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V3.2/blob/main/generation_config.json
+# Source: https://docs.vllm.ai/projects/recipes/en/latest/DeepSeek/DeepSeek-V3_2.html
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--DeepSeek-V4-Flash.toml b/resources/inference_model_cards/mlx-community--DeepSeek-V4-Flash.toml
new file mode 100644
index 000000000..c4f64474a
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V4-Flash.toml
@@ -0,0 +1,21 @@
+model_id = "mlx-community/DeepSeek-V4-Flash"
+n_layers = 43
+hidden_size = 4096
+num_key_value_heads = 1
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "deepseek"
+quantization = "8bit"
+base_model = "DeepSeek V4 Flash"
+capabilities = ["text", "thinking", "thinking_toggle"]
+reasoning_dialect = "tool_conditional"
+
+context_length = 1048576
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 155095760030
+
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--DeepSeek-V4-Pro.toml b/resources/inference_model_cards/mlx-community--DeepSeek-V4-Pro.toml
new file mode 100644
index 000000000..00dd3e66e
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--DeepSeek-V4-Pro.toml
@@ -0,0 +1,21 @@
+model_id = "mlx-community/DeepSeek-V4-Pro"
+n_layers = 61
+hidden_size = 7168
+num_key_value_heads = 1
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "deepseek"
+quantization = "8bit"
+base_model = "DeepSeek V4 Pro"
+capabilities = ["text", "thinking", "thinking_toggle"]
+reasoning_dialect = "tool_conditional"
+
+context_length = 1048576
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 849681803879
+
+# Source: https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
index 644fe4255..63b9ee4e3 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-8bit.toml
@@ -8,8 +8,13 @@ family = "glm"
 quantization = "8bit"
 base_model = "GLM 4.5 Air"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 122406567936
+
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
index a7b5c37a9..489d01001 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.5-Air-bf16.toml
@@ -8,8 +8,13 @@ family = "glm"
 quantization = "bf16"
 base_model = "GLM 4.5 Air"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 229780750336
+
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml b/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
index c8b32a382..4a10e257e 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-4bit.toml
@@ -8,8 +8,15 @@ family = "glm"
 quantization = "4bit"
 base_model = "GLM 4.7"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 198556925568
+
+# Source: https://huggingface.co/zai-org/GLM-4.7
+# Source: https://unsloth.ai/docs/models/glm-4.7-flash
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml b/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
index 7eb301e55..108cab88a 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-6bit.toml
@@ -8,8 +8,15 @@ family = "glm"
 quantization = "6bit"
 base_model = "GLM 4.7"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 286737579648
+
+# Source: https://huggingface.co/zai-org/GLM-4.7
+# Source: https://unsloth.ai/docs/models/glm-4.7-flash
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml b/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
index 91365de8d..6fead9cf3 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-8bit-gs32.toml
@@ -8,8 +8,15 @@ family = "glm"
 quantization = "8bit"
 base_model = "GLM 4.7"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 396963397248
+
+# Source: https://huggingface.co/zai-org/GLM-4.7
+# Source: https://unsloth.ai/docs/models/glm-4.7-flash
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
index d0be6d821..47e56efd4 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-4bit.toml
@@ -8,8 +8,15 @@ family = "glm"
 quantization = "4bit"
 base_model = "GLM 4.7 Flash"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 19327352832
+
+# Source: https://huggingface.co/zai-org/GLM-4.7-Flash
+# Source: https://unsloth.ai/docs/models/glm-4.7-flash
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
index 9c7136dbf..fc685138d 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-5bit.toml
@@ -8,8 +8,15 @@ family = "glm"
 quantization = "5bit"
 base_model = "GLM 4.7 Flash"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 22548578304
+
+# Source: https://huggingface.co/zai-org/GLM-4.7-Flash
+# Source: https://unsloth.ai/docs/models/glm-4.7-flash
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
index cf2ed455f..75f2aca01 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-6bit.toml
@@ -8,8 +8,15 @@ family = "glm"
 quantization = "6bit"
 base_model = "GLM 4.7 Flash"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 26843545600
+
+# Source: https://huggingface.co/zai-org/GLM-4.7-Flash
+# Source: https://unsloth.ai/docs/models/glm-4.7-flash
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
index 879b322c6..83aef8218 100644
--- a/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-4.7-Flash-8bit.toml
@@ -8,8 +8,15 @@ family = "glm"
 quantization = "8bit"
 base_model = "GLM 4.7 Flash"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 34359738368
+
+# Source: https://huggingface.co/zai-org/GLM-4.7-Flash
+# Source: https://unsloth.ai/docs/models/glm-4.7-flash
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-5-8bit.toml b/resources/inference_model_cards/mlx-community--GLM-5-8bit.toml
index 37b0fb2ab..0798598b8 100644
--- a/resources/inference_model_cards/mlx-community--GLM-5-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-8bit.toml
@@ -8,8 +8,14 @@ family = "glm"
 quantization = "8bit"
 base_model = "GLM-5"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 790517400864
+
+# Source: https://huggingface.co/zai-org/GLM-5
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml b/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml
index 2874e4e60..69272d25a 100644
--- a/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml
@@ -8,8 +8,14 @@ family = "glm"
 quantization = "MXFP4-Q8"
 base_model = "GLM-5"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 405478939008
+
+# Source: https://huggingface.co/zai-org/GLM-5
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-5-bf16.toml b/resources/inference_model_cards/mlx-community--GLM-5-bf16.toml
index 1086d86a4..05c0e4577 100644
--- a/resources/inference_model_cards/mlx-community--GLM-5-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-bf16.toml
@@ -8,8 +8,14 @@ family = "glm"
 quantization = "bf16"
 base_model = "GLM-5"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 202752
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 1487822475264
+
+# Source: https://huggingface.co/zai-org/GLM-5
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-5.1-DQ4plus-q8.toml b/resources/inference_model_cards/mlx-community--GLM-5.1-DQ4plus-q8.toml
new file mode 100644
index 000000000..c92ffd252
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--GLM-5.1-DQ4plus-q8.toml
@@ -0,0 +1,21 @@
+model_id = "mlx-community/GLM-5.1-DQ4plus-q8"
+n_layers = 78
+hidden_size = 6144
+num_key_value_heads = 64
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "8bit"
+base_model = "GLM-5.1"
+capabilities = ["text", "thinking"]
+reasoning_dialect = "post_last_user"
+context_length = 202752
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 465173655552
+
+# Source: https://huggingface.co/zai-org/GLM-5.1
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-5.1-MXFP4-Q8.toml b/resources/inference_model_cards/mlx-community--GLM-5.1-MXFP4-Q8.toml
new file mode 100644
index 000000000..8230d2fdb
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--GLM-5.1-MXFP4-Q8.toml
@@ -0,0 +1,21 @@
+model_id = "mlx-community/GLM-5.1-MXFP4-Q8"
+n_layers = 78
+hidden_size = 6144
+num_key_value_heads = 64
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "MXFP4-Q8"
+base_model = "GLM-5.1"
+capabilities = ["text", "thinking"]
+reasoning_dialect = "post_last_user"
+context_length = 202752
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 405480321024
+
+# Source: https://huggingface.co/zai-org/GLM-5.1
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--GLM-5.1.toml b/resources/inference_model_cards/mlx-community--GLM-5.1.toml
new file mode 100644
index 000000000..a59dddfb7
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--GLM-5.1.toml
@@ -0,0 +1,21 @@
+model_id = "mlx-community/GLM-5.1"
+n_layers = 78
+hidden_size = 6144
+num_key_value_heads = 64
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "glm"
+quantization = "bf16"
+base_model = "GLM-5.1"
+capabilities = ["text", "thinking"]
+reasoning_dialect = "post_last_user"
+context_length = 202752
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 1487822475264
+
+# Source: https://huggingface.co/zai-org/GLM-5.1
+# Source: https://docs.z.ai/api-reference/llm/chat-completion
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
index 878aa2aa0..f8ad022c0 100644
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Instruct-4bit.toml
@@ -10,6 +10,11 @@ base_model = "Kimi K2"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 620622774272
+
+# Source: https://huggingface.co/moonshotai/Kimi-K2-Instruct
+# Source: https://platform.kimi.ai/docs/guide/kimi-k2-quickstart
+[sampling_defaults]
+temperature = 0.6
diff --git a/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml b/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
index 37f4befc9..cccffb5dc 100644
--- a/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2-Thinking.toml
@@ -8,8 +8,13 @@ family = "kimi"
 quantization = ""
 base_model = "Kimi K2"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "suffix"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 706522120192
+
+# Source: https://huggingface.co/moonshotai/Kimi-K2-Thinking
+# Source: https://platform.kimi.ai/docs/guide/use-kimi-k2-thinking-model
+[sampling_defaults]
+temperature = 1.0
diff --git a/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml b/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
index efe7d4bbe..69e17d4e5 100644
--- a/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2.5.toml
@@ -8,9 +8,9 @@ family = "kimi"
 quantization = ""
 base_model = "Kimi K2.5"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "suffix"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 662498705408
 
@@ -19,3 +19,17 @@ image_token_id = 163605
 model_type = "kimi_vl"
 weights_repo = "davehind/Kimi-K2.5-vision"
 processor_repo = "moonshotai/Kimi-K2.5"
+
+# Source: https://deepwiki.com/MoonshotAI/Kimi-K2.5/3.7-recommended-parameters
+# Source: https://unsloth.ai/docs/models/kimi-k2.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+min_p = 0.01
+
+# Source: https://deepwiki.com/MoonshotAI/Kimi-K2.5/3.7-recommended-parameters
+# Source: https://unsloth.ai/docs/models/kimi-k2.5
+[sampling_defaults.non_thinking]
+temperature = 0.6
+top_p = 0.95
+min_p = 0.01
diff --git a/resources/inference_model_cards/mlx-community--Kimi-K2.6-mlx-DQ3_K_M-q8.toml b/resources/inference_model_cards/mlx-community--Kimi-K2.6-mlx-DQ3_K_M-q8.toml
new file mode 100644
index 000000000..b2444d7d2
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Kimi-K2.6-mlx-DQ3_K_M-q8.toml
@@ -0,0 +1,33 @@
+model_id = "mlx-community/Kimi-K2.6-mlx-DQ3_K_M-q8"
+n_layers = 61
+hidden_size = 7168
+num_key_value_heads = 64
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "kimi"
+quantization = "3bit"
+base_model = "Kimi K2.6"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "suffix"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 470628683776
+
+[vision]
+image_token_id = 163605
+model_type = "kimi_vl"
+weights_repo = "exolabs/Kimi-K2.6-vision"
+processor_repo = "moonshotai/Kimi-K2.6"
+
+# Source: https://huggingface.co/moonshotai/Kimi-K2.6
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+min_p = 0.01
+
+# Source: https://huggingface.co/moonshotai/Kimi-K2.6
+[sampling_defaults.non_thinking]
+temperature = 0.6
+top_p = 0.95
+min_p = 0.01
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-4bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-4bit.toml
index d98cabdd9..96ea196b4 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-4bit.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Llama-3.1-Nemotron-70B-Instruct"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 39688355840
+
+# Source: https://huggingface.co/RedHatAI/Llama-3.1-Nemotron-70B-Instruct-HF-FP8-dynamic
+# Source: https://deepinfra.com/nvidia/Llama-3.1-Nemotron-70B-Instruct/api
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-8bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-8bit.toml
index 4f4297abb..67056553a 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-8bit.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Llama-3.1-Nemotron-70B-Instruct"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 74964549632
+
+# Source: https://huggingface.co/RedHatAI/Llama-3.1-Nemotron-70B-Instruct-HF-FP8-dynamic
+# Source: https://deepinfra.com/nvidia/Llama-3.1-Nemotron-70B-Instruct/api
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-bf16.toml b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-bf16.toml
index 4ef63c170..531ab7a7c 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-70B-Instruct-HF-bf16.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Llama-3.1-Nemotron-70B-Instruct"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 141107412992
+
+# Source: https://huggingface.co/RedHatAI/Llama-3.1-Nemotron-70B-Instruct-HF-FP8-dynamic
+# Source: https://deepinfra.com/nvidia/Llama-3.1-Nemotron-70B-Instruct/api
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-4bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-4bit.toml
index 687c56644..6d355ea93 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-4bit.toml
@@ -9,6 +9,15 @@ base_model = "NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 2538706944
+
+# Source: https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+[sampling_defaults.non_thinking]
+temperature = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-8bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-8bit.toml
index dae6fd93b..98aa06041 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-8bit.toml
@@ -9,6 +9,15 @@ base_model = "NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 4794980352
+
+# Source: https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+[sampling_defaults.non_thinking]
+temperature = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-bf16.toml b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-bf16.toml
index 0e2616466..c479bafa7 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.1-Nemotron-Nano-4B-v1.1-bf16.toml
@@ -9,6 +9,15 @@ base_model = "NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 9025492992
+
+# Source: https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+[sampling_defaults.non_thinking]
+temperature = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
index fa30edc32..4a70c63a3 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-1B-Instruct-4bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.2 1B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 729808896
+
+# Source: https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Llama-3.2-1B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
index 6255db7bf..9e21b1312 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-4bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.2 3B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 1863319552
+
+# Source: https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Llama-3.2-3B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
index e2de35ecf..b3deee1bb 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.2-3B-Instruct-8bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.2 3B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 3501195264
+
+# Source: https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Llama-3.2-3B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
index ef81e828f..cd2d96007 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-4bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.3 70B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 40652242944
+
+# Source: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Llama-3.3-70B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml
index fb83f3d00..0ffae4af1 100644
--- a/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Llama-3.3-70B-Instruct-8bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.3 70B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 76799803392
+
+# Source: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Llama-3.3-70B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml
index 893130879..5a2d14db5 100644
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.1 70B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 40652242944
+
+# Source: https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Meta-Llama-3.1-70B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml
index a9ab123b2..cfa7e57b7 100644
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.1 8B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 4637851648
+
+# Source: https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml
index 84cdbaf1c..eb2163a94 100644
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.1 8B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 8954839040
+
+# Source: https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml
index f2121d649..4caefc149 100644
--- a/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.1 8B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 16882073600
+
+# Source: https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
index 0b22f0111..0865a4647 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-3bit.toml
@@ -8,8 +8,15 @@ family = "minimax"
 quantization = "3bit"
 base_model = "MiniMax M2.1"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 100086644736
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.1
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.1
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
index 32e5bc3f4..997a89bf7 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.1-8bit.toml
@@ -8,8 +8,15 @@ family = "minimax"
 quantization = "8bit"
 base_model = "MiniMax M2.1"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 242986745856
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.1
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.1
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.5-4bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.5-4bit.toml
index c64185124..75e741d0a 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.5-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.5-4bit.toml
@@ -8,8 +8,15 @@ family = "minimax"
 quantization = "4bit"
 base_model = "MiniMax M2.5"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 128666664960
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.5
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.5-6bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.5-6bit.toml
index 9c366e6ec..6bf42d8b8 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.5-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.5-6bit.toml
@@ -8,8 +8,15 @@ family = "minimax"
 quantization = "6bit"
 base_model = "MiniMax M2.5"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 185826705408
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.5
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.5-8bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.5-8bit.toml
index c6946fa4d..aa6457943 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.5-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.5-8bit.toml
@@ -8,8 +8,15 @@ family = "minimax"
 quantization = "8bit"
 base_model = "MiniMax M2.5"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 242986745856
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.5
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit-mxfp4.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit-mxfp4.toml
index 7e6ffffd5..2759c97f1 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit-mxfp4.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit-mxfp4.toml
@@ -8,8 +8,16 @@ family = "minimax"
 quantization = "4bit-mxfp4"
 base_model = "MiniMax M2.7"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 121537496794
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.7
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.7
+# Source: https://unsloth.ai/docs/models/minimax-m27
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit.toml
index fc86e18e0..8d1a5ad25 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-4bit.toml
@@ -8,8 +8,16 @@ family = "minimax"
 quantization = "4bit"
 base_model = "MiniMax M2.7"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 128682598717
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.7
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.7
+# Source: https://unsloth.ai/docs/models/minimax-m27
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-5bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-5bit.toml
index 3702afdc1..3897cc9f6 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-5bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-5bit.toml
@@ -8,8 +8,16 @@ family = "minimax"
 quantization = "5bit"
 base_model = "MiniMax M2.7"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 157262619651
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.7
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.7
+# Source: https://unsloth.ai/docs/models/minimax-m27
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-6bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-6bit.toml
index 6213e77d0..bac8ae305 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-6bit.toml
@@ -8,8 +8,16 @@ family = "minimax"
 quantization = "6bit"
 base_model = "MiniMax M2.7"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 185842639299
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.7
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.7
+# Source: https://unsloth.ai/docs/models/minimax-m27
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-8bit.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-8bit.toml
index c964a587c..97ce9a75b 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.7-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.7-8bit.toml
@@ -8,8 +8,16 @@ family = "minimax"
 quantization = "8bit"
 base_model = "MiniMax M2.7"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 243002680786
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.7
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.7
+# Source: https://unsloth.ai/docs/models/minimax-m27
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--MiniMax-M2.7.toml b/resources/inference_model_cards/mlx-community--MiniMax-M2.7.toml
index 4d3645c67..e5acfbdab 100644
--- a/resources/inference_model_cards/mlx-community--MiniMax-M2.7.toml
+++ b/resources/inference_model_cards/mlx-community--MiniMax-M2.7.toml
@@ -8,8 +8,16 @@ family = "minimax"
 quantization = "bf16"
 base_model = "MiniMax M2.7"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "post_last_user"
 context_length = 196608
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 457492783366
+
+# Source: https://huggingface.co/MiniMaxAI/MiniMax-M2.7
+# Source: https://github.com/MiniMax-AI/MiniMax-M2.7
+# Source: https://unsloth.ai/docs/models/minimax-m27
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-4Bit.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-4Bit.toml
index 7b79a2056..840905fac 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-4Bit.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-4Bit.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Nemotron-3-Nano-30B-A3B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 17775342336
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
+# Source: https://unsloth.ai/docs/models/nemotron-3
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-5Bit.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-5Bit.toml
index 8a4c73848..99baffb11 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-5Bit.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-5Bit.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Nemotron-3-Nano-30B-A3B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 21721476864
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
+# Source: https://unsloth.ai/docs/models/nemotron-3
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-6Bit.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-6Bit.toml
index 96b7a7fbf..47449c7e3 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-6Bit.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-6Bit.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Nemotron-3-Nano-30B-A3B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 25667611392
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
+# Source: https://unsloth.ai/docs/models/nemotron-3
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-8Bit.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-8Bit.toml
index 1c95fa909..785c62f38 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-8Bit.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-8Bit.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Nemotron-3-Nano-30B-A3B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 33559880448
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
+# Source: https://unsloth.ai/docs/models/nemotron-3
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-BF16.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-BF16.toml
index 5364e5bc2..688bf8c6c 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-BF16.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-BF16.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Nemotron-3-Nano-30B-A3B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 63155889408
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
+# Source: https://unsloth.ai/docs/models/nemotron-3
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-MXFP4.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-MXFP4.toml
index 1c7137dae..9fa625380 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-MXFP4.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-MLX-MXFP4.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Nemotron-3-Nano-30B-A3B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 16788808704
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
+# Source: https://unsloth.ai/docs/models/nemotron-3
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4.toml
index 13aae100e..d590c794b 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4.toml
@@ -9,6 +9,12 @@ base_model = "NVIDIA Nemotron-3-Nano-30B-A3B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 19323906944
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
+# Source: https://unsloth.ai/docs/models/nemotron-3
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-4bits.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-4bits.toml
index 649fecd90..3e3eaa083 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-4bits.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-4bits.toml
@@ -9,6 +9,17 @@ base_model = "NVIDIA Nemotron-Nano-9B-v2"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 5002791936
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2
+# Source: https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2/modelcard
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2
+# Source: https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2/modelcard
+[sampling_defaults.non_thinking]
+temperature = 0.0
diff --git a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-6bit.toml b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-6bit.toml
index e5c6422bb..d06f09d01 100644
--- a/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--NVIDIA-Nemotron-Nano-9B-v2-6bit.toml
@@ -9,6 +9,17 @@ base_model = "NVIDIA Nemotron-Nano-9B-v2"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 7224298496
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2
+# Source: https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2/modelcard
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2
+# Source: https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2/modelcard
+[sampling_defaults.non_thinking]
+temperature = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
index f99033fc7..efb96e5dd 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-4bit.toml
@@ -10,6 +10,20 @@ base_model = "Qwen3 0.6B"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 32768
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 342884352
+
+# Source: https://huggingface.co/Qwen/Qwen3-0.6B#best-practices
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3-0.6B#best-practices
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
index 8d7e76432..4c959ebc7 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-0.6B-8bit.toml
@@ -10,6 +10,20 @@ base_model = "Qwen3 0.6B"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 32768
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 698351616
+
+# Source: https://huggingface.co/Qwen/Qwen3-0.6B#best-practices
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3-0.6B#best-practices
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
index 6e2709f57..ed5506c6b 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 235B"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 141733920768
+
+# Source: https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507#best-practices
+[sampling_defaults]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
index 0e41f4ed0..2e52ee9dd 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 235B"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 268435456000
+
+# Source: https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507#best-practices
+[sampling_defaults]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
index 4e736c14e..42678d4a1 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-4bit.toml
@@ -10,6 +10,20 @@ base_model = "Qwen3 30B"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 32768
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 17612931072
+
+# Source: https://huggingface.co/Qwen/Qwen3-30B-A3B#best-practices
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3-30B-A3B#best-practices
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
index 15308a846..026de660b 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-30B-A3B-8bit.toml
@@ -10,6 +10,20 @@ base_model = "Qwen3 30B"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 32768
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 33279705088
+
+# Source: https://huggingface.co/Qwen/Qwen3-30B-A3B#best-practices
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3-30B-A3B#best-practices
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml
index 95010af8c..62b25b486 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml
@@ -10,6 +10,14 @@ base_model = "Qwen3 Coder 480B"
 capabilities = ["text", "code"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 289910292480
+
+# Source: https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct#best-practices
+# Source: https://huggingface.co/unsloth/Qwen3-Coder-480B-A35B-Instruct-GGUF
+[sampling_defaults]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+repetition_penalty = 1.05
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml
index 963fb2054..5efe672b8 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml
@@ -10,6 +10,14 @@ base_model = "Qwen3 Coder 480B"
 capabilities = ["text", "code"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 579820584960
+
+# Source: https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct#best-practices
+# Source: https://huggingface.co/unsloth/Qwen3-Coder-480B-A35B-Instruct-GGUF
+[sampling_defaults]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+repetition_penalty = 1.05
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-4bit.toml
index e28c1d95a..d692f7dc6 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-4bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 Coder Next"
 capabilities = ["text", "code"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 45644286500
+
+# Source: https://huggingface.co/mlx-community/Qwen3-Coder-Next-4bit/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-5bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-5bit.toml
index 58c63f78a..4127b4348 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-5bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-5bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 Coder Next"
 capabilities = ["text", "code"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 57657697020
+
+# Source: https://huggingface.co/mlx-community/Qwen3-Coder-Next-4bit/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-6bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-6bit.toml
index 17f921e5d..e89d91dbf 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-6bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 Coder Next"
 capabilities = ["text", "code"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 68899327465
+
+# Source: https://huggingface.co/mlx-community/Qwen3-Coder-Next-4bit/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-8bit.toml
index 934894aca..8ecf357fe 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-8bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 Coder Next"
 capabilities = ["text", "code"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 89357758772
+
+# Source: https://huggingface.co/mlx-community/Qwen3-Coder-Next-4bit/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-bf16.toml b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-bf16.toml
index fd135bb09..73c32b0ef 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Coder-Next-bf16.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 Coder Next"
 capabilities = ["text", "code"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 157548627945
+
+# Source: https://huggingface.co/mlx-community/Qwen3-Coder-Next-4bit/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 40
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml
index 7a331b466..2145cbea6 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 Next 80B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 46976204800
+
+# Source: https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct#best-practices
+[sampling_defaults]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml
index a54f01de8..e1328021d 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml
@@ -10,6 +10,13 @@ base_model = "Qwen3 Next 80B"
 capabilities = ["text"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 88814387200
+
+# Source: https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct#best-practices
+[sampling_defaults]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
index d126f727d..708b309cf 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml
@@ -8,8 +8,15 @@ family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3 Next 80B"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 47080074240
+
+# Source: https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking#best-practices
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
index b008b1301..900972557 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml
@@ -8,8 +8,15 @@ family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3 Next 80B"
 capabilities = ["text", "thinking", "thinking_toggle"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 88814387200
+
+# Source: https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking#best-practices
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
diff --git a/resources/inference_model_cards/mlx-community--Qwen3-VL-4B-Instruct-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3-VL-4B-Instruct-4bit.toml
index 6d34735f4..d8e450022 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3-VL-4B-Instruct-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3-VL-4B-Instruct-4bit.toml
@@ -9,6 +9,15 @@ base_model = "Qwen3-VL 4B"
 capabilities = ["text", "thinking", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 3340000000
+
+# Source: https://huggingface.co/Qwen/Qwen3-VL-4B-Instruct#generation-hyperparameters
+# Source: https://unsloth.ai/docs/models/qwen3-how-to-run-and-fine-tune/qwen3-vl-how-to-run-and-fine-tune
+[sampling_defaults]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-4bit.toml
index 100ab9971..5e50aca6d 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-4bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3.5 122B A10B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 69593314272
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-6bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-6bit.toml
index 99177f485..ae589b0c0 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-6bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "6bit"
 base_model = "Qwen3.5 122B A10B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 100120675296
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-8bit.toml
index cff7cf62a..ede2e81c2 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-8bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3.5 122B A10B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 130648036320
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-bf16.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-bf16.toml
index 2299c8e79..e1cdb9f5f 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-122B-A10B-bf16.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "bf16"
 base_model = "Qwen3.5 122B A10B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 245125640160
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-122B-A10B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-27B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-27B-4bit.toml
index 8c8a4f119..c0ae0cbff 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-27B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-27B-4bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3.5 27B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 16054266848
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-27B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-27B-8bit.toml
index 3c7dc6eb7..aef1aa39a 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-27B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-27B-8bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3.5 27B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 29500943328
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-2B-MLX-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-2B-MLX-8bit.toml
index a62d4495e..78ff87479 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-2B-MLX-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-2B-MLX-8bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3.5 2B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 2662787264
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-9B#best-practices (9B card cited for this 2B model; TODO confirm against the Qwen3.5-2B card)
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-9B#best-practices (9B card cited for this 2B model; TODO confirm against the Qwen3.5-2B card)
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-4bit.toml
index dfb5e244f..d8abb8ded 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-4bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3.5 35B A3B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 20391405152
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-8bit.toml
index 24881e046..e995d131d 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-35B-A3B-8bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3.5 35B A3B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 37721130592
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-4bit.toml
index 7823e87b5..202b82861 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-4bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3.5 397B A17B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 223860768352
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-397B-A17B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-397B-A17B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-6bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-6bit.toml
index 8df64cad9..8774868ce 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-6bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "6bit"
 base_model = "Qwen3.5 397B A17B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 322946674272
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-397B-A17B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-397B-A17B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-8bit.toml
index 4aa634001..a0cef3ff9 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-397B-A17B-8bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3.5 397B A17B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 422032580192
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-397B-A17B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 0.0
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-397B-A17B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-9B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-9B-4bit.toml
index c8bb95c7c..2248f1936 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-9B-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-9B-4bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "4bit"
 base_model = "Qwen3.5 9B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 5950062560
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-9B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-9B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.5-9B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.5-9B-8bit.toml
index 6b1dfede6..44cbc4561 100644
--- a/resources/inference_model_cards/mlx-community--Qwen3.5-9B-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--Qwen3.5-9B-8bit.toml
@@ -8,8 +8,28 @@ family = "qwen"
 quantization = "8bit"
 base_model = "Qwen3.5 9B"
 capabilities = ["text", "thinking", "thinking_toggle", "vision"]
-
+reasoning_dialect = "post_last_user"
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 10426433504
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-9B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.5-9B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.6-27B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.6-27B-4bit.toml
new file mode 100644
index 000000000..88decd003
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Qwen3.6-27B-4bit.toml
@@ -0,0 +1,35 @@
+model_id = "mlx-community/Qwen3.6-27B-4bit"
+n_layers = 64
+hidden_size = 5120
+num_key_value_heads = 4
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3.6 27B"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "post_last_user"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 16054262240
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.6-27B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.6-27B-8bit.toml
new file mode 100644
index 000000000..8cef7490a
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Qwen3.6-27B-8bit.toml
@@ -0,0 +1,35 @@
+model_id = "mlx-community/Qwen3.6-27B-8bit"
+n_layers = 64
+hidden_size = 5120
+num_key_value_heads = 4
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3.6 27B"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "post_last_user"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 29500938720
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.6-27B-bf16.toml b/resources/inference_model_cards/mlx-community--Qwen3.6-27B-bf16.toml
new file mode 100644
index 000000000..d04042daa
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Qwen3.6-27B-bf16.toml
@@ -0,0 +1,35 @@
+model_id = "mlx-community/Qwen3.6-27B-bf16"
+n_layers = 64
+hidden_size = 5120
+num_key_value_heads = 4
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "bf16"
+base_model = "Qwen3.6 27B"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "post_last_user"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 54713457120
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-27B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-4bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-4bit.toml
new file mode 100644
index 000000000..d2ca90e8f
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-4bit.toml
@@ -0,0 +1,35 @@
+model_id = "mlx-community/Qwen3.6-35B-A3B-4bit"
+n_layers = 40
+hidden_size = 2048
+num_key_value_heads = 2
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "4bit"
+base_model = "Qwen3.6 35B A3B"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "post_last_user"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 20401929952
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-5bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-5bit.toml
new file mode 100644
index 000000000..8229424f6
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-5bit.toml
@@ -0,0 +1,35 @@
+model_id = "mlx-community/Qwen3.6-35B-A3B-5bit"
+n_layers = 40
+hidden_size = 2048
+num_key_value_heads = 2
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "5bit"
+base_model = "Qwen3.6 35B A3B"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "post_last_user"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 24731729632
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-8bit.toml b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-8bit.toml
new file mode 100644
index 000000000..c03748fae
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-8bit.toml
@@ -0,0 +1,35 @@
+model_id = "mlx-community/Qwen3.6-35B-A3B-8bit"
+n_layers = 40
+hidden_size = 2048
+num_key_value_heads = 2
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "8bit"
+base_model = "Qwen3.6 35B A3B"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "post_last_user"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 37721128672
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-bf16.toml b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-bf16.toml
new file mode 100644
index 000000000..9bd5cf1eb
--- /dev/null
+++ b/resources/inference_model_cards/mlx-community--Qwen3.6-35B-A3B-bf16.toml
@@ -0,0 +1,35 @@
+model_id = "mlx-community/Qwen3.6-35B-A3B-bf16"
+n_layers = 40
+hidden_size = 2048
+num_key_value_heads = 2
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "qwen"
+quantization = "bf16"
+base_model = "Qwen3.6 35B A3B"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+reasoning_dialect = "post_last_user"
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 70214363872
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
+
+# Source: https://huggingface.co/Qwen/Qwen3.6-35B-A3B#best-practices
+# Source: https://unsloth.ai/docs/models/qwen3.5
+[sampling_defaults.non_thinking]
+temperature = 0.7
+top_p = 0.8
+top_k = 20
+min_p = 0.0
+repetition_penalty = 1.0
+presence_penalty = 1.5
diff --git a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-4bit.toml b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-4bit.toml
index d29716419..734febb03 100644
--- a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-4bit.toml
@@ -10,6 +10,18 @@ base_model = "Step 3.5 Flash"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 114572190076
+
+# Source: https://huggingface.co/stepfun-ai/Step-3.5-Flash/discussions/3
+# Source: https://github.com/stepfun-ai/Step-3.5-Flash/blob/main/llama.cpp/docs/step3.5-flash.md
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/stepfun-ai/Step-3.5-Flash/discussions/3
+# Source: https://github.com/stepfun-ai/Step-3.5-Flash/blob/main/llama.cpp/docs/step3.5-flash.md
+[sampling_defaults.thinking]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-6bit.toml b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-6bit.toml
index 4adc3bb47..57a5f4cf0 100644
--- a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-6bit.toml
@@ -10,6 +10,18 @@ base_model = "Step 3.5 Flash"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 159039627774
+
+# Source: https://huggingface.co/stepfun-ai/Step-3.5-Flash/discussions/3
+# Source: https://github.com/stepfun-ai/Step-3.5-Flash/blob/main/llama.cpp/docs/step3.5-flash.md
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/stepfun-ai/Step-3.5-Flash/discussions/3
+# Source: https://github.com/stepfun-ai/Step-3.5-Flash/blob/main/llama.cpp/docs/step3.5-flash.md
+[sampling_defaults.thinking]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-8Bit.toml b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-8Bit.toml
index 1306637ce..977903fd9 100644
--- a/resources/inference_model_cards/mlx-community--Step-3.5-Flash-8Bit.toml
+++ b/resources/inference_model_cards/mlx-community--Step-3.5-Flash-8Bit.toml
@@ -10,6 +10,18 @@ base_model = "Step 3.5 Flash"
 capabilities = ["text", "thinking", "thinking_toggle"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 209082699847
+
+# Source: https://huggingface.co/stepfun-ai/Step-3.5-Flash/discussions/3
+# Source: https://github.com/stepfun-ai/Step-3.5-Flash/blob/main/llama.cpp/docs/step3.5-flash.md
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.95
+
+# Source: https://huggingface.co/stepfun-ai/Step-3.5-Flash/discussions/3
+# Source: https://github.com/stepfun-ai/Step-3.5-Flash/blob/main/llama.cpp/docs/step3.5-flash.md
+[sampling_defaults.thinking]
+temperature = 1.0
+top_p = 0.95
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-4bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-4bit.toml
index a36497e8b..729b4c5ee 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-4bit.toml
@@ -2,7 +2,7 @@ model_id = "mlx-community/gemma-4-26b-a4b-it-4bit"
 n_layers = 30
 hidden_size = 2816
 num_key_value_heads = 8
-supports_tensor = true
+supports_tensor = false
 tasks = ["TextGeneration"]
 family = "gemma"
 quantization = "4bit"
@@ -10,6 +10,13 @@ base_model = "Gemma 4 26B A4B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 15608614044
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-26b-a4b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-6bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-6bit.toml
index bd5823118..e98f520f8 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-6bit.toml
@@ -2,7 +2,7 @@ model_id = "mlx-community/gemma-4-26b-a4b-it-6bit"
 n_layers = 30
 hidden_size = 2816
 num_key_value_heads = 8
-supports_tensor = true
+supports_tensor = false
 tasks = ["TextGeneration"]
 family = "gemma"
 quantization = "6bit"
@@ -10,6 +10,13 @@ base_model = "Gemma 4 26B A4B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 21781015708
+
+# Source: https://huggingface.co/google/gemma-4-26b-a4b-it/blob/main/generation_config.json
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-8bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-8bit.toml
index 9dda6aa44..36ac44ddf 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-8bit.toml
@@ -2,7 +2,7 @@ model_id = "mlx-community/gemma-4-26b-a4b-it-8bit"
 n_layers = 30
 hidden_size = 2816
 num_key_value_heads = 8
-supports_tensor = true
+supports_tensor = false
 tasks = ["TextGeneration"]
 family = "gemma"
 quantization = "8bit"
@@ -10,6 +10,13 @@ base_model = "Gemma 4 26B A4B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 27953417372
+
+# Source: https://huggingface.co/google/gemma-4-26b-a4b-it/blob/main/generation_config.json
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-bf16.toml b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-bf16.toml
index ece399828..e11972544 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-26b-a4b-it-bf16.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 26B A4B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 51611872412
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-26b-a4b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-4bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-4bit.toml
index d8a40f35e..409db1973 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-4bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 31B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 18411755224
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-31B-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-6bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-6bit.toml
index 6222ce70c..21c0461dd 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-6bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 31B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 26087306968
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-31B-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-8bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-8bit.toml
index 863961183..378447a46 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-8bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 31B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 33762858712
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-31B-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-bf16.toml b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-bf16.toml
index 1d4f740c2..ea3c87d8b 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-31b-it-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-31b-it-bf16.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 31B"
 capabilities = ["text", "vision"]
 
 context_length = 262144
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 62546177752
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-31B-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-4bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-4bit.toml
index 9d8f99324..5234fab0d 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-4bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E2B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 3580765126
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e2b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-6bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-6bit.toml
index c9c6d1a00..d4d1f674e 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-6bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E2B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 4739998662
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e2b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-8bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-8bit.toml
index 804da9076..9eb59b37d 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-8bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E2B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 5899232198
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e2b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-bf16.toml b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-bf16.toml
index 8f2920da3..b45e690d1 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e2b-it-bf16.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E2B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 10246357958
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e2b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-4bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-4bit.toml
index 1122bbeb7..e88c4639b 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-4bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E4B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 5216992212
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e4b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-6bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-6bit.toml
index 6f3b430c3..0402a8015 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-6bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-6bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E4B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 7090961364
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e4b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-8bit.toml b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-8bit.toml
index 48e21fef1..158d3fc6d 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-8bit.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-8bit.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E4B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 8964930516
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e4b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-bf16.toml b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-bf16.toml
index db87c2ee4..8d1f84f84 100644
--- a/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-bf16.toml
+++ b/resources/inference_model_cards/mlx-community--gemma-4-e4b-it-bf16.toml
@@ -10,6 +10,13 @@ base_model = "Gemma 4 E4B"
 capabilities = ["text", "vision"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 15992314836
+
+# Source: https://ai.google.dev/gemma/docs/core/model_card_4
+# Source: https://huggingface.co/google/gemma-4-e4b-it/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+top_k = 64
diff --git a/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml b/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml
index 02e8b7551..091b079ba 100644
--- a/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--gpt-oss-120b-MXFP4-Q8.toml
@@ -8,8 +8,15 @@ family = "gpt-oss"
 quantization = "MXFP4-Q8"
 base_model = "GPT-OSS 120B"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "channel"
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 70652212224
+
+# Source: https://github.com/openai/gpt-oss/blob/main/README.md
+# Source: https://unsloth.ai/docs/models/gpt-oss-how-to-run-and-fine-tune
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
+top_k = 0
diff --git a/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml b/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml
index a363bfab9..9e1a56bae 100644
--- a/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--gpt-oss-20b-MXFP4-Q8.toml
@@ -8,8 +8,15 @@ family = "gpt-oss"
 quantization = "MXFP4-Q8"
 base_model = "GPT-OSS 20B"
 capabilities = ["text", "thinking"]
-
+reasoning_dialect = "channel"
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 12025908224
+
+# Source: https://github.com/openai/gpt-oss/blob/main/README.md
+# Source: https://unsloth.ai/docs/models/gpt-oss-how-to-run-and-fine-tune
+[sampling_defaults]
+temperature = 1.0
+top_p = 1.0
+top_k = 0
diff --git a/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml b/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml
index 194a88d7a..f37400aa9 100644
--- a/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml
+++ b/resources/inference_model_cards/mlx-community--llama-3.3-70b-instruct-fp16.toml
@@ -10,6 +10,12 @@ base_model = "Llama 3.3 70B"
 capabilities = ["text"]
 
 context_length = 131072
-
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
 [storage_size]
 in_bytes = 144383672320
+
+# Source: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct/blob/main/generation_config.json
+# Source: https://huggingface.co/unsloth/Llama-3.3-70B-Instruct/blob/main/generation_config.json
+[sampling_defaults]
+temperature = 0.6
+top_p = 0.9
diff --git a/resources/inference_model_cards/moonshotai--Kimi-K2.6.toml b/resources/inference_model_cards/moonshotai--Kimi-K2.6.toml
new file mode 100644
index 000000000..b169e51ae
--- /dev/null
+++ b/resources/inference_model_cards/moonshotai--Kimi-K2.6.toml
@@ -0,0 +1,33 @@
+model_id = "moonshotai/Kimi-K2.6"
+n_layers = 61
+hidden_size = 7168
+num_key_value_heads = 64
+supports_tensor = true
+tasks = ["TextGeneration"]
+family = "kimi"
+quantization = ""
+base_model = "Kimi K2.6"
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+
+context_length = 262144
+backends = ["MlxMetal", "MlxCuda", "MlxCpu"]
+[storage_size]
+in_bytes = 595148192736
+
+[vision]
+image_token_id = 163605
+model_type = "kimi_vl"
+weights_repo = "exolabs/Kimi-K2.6-vision"
+processor_repo = "moonshotai/Kimi-K2.6"
+
+# Source: https://huggingface.co/moonshotai/Kimi-K2.6
+[sampling_defaults]
+temperature = 1.0
+top_p = 0.95
+min_p = 0.01
+
+# Source: https://huggingface.co/moonshotai/Kimi-K2.6
+[sampling_defaults.non_thinking]
+temperature = 0.6
+top_p = 0.95
+min_p = 0.01
diff --git a/rust/exo_pyo3_bindings/Cargo.toml b/rust/exo_pyo3_bindings/Cargo.toml
index e7577ab79..143e4d1fd 100644
--- a/rust/exo_pyo3_bindings/Cargo.toml
+++ b/rust/exo_pyo3_bindings/Cargo.toml
@@ -46,9 +46,12 @@ pyo3-async-runtimes = { version = "0.27.0", features = [
 ] }
 pyo3-log = "0.13.2"
 
+pidfile-rs = "0.3"
+
 # macro dependencies
 extend = { workspace = true }
 delegate = { workspace = true }
+thiserror = "2.0"
 
 # async runtime
 tokio = { workspace = true, features = ["full", "tracing"] }
diff --git a/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi b/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi
index bfd8978af..e7c423f03 100644
--- a/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi
+++ b/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi
@@ -2,6 +2,8 @@
 # ruff: noqa: E501, F401
 
 import builtins
+import os
+import pathlib
 import typing
 
 @typing.final
@@ -69,6 +71,48 @@ class NoPeersSubscribedToTopicError(builtins.Exception):
     def __repr__(self) -> builtins.str: ...
     def __str__(self) -> builtins.str: ...
 
+@typing.final
+class Pidfile:
+    r"""
+    A PID file protected with a lock.
+    
+    An instance of `Pidfile` can be used to manage a PID file: create it,
+    lock it, detect already running daemons. It is backed by [`pidfile`][]
+    functions of `libbsd`/`libutil` which use `flopen` to lock the PID
+    file.
+    
+    When a PID file is created, the process ID of the current process is
+    *not* written there, making it possible to lock the PID file before
+    forking and only write the ID of the forked process when it is ready.
+    
+    The PID file is deleted automatically when the wrapped Rust `Pidfile`
+    is dropped, which for this binding should be when the owning Python
+    object is deallocated. The upstream `close()` method (close without
+    deleting) is not exposed here; only the constructor and `write()`
+    are available from Python.
+    
+    [`pidfile`]: https://linux.die.net/man/3/pidfile
+    """
+    def __new__(cls, path: builtins.str | os.PathLike | pathlib.Path, mode: builtins.int) -> Pidfile:
+        r"""
+        Creates a new PID file and locks it.
+        
+        If the PID file cannot be locked, raises `PidfileError` wrapping the
+        Rust `PidfileError::AlreadyRunning`, which carries the PID of the already
+        running process, or `None` if no PID has been written to the file yet.
+        """
+    def write(self) -> None:
+        r"""
+        Writes the current process ID to the PID file.
+        
+        The file is truncated before writing; raises `PidfileError` on failure.
+        """
+
+@typing.final
+class PidfileError(builtins.Exception):  # raised by Pidfile(...) and Pidfile.write()
+    def __repr__(self) -> builtins.str: ...
+    def __str__(self) -> builtins.str: ...
+
 class PyFromSwarm:
     @typing.final
     class Connection(PyFromSwarm):
diff --git a/rust/exo_pyo3_bindings/pyproject.toml b/rust/exo_pyo3_bindings/pyproject.toml
index 17c170cbc..531f55e5a 100644
--- a/rust/exo_pyo3_bindings/pyproject.toml
+++ b/rust/exo_pyo3_bindings/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "exo_pyo3_bindings"
-version = "0.2.1"
+version = "0.2.2"
 description = "Add your description here"
 readme = "README.md"
 authors = [
diff --git a/rust/exo_pyo3_bindings/src/lib.rs b/rust/exo_pyo3_bindings/src/lib.rs
index e22afdeb2..18a147f4c 100644
--- a/rust/exo_pyo3_bindings/src/lib.rs
+++ b/rust/exo_pyo3_bindings/src/lib.rs
@@ -7,9 +7,11 @@
 mod allow_threading;
 mod ident;
 mod networking;
+mod pidfile;
 
 use crate::ident::PyKeypair;
 use crate::networking::networking_submodule;
+use crate::pidfile::pidfile_submodule;
 use pyo3::prelude::PyModule;
 use pyo3::types::PyModuleMethods;
 use pyo3::{Bound, PyResult, pyclass, pymodule};
@@ -164,6 +166,7 @@ fn main_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
     //       too many importing issues...
     m.add_class::<PyKeypair>()?;
     networking_submodule(m)?;
+    pidfile_submodule(m)?;
 
     // top-level constructs
     // TODO: ...
diff --git a/rust/exo_pyo3_bindings/src/pidfile.rs b/rust/exo_pyo3_bindings/src/pidfile.rs
new file mode 100644
index 000000000..32e8d7f79
--- /dev/null
+++ b/rust/exo_pyo3_bindings/src/pidfile.rs
@@ -0,0 +1,87 @@
+use pidfile_rs::{Pidfile, PidfileError};
+use pyo3::exceptions::PyException;
+use pyo3::prelude::{PyModule, PyModuleMethods};
+use pyo3::{Bound, PyErr, PyResult, Python, pyclass, pymethods};
+use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
+use std::fs::Permissions;
+use std::os::unix::prelude::PermissionsExt;
+use std::path::PathBuf;
+
+#[gen_stub_pyclass]
+#[pyclass(frozen, extends=PyException, name="PidfileError")]
+pub struct PyPidfileError(PidfileError); // Python exception wrapping the pidfile_rs error
+
+impl PyPidfileError {
+    /// Converts `self` into a raisable `PyErr` (TODO: abstract this pattern for reuse).
+    fn into_pyerr(self, py: Python) -> PyErr {
+        match Bound::new(py, self) {
+            Ok(err) => PyErr::from_value(err.into_any()),
+            Err(err) => err, // the exception object could not be built: surface that error
+        }
+    }
+}
+
+#[gen_stub_pymethods]
+#[pymethods]
+impl PyPidfileError {
+    fn __repr__(&self) -> String {
+        format!("PidfileError(\"{}\")", self.0) // message is embedded as-is, not escaped
+    }
+
+    fn __str__(&self) -> String {
+        self.0.to_string() // Display text of the wrapped Rust error
+    }
+}
+
+/// A PID file protected with a lock.
+///
+/// An instance of `Pidfile` can be used to manage a PID file: create it,
+/// lock it, detect already running daemons. It is backed by [`pidfile`][]
+/// functions of `libbsd`/`libutil` which use `flopen` to lock the PID
+/// file.
+///
+/// When a PID file is created, the process ID of the current process is
+/// *not* written there, making it possible to lock the PID file before
+/// forking and only write the ID of the forked process when it is ready.
+///
+/// The PID file is deleted automatically when the wrapped Rust `Pidfile`
+/// is dropped, which for this binding should be when the owning Python
+/// object is deallocated. The upstream `close()` method (close without
+/// deleting) is not exposed here; only the constructor and `write()`
+/// are available from Python.
+///
+/// [`pidfile`]: https://linux.die.net/man/3/pidfile
+#[gen_stub_pyclass]
+#[pyclass(name = "Pidfile")]
+pub struct PyPidfile(Pidfile);
+
+#[gen_stub_pymethods]
+#[pymethods]
+impl PyPidfile {
+    /// Creates a new PID file and locks it.
+    ///
+    /// If the PID file cannot be locked, raises `PidfileError` wrapping the
+    /// Rust `PidfileError::AlreadyRunning`, which carries the PID of the already
+    /// running process, or `None` if no PID has been written to the file yet.
+    #[new]
+    fn py_new(py: Python, path: PathBuf, mode: u32) -> PyResult<Self> {
+        Ok(Self(
+            Pidfile::new(&path, Permissions::from_mode(mode))
+                .map_err(|e| PyPidfileError(e).into_pyerr(py))?,
+        ))
+    }
+
+    /// Writes the current process ID to the PID file.
+    ///
+    /// The file is truncated before writing; raises `PidfileError` on failure.
+    fn write<'py>(&mut self, py: Python<'py>) -> PyResult<()> {
+        self.0.write().map_err(|e| PyPidfileError(e).into_pyerr(py))
+    }
+}
+
+/// Registers `PidfileError` and `Pidfile` directly on `m`; no nested module is created.
+pub fn pidfile_submodule(m: &Bound<PyModule>) -> PyResult<()> {
+    m.add_class::<PyPidfileError>()?;
+    m.add_class::<PyPidfile>()?;
+    Ok(())
+}
diff --git a/rust/exo_pyo3_bindings/tests/test_python.py b/rust/exo_pyo3_bindings/tests/test_python.py
index a653103d1..ed65f4298 100644
--- a/rust/exo_pyo3_bindings/tests/test_python.py
+++ b/rust/exo_pyo3_bindings/tests/test_python.py
@@ -1,10 +1,12 @@
 import asyncio
 
 import pytest
+from _pytest.capture import CaptureFixture
 from exo_pyo3_bindings import (
     Keypair,
     NetworkingHandle,
     NoPeersSubscribedToTopicError,
+    Pidfile,
     PyFromSwarm,
 )
 
@@ -26,6 +28,13 @@ async def test_sleep_on_multiple_items() -> None:
             print("caught it", e)
 
 
+def test_pidfile(capsys: CaptureFixture[str]) -> None:
+    with capsys.disabled():  # let the prints below bypass pytest's output capture
+        print("\nbefore python")
+        scoped_lock_file()  # smoke test: passes as long as this does not raise
+        print("after python")
+
+
 async def _await_recv(h: NetworkingHandle):
     while True:
         event = await h.recv()
@@ -34,3 +43,7 @@ async def _await_recv(h: NetworkingHandle):
                 print(f"PYTHON: connection update: {c}")
             case PyFromSwarm.Message() as m:
                 print(f"PYTHON: message: {m}")
+
+
+def scoped_lock_file() -> None:
+    _pidfile = Pidfile("/tmp/lock.pid", 0o0600)  # kept alive until return
diff --git a/src/exo/api/adapters/chat_completions.py b/src/exo/api/adapters/chat_completions.py
index 6dd5d7c13..cbd545318 100644
--- a/src/exo/api/adapters/chat_completions.py
+++ b/src/exo/api/adapters/chat_completions.py
@@ -131,9 +131,13 @@ async def chat_request_to_text_generation(
                         multimodal_content.append({"type": "text", "text": part.text})
                     else:
                         multimodal_content.append({"type": "image"})
-                chat_template_messages.append(
-                    {"role": msg.role, "content": multimodal_content}
-                )
+                multimodal_msg: dict[str, Any] = {
+                    "role": msg.role,
+                    "content": multimodal_content,
+                }
+                if msg.reasoning_content is not None:
+                    multimodal_msg["reasoning_content"] = msg.reasoning_content
+                chat_template_messages.append(multimodal_msg)
                 continue
             msg_copy = msg.model_copy(update={"content": content})
 
@@ -168,6 +172,8 @@ async def chat_request_to_text_generation(
         min_p=request.min_p,
         repetition_penalty=request.repetition_penalty,
         repetition_context_size=request.repetition_context_size,
+        presence_penalty=request.presence_penalty,
+        frequency_penalty=request.frequency_penalty,
         images=images,
     )
 
@@ -232,7 +238,7 @@ async def generate_chat_stream(
                         code=500,
                     )
                 )
-                yield f"data: {error_response.model_dump_json()}\n\n"
+                yield f"data: {error_response.model_dump_json(exclude_none=True)}\n\n"
                 yield "data: [DONE]\n\n"
                 return
 
@@ -263,7 +269,7 @@ async def generate_chat_stream(
                     ],
                     usage=last_usage,
                 )
-                yield f"data: {tool_response.model_dump_json()}\n\n"
+                yield f"data: {tool_response.model_dump_json(exclude_none=True)}\n\n"
                 if chunk.stats is not None:
                     yield f": generation_stats {chunk.stats.model_dump_json()}\n\n"
                 yield "data: [DONE]\n\n"
@@ -277,7 +283,7 @@ async def generate_chat_stream(
                     chunk_response = chunk_response.model_copy(
                         update={"usage": last_usage}
                     )
-                yield f"data: {chunk_response.model_dump_json()}\n\n"
+                yield f"data: {chunk_response.model_dump_json(exclude_none=True)}\n\n"
 
                 if chunk.finish_reason is not None:
                     if chunk.stats is not None:
@@ -373,5 +379,5 @@ async def collect_chat_response(
             )
         ],
         usage=last_usage,
-    ).model_dump_json()
+    ).model_dump_json(exclude_none=True)
     return
diff --git a/src/exo/api/adapters/responses.py b/src/exo/api/adapters/responses.py
index a3c248c0d..41ceab1ad 100644
--- a/src/exo/api/adapters/responses.py
+++ b/src/exo/api/adapters/responses.py
@@ -23,6 +23,7 @@ from exo.api.types.openai_responses import (
     FunctionCallInputItem,
     FunctionCallOutputInputItem,
     ImageGenerationCallInputItem,
+    InputTokensDetails,
     ItemReferenceInputItem,
     LocalShellCallInputItem,
     LocalShellCallOutputInputItem,
@@ -30,6 +31,7 @@ from exo.api.types.openai_responses import (
     McpApprovalResponseInputItem,
     McpCallInputItem,
     McpListToolsInputItem,
+    OutputTokensDetails,
     ReasoningInputItem,
     ResponseCompletedEvent,
     ResponseContentPart,
@@ -82,9 +84,24 @@ from exo.shared.types.text_generation import (
 )
 
 
+def _build_response_usage(usage: Usage) -> ResponseUsage:
+    """Translate internal `Usage` counters into the Responses API usage shape."""
+    # Read the nested detail counters first so the constructor call stays flat.
+    cached = usage.prompt_tokens_details.cached_tokens
+    reasoning = usage.completion_tokens_details.reasoning_tokens
+    # The two schemas name their fields differently: prompt/completion -> input/output.
+    return ResponseUsage(
+        input_tokens=usage.prompt_tokens,
+        input_tokens_details=InputTokensDetails(cached_tokens=cached),
+        output_tokens=usage.completion_tokens,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=reasoning),
+        total_tokens=usage.total_tokens,
+    )
+
+
 def _format_sse(event: ResponsesStreamEvent) -> str:
     """Format a streaming event as an SSE message."""
-    return f"event: {event.type}\ndata: {event.model_dump_json()}\n\n"
+    return f"event: {event.type}\ndata: {event.model_dump_json(exclude_none=True)}\n\n"
 
 
 def _extract_content(content: str | list[ResponseContentPart]) -> str:
@@ -96,6 +113,23 @@ def _extract_content(content: str | list[ResponseContentPart]) -> str:
     )
 
 
+def _append_tool_call(
+    chat_template_messages: list[dict[str, Any]], tool_call: dict[str, Any]
+) -> None:
+    """Fold `tool_call` into a trailing text assistant message, else append one."""
+    msg = chat_template_messages[-1] if chat_template_messages else None
+    has_text = msg is not None and isinstance(msg.get("content"), str)
+    if msg is None or not has_text or msg.get("role") != "assistant":
+        # Nothing suitable to merge into: give the call its own assistant turn.
+        fresh = {"role": "assistant", "content": "", "tool_calls": [tool_call]}
+        chat_template_messages.append(fresh)
+    elif msg.get("tool_calls") is None:
+        # First call on this turn (key absent or explicitly None).
+        msg["tool_calls"] = [tool_call]
+    else:
+        msg["tool_calls"].append(tool_call)
+
+
 async def responses_request_to_text_generation(
     request: ResponsesRequest,
 ) -> TextGenerationTaskParams:
@@ -165,59 +199,44 @@ async def responses_request_to_text_generation(
                     | McpCallInputItem()
                     | CustomToolCallInputItem()
                 ):
-                    chat_template_messages.append(
+                    _append_tool_call(
+                        chat_template_messages,
                         {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": item.call_id,
-                                    "type": "function",
-                                    "function": {
-                                        "name": item.name,
-                                        "arguments": item.arguments,
-                                    },
-                                }
-                            ],
-                        }
+                            "id": item.call_id,
+                            "type": "function",
+                            "function": {
+                                "name": item.name,
+                                "arguments": item.arguments,
+                            },
+                        },
                     )
                 case (
                     LocalShellCallInputItem()
                     | ShellCallInputItem()
                     | ComputerCallInputItem()
                 ):
-                    chat_template_messages.append(
+                    _append_tool_call(
+                        chat_template_messages,
                         {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": item.call_id,
-                                    "type": "function",
-                                    "function": {
-                                        "name": item.type,
-                                        "arguments": json.dumps(item.action),
-                                    },
-                                }
-                            ],
-                        }
+                            "id": item.call_id,
+                            "type": "function",
+                            "function": {
+                                "name": item.type,
+                                "arguments": json.dumps(item.action),
+                            },
+                        },
                     )
                 case ApplyPatchCallInputItem():
-                    chat_template_messages.append(
+                    _append_tool_call(
+                        chat_template_messages,
                         {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": item.call_id,
-                                    "type": "function",
-                                    "function": {
-                                        "name": "apply_patch",
-                                        "arguments": json.dumps({"patch": item.patch}),
-                                    },
-                                }
-                            ],
-                        }
+                            "id": item.call_id,
+                            "type": "function",
+                            "function": {
+                                "name": "apply_patch",
+                                "arguments": json.dumps({"patch": item.patch}),
+                            },
+                        },
                     )
                 case (
                     WebSearchCallInputItem()
@@ -237,21 +256,16 @@ async def responses_request_to_text_generation(
                         args = {"prompt": item.prompt}
                     else:
                         args = {"query": item.query}
-                    chat_template_messages.append(
+                    _append_tool_call(
+                        chat_template_messages,
                         {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": item.call_id,
-                                    "type": "function",
-                                    "function": {
-                                        "name": item.type,
-                                        "arguments": json.dumps(args),
-                                    },
-                                }
-                            ],
-                        }
+                            "id": item.call_id,
+                            "type": "function",
+                            "function": {
+                                "name": item.type,
+                                "arguments": json.dumps(args),
+                            },
+                        },
                     )
                 case (
                     FunctionCallOutputInputItem()
@@ -303,21 +317,16 @@ async def responses_request_to_text_generation(
                             }
                         )
                 case McpApprovalRequestInputItem():
-                    chat_template_messages.append(
+                    _append_tool_call(
+                        chat_template_messages,
                         {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": item.call_id,
-                                    "type": "function",
-                                    "function": {
-                                        "name": item.name,
-                                        "arguments": item.arguments,
-                                    },
-                                }
-                            ],
-                        }
+                            "id": item.call_id,
+                            "type": "function",
+                            "function": {
+                                "name": item.name,
+                                "arguments": item.arguments,
+                            },
+                        },
                     )
                 case McpApprovalResponseInputItem():
                     chat_template_messages.append(
@@ -436,13 +445,7 @@ async def collect_responses_response(
         raise ValueError(error_message)
 
     # Create usage from usage data if available
-    usage = None
-    if last_usage is not None:
-        usage = ResponseUsage(
-            input_tokens=last_usage.prompt_tokens,
-            output_tokens=last_usage.completion_tokens,
-            total_tokens=last_usage.total_tokens,
-        )
+    usage = _build_response_usage(last_usage) if last_usage is not None else None
 
     output: list[ResponseItem] = []
     if thinking_parts:
@@ -468,7 +471,7 @@ async def collect_responses_response(
         output=output,
         output_text=accumulated_text,
         usage=usage,
-    ).model_dump_json()
+    ).model_dump_json(exclude_none=True)
     return
 
 
@@ -791,13 +794,7 @@ async def generate_responses_stream(
     yield _format_sse(item_done)
 
     # Create usage from usage data if available
-    usage = None
-    if last_usage is not None:
-        usage = ResponseUsage(
-            input_tokens=last_usage.prompt_tokens,
-            output_tokens=last_usage.completion_tokens,
-            total_tokens=last_usage.total_tokens,
-        )
+    usage = _build_response_usage(last_usage) if last_usage is not None else None
 
     # response.completed
     output: list[ResponseItem] = []
diff --git a/src/exo/api/main.py b/src/exo/api/main.py
index 7bc0a462b..90a93a4c5 100644
--- a/src/exo/api/main.py
+++ b/src/exo/api/main.py
@@ -20,6 +20,7 @@ from fastapi.staticfiles import StaticFiles
 from hypercorn.asyncio import serve  # pyright: ignore[reportUnknownVariableType]
 from hypercorn.config import Config
 from hypercorn.typing import ASGIFramework
+from hypercorn.utils import LifespanTimeoutError
 from loguru import logger
 
 from exo.api.adapters.chat_completions import (
@@ -79,6 +80,8 @@ from exo.api.types import (
     ImageListItem,
     ImageListResponse,
     ImageSize,
+    InstanceLinkBody,
+    InstanceLinkResponse,
     ModelList,
     ModelListModel,
     PlaceInstanceParams,
@@ -101,6 +104,7 @@ from exo.api.types.claude_api import (
     ClaudeMessagesResponse,
 )
 from exo.api.types.ollama_api import (
+    OllamaCapability,
     OllamaChatRequest,
     OllamaChatResponse,
     OllamaGenerateRequest,
@@ -122,6 +126,7 @@ from exo.master.placement import place_instance as get_instance_placements
 from exo.shared.apply import apply
 from exo.shared.constants import (
     DASHBOARD_DIR,
+    ENABLE_DISAGGREGATION,
     EXO_CACHE_HOME,
     EXO_EVENT_LOG_DIR,
     EXO_IMAGE_CACHE_DIR,
@@ -130,12 +135,11 @@ from exo.shared.constants import (
 )
 from exo.shared.election import ElectionMessage
 from exo.shared.logging import InterceptLogger
+from exo.shared.models import model_cards
 from exo.shared.models.model_cards import (
     ModelCard,
     ModelId,
-    add_to_card_cache,
-    get_card,
-    get_model_cards,
+    ModelTask,
 )
 from exo.shared.tracing import TraceEvent, compute_stats, export_trace, load_trace_file
 from exo.shared.types.chunks import (
@@ -154,6 +158,7 @@ from exo.shared.types.commands import (
     DeleteCustomModelCard,
     DeleteDownload,
     DeleteInstance,
+    DeleteInstanceLink,
     DownloadCommand,
     ForwarderCommand,
     ForwarderDownloadCommand,
@@ -161,6 +166,7 @@ from exo.shared.types.commands import (
     ImageGeneration,
     PlaceInstance,
     SendInputChunk,
+    SetInstanceLink,
     StartDownload,
     TaskCancelled,
     TaskFinished,
@@ -174,6 +180,7 @@ from exo.shared.types.events import (
     InstanceDeleted,
     TracesMerged,
 )
+from exo.shared.types.instance_link import InstanceLink, InstanceLinkId
 from exo.shared.types.memory import Memory
 from exo.shared.types.state import State
 from exo.shared.types.tasks import (
@@ -185,7 +192,10 @@ from exo.shared.types.tasks import (
 from exo.shared.types.tasks import (
     TextGeneration as TextGenerationTask,
 )
-from exo.shared.types.text_generation import Base64Image, TextGenerationTaskParams
+from exo.shared.types.text_generation import (
+    Base64ImageHash,
+    TextGenerationTaskParams,
+)
 from exo.shared.types.worker.downloads import DownloadCompleted
 from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
 from exo.shared.types.worker.shards import Sharding
@@ -212,6 +222,17 @@ def _ensure_seed(params: AdvancedImageParams | None) -> AdvancedImageParams:
     return params
 
 
+def _require_disaggregation_enabled() -> None:
+    """Raise a 404 HTTPException unless ENABLE_DISAGGREGATION is truthy."""
+    if ENABLE_DISAGGREGATION:
+        return
+    detail = (
+        "Prefill/decode disaggregation is disabled. "
+        "Set ENABLE_DISAGGREGATION=true to enable."
+    )
+    raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=detail)
+
+
 class API:
     def __init__(
         self,
@@ -234,6 +255,7 @@ class API:
         self.node_id: NodeId = node_id
         self.last_completed_election: int = 0
         self.port = port
+        self._sent_image_hashes: set[str] = set()
 
         self.paused: bool = False
         self.paused_ev: anyio.Event = anyio.Event()
@@ -283,6 +305,7 @@ class API:
         self.event_receiver.close()
         self.event_receiver = event_receiver
         self._tg.start_soon(self._apply_state)
+        self._sent_image_hashes = set()
 
     def unpause(self, result_clock: int):
         logger.info("Unpausing API")
@@ -323,6 +346,11 @@ class API:
         self.app.get("/instance/previews")(self.get_placement_previews)
         self.app.get("/instance/{instance_id}")(self.get_instance)
         self.app.delete("/instance/{instance_id}")(self.delete_instance)
+        self.app.get("/v1/instance-links")(self.list_instance_links)
+        self.app.post("/v1/instance-links")(self.create_instance_link)
+        self.app.put("/v1/instance-links/{link_id}")(self.update_instance_link)
+        self.app.delete("/v1/instance-links/{link_id}")(self.delete_instance_link)
+        self.app.get("/v1/feature-flags")(self.get_feature_flags)
         self.app.get("/models")(self.get_models)
         self.app.get("/v1/models")(self.get_models)
         self.app.post("/models/add")(self.add_custom_model)
@@ -331,7 +359,9 @@ class API:
         self.app.post("/v1/chat/completions", response_model=None)(
             self.chat_completions
         )
-        self.app.post("/bench/chat/completions")(self.bench_chat_completions)
+        self.app.post("/bench/chat/completions", response_model=None)(
+            self.bench_chat_completions
+        )
         self.app.post("/v1/images/generations", response_model=None)(
             self.image_generations
         )
@@ -347,6 +377,9 @@ class API:
         # Ollama API
         self.app.head("/ollama/")(self.ollama_version)
         self.app.head("/ollama/api/version")(self.ollama_version)
+        self.app.post("/ollama/v1/chat/completions", response_model=None)(
+            self.chat_completions
+        )
         self.app.post("/ollama/api/chat", response_model=None)(self.ollama_chat)
         self.app.post("/ollama/api/api/chat", response_model=None)(self.ollama_chat)
         self.app.post("/ollama/api/v1/chat", response_model=None)(self.ollama_chat)
@@ -449,9 +482,11 @@ class API:
                 ),
                 node_memory=self.state.node_memory,
                 node_network=self.state.node_network,
+                node_backends=self.state.node_backends,
                 topology=self.state.topology,
                 current_instances=self.state.instances,
                 download_status=self.state.downloads,
+                node_rdma_ctl=self.state.node_rdma_ctl,
             )
         except ValueError as exc:
             raise HTTPException(status_code=400, detail=str(exc)) from exc
@@ -511,10 +546,12 @@ class API:
                     ),
                     node_memory=self.state.node_memory,
                     node_network=self.state.node_network,
+                    node_backends=self.state.node_backends,
                     topology=self.state.topology,
                     current_instances=self.state.instances,
                     required_nodes=required_nodes,
                     download_status=self.state.downloads,
+                    node_rdma_ctl=self.state.node_rdma_ctl,
                 )
             except ValueError as exc:
                 if (model_card.model_id, sharding, instance_meta, 0) not in seen:
@@ -610,6 +647,49 @@ class API:
             instance_id=instance_id,
         )
 
+    async def get_feature_flags(self) -> dict[str, bool]:
+        return {"disaggregation": ENABLE_DISAGGREGATION}
+
+    async def list_instance_links(self) -> list[InstanceLink]:
+        """Return every instance link; empty when disaggregation is disabled."""
+        links = self.state.instance_links.values() if ENABLE_DISAGGREGATION else ()
+        return list(links)
+
+    async def create_instance_link(
+        self, body: InstanceLinkBody
+    ) -> InstanceLinkResponse:
+        _require_disaggregation_enabled()
+        return await self._set_instance_link(InstanceLinkId(), body)
+
+    async def update_instance_link(
+        self, link_id: InstanceLinkId, body: InstanceLinkBody
+    ) -> InstanceLinkResponse:
+        _require_disaggregation_enabled()
+        return await self._set_instance_link(link_id, body)
+
+    async def _set_instance_link(
+        self, link_id: InstanceLinkId, body: InstanceLinkBody
+    ) -> InstanceLinkResponse:
+        """Send a SetInstanceLink command for ``link_id`` and acknowledge it."""
+        prefill = list(body.prefill_instances)
+        decode = list(body.decode_instances)
+        set_link = SetInstanceLink(
+            link_id=link_id, prefill_instances=prefill, decode_instances=decode
+        )
+        await self._send(set_link)
+        ack = "Command received."
+        return InstanceLinkResponse(message=ack, command_id=set_link.command_id)
+
+    async def delete_instance_link(
+        self, link_id: InstanceLinkId
+    ) -> InstanceLinkResponse:
+        """Send a DeleteInstanceLink command; rejected when disaggregation is off."""
+        _require_disaggregation_enabled()
+        removal = DeleteInstanceLink(link_id=link_id)
+        await self._send(removal)
+        ack = "Command received."
+        return InstanceLinkResponse(message=ack, command_id=removal.command_id)
+
     async def cancel_command(self, command_id: CommandId) -> CancelCommandResponse:
         """Cancel an active command by closing its stream and notifying workers."""
         sender = self._text_generation_queues.get(
@@ -737,11 +817,10 @@ class API:
             "TODO: we should send a notification to the user to download the model"
         )
 
-    _sent_image_hashes: set[str] = set()
-
     async def _send_text_generation_with_images(
         self, task_params: TextGenerationTaskParams
     ) -> TextGeneration:
+        task_params = task_params.with_card_sampling_defaults()
         images = task_params.images
         if not images:
             command = TextGeneration(task_params=task_params)
@@ -749,23 +828,19 @@ class API:
             return command
 
         hashes = [hashlib.sha256(img.encode("ascii")).hexdigest() for img in images]
+        all_hashes = {idx: Base64ImageHash(h) for idx, h in enumerate(hashes)}
+        task_params = task_params.model_copy(
+            update={"images": [], "image_hashes": all_hashes}
+        )
+        command = TextGeneration(task_params=task_params)
 
-        cached_hashes: dict[int, str] = {}
         new_images: list[tuple[int, str]] = []
         for idx, (img, h) in enumerate(zip(images, hashes, strict=True)):
-            if h in self._sent_image_hashes:
-                cached_hashes[idx] = h
-            else:
+            if h not in self._sent_image_hashes:
                 self._sent_image_hashes.add(h)
                 new_images.append((idx, img))
 
-        wrapped_hashes = {idx: Base64Image(h) for idx, h in cached_hashes.items()}
-
         if not new_images:
-            task_params = task_params.model_copy(
-                update={"images": [], "image_hashes": wrapped_hashes}
-            )
-            command = TextGeneration(task_params=task_params)
             await self._send(command)
             return command
 
@@ -774,16 +849,6 @@ class API:
             for i in range(0, len(img_data), EXO_MAX_CHUNK_SIZE):
                 all_chunks.append((img_idx, img_data[i : i + EXO_MAX_CHUNK_SIZE]))
 
-        task_params = task_params.model_copy(
-            update={
-                "images": [],
-                "image_hashes": wrapped_hashes,
-                "total_input_chunks": len(all_chunks),
-                "image_count": len(new_images),
-            }
-        )
-        command = TextGeneration(task_params=task_params)
-
         for global_idx, (img_idx, chunk_data) in enumerate(all_chunks):
             await self._send(
                 SendInputChunk(
@@ -839,17 +904,39 @@ class API:
 
     async def bench_chat_completions(
         self, payload: BenchChatCompletionRequest
-    ) -> BenchChatCompletionResponse:
+    ) -> BenchChatCompletionResponse | StreamingResponse:
         task_params = await chat_request_to_text_generation(payload)
         resolved_model = await self._resolve_and_validate_text_model(
             ModelId(task_params.model)
         )
         task_params = task_params.model_copy(update={"model": resolved_model})
 
-        task_params = task_params.model_copy(update={"stream": False, "bench": True})
+        task_params = task_params.model_copy(
+            update={
+                "stream": False,
+                "bench": True,
+                "use_prefix_cache": payload.use_prefix_cache,
+            }
+        )
 
         command = await self._send_text_generation_with_images(task_params)
 
+        if payload.stream:
+            return StreamingResponse(
+                with_sse_keepalive(
+                    generate_chat_stream(
+                        command.command_id,
+                        self._token_chunk_stream(command.command_id),
+                    ),
+                ),
+                media_type="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "Connection": "close",
+                    "X-Accel-Buffering": "no",
+                },
+            )
+
         return await self._collect_text_generation_with_stats(command.command_id)
 
     async def _resolve_and_validate_text_model(self, model_id: ModelId) -> ModelId:
@@ -1554,17 +1641,16 @@ class API:
     async def ollama_tags(self) -> OllamaTagsResponse:
         """Returns list of models in Ollama tags format. We return the downloaded ones only."""
 
-        def none_if_empty(value: str) -> str | None:
-            return value or None
-
-        downloaded_model_ids: set[str] = set()
+        downloaded_model_ids: set[ModelId] = set()
         for node_downloads in self.state.downloads.values():
             for dl in node_downloads:
                 if isinstance(dl, DownloadCompleted):
                     downloaded_model_ids.add(dl.shard_metadata.model_card.model_id)
 
         cards = [
-            c for c in await get_model_cards() if c.model_id in downloaded_model_ids
+            c
+            for c in await model_cards.card_cache.list_all()
+            if c.model_id in downloaded_model_ids
         ]
 
         now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
@@ -1577,8 +1663,8 @@ class API:
                     size=card.storage_size.in_bytes,
                     digest="sha256:000000000000",
                     details=OllamaModelDetails(
-                        family=none_if_empty(card.family),
-                        quantization_level=none_if_empty(card.quantization),
+                        family=card.family or None,
+                        quantization_level=card.quantization or None,
                     ),
                 )
                 for card in cards
@@ -1599,6 +1685,20 @@ class API:
                 status_code=404, detail=f"Model not found: {model_name}"
             ) from exc
 
+        capabilities: list[OllamaCapability] = []
+        if ModelTask.TextGeneration in card.tasks:
+            capabilities.extend(("completion", "tools"))
+        if card.vision is not None:
+            capabilities.append("vision")
+
+        architecture = card.family or "unknown"
+        model_info: dict[str, Any] = {
+            "general.architecture": architecture,
+            "general.basename": card.base_model or str(card.model_id),
+        }
+        if card.context_length > 0:
+            model_info[f"{architecture}.context_length"] = card.context_length
+
         return OllamaShowResponse(
             modelfile=f"FROM {card.model_id}",
             template="{{ .Prompt }}",
@@ -1606,6 +1706,8 @@ class API:
                 family=card.family or None,
                 quantization_level=card.quantization or None,
             ),
+            model_info=model_info,
+            capabilities=capabilities,
         )
 
     async def ollama_ps(self) -> OllamaPsResponse:
@@ -1628,7 +1730,7 @@ class API:
 
     async def ollama_version(self) -> dict[str, str]:
         """Returns version information for Ollama API compatibility."""
-        return {"version": "exo v1.0"}
+        return {"version": "1.0.0"}
 
     def _calculate_total_available_memory(self) -> Memory:
         """Calculate total available memory across all nodes in bytes."""
@@ -1641,7 +1743,7 @@ class API:
 
     async def get_models(self, status: str | None = Query(default=None)) -> ModelList:
         """Returns list of available models, optionally filtered by being downloaded."""
-        cards = await get_model_cards()
+        cards = await model_cards.card_cache.list_all()
 
         if status == "downloaded":
             downloaded_model_ids: set[str] = set()
@@ -1667,6 +1769,7 @@ class API:
                     quantization=card.quantization,
                     base_model=card.base_model,
                     capabilities=card.capabilities,
+                    reasoning_dialect=card.reasoning_dialect,
                     context_length=card.context_length,
                 )
                 for card in cards
@@ -1691,7 +1794,7 @@ class API:
 
         # Immediately update the local cache so the subsequent GET /models
         # returns the new model without waiting for the event round-trip.
-        add_to_card_cache(card)
+        model_cards.card_cache.cc[card.model_id] = card
 
         return ModelListModel(
             id=card.model_id,
@@ -1707,7 +1810,7 @@ class API:
 
     async def delete_custom_model(self, model_id: ModelId) -> JSONResponse:
         """Delete a user-added custom model card and sync deletion across the cluster."""
-        card = get_card(model_id)
+        card = model_cards.card_cache.get(model_id)
         if card is None or not card.is_custom:
             raise HTTPException(status_code=404, detail="Custom model card not found")
 
@@ -1777,12 +1880,21 @@ class API:
                     await anyio.sleep_forever()
                 finally:
                     with anyio.CancelScope(shield=True):
+                        # IMPORTANT: when new queues are added, update this (for proper shutdown semantics)
+                        self._shutdown_queues(self._text_generation_queues)
+                        self._shutdown_queues(self._image_generation_queues)
+
                         shutdown_ev.set()
         finally:
             self._event_log.close()
             self.command_sender.close()
             self.event_receiver.close()
 
+    @staticmethod
+    def _shutdown_queues[K, V](queues: dict[K, Sender[V]]) -> None:
+        for sender in queues.values():  # close every sender held in the mapping
+            sender.close()
+
     async def run_api(self, ev: anyio.Event):
         cfg = Config()
         cfg.bind = [f"0.0.0.0:{self.port}"]
@@ -1790,12 +1902,23 @@ class API:
         cfg.accesslog = None
         cfg.errorlog = "-"
         cfg.logger_class = InterceptLogger
+
+        # prevents hangs when mid-request and connection refuses to close
+        cfg.graceful_timeout = 2  # seconds
+        cfg.shutdown_timeout = 3  # seconds
+
         with anyio.CancelScope(shield=True):
-            await serve(
-                cast(ASGIFramework, self.app),
-                cfg,
-                shutdown_trigger=ev.wait,
-            )
+            try:
+                await serve(
+                    cast(ASGIFramework, self.app),
+                    cfg,
+                    shutdown_trigger=ev.wait,
+                )
+            except LifespanTimeoutError as e:
+                logger.warning(
+                    "Graceful server shutdown timed out, some connections forcibly closed"
+                )
+                logger.opt(exception=e).debug("")  # traceback emitted at debug level only
 
     async def _apply_state(self):
         with self.event_receiver as events:
diff --git a/src/exo/api/tests/test_chat_completions_stream.py b/src/exo/api/tests/test_chat_completions_stream.py
new file mode 100644
index 000000000..2f718f167
--- /dev/null
+++ b/src/exo/api/tests/test_chat_completions_stream.py
@@ -0,0 +1,195 @@
+# pyright: reportAny=false
+"""Tests asserting OpenAI-spec wire shape for /v1/chat/completions deltas."""
+
+import json
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from exo.api.adapters.chat_completions import (
+    collect_chat_response,
+    generate_chat_stream,
+)
+from exo.api.types import (
+    CompletionTokensDetails,
+    PromptTokensDetails,
+    ToolCallItem,
+    Usage,
+)
+from exo.shared.types.chunks import (
+    ErrorChunk,
+    PrefillProgressChunk,
+    TokenChunk,
+    ToolCallChunk,
+)
+from exo.shared.types.common import CommandId, ModelId
+
+_TEST_MODEL = ModelId("test-model")
+_NULLABLE_DELTA_FIELDS = {"content", "refusal"}
+
+
+def _make_usage(prompt_tokens: int = 1, completion_tokens: int = 1) -> Usage:
+    return Usage(
+        prompt_tokens=prompt_tokens,
+        completion_tokens=completion_tokens,
+        total_tokens=prompt_tokens + completion_tokens,
+        prompt_tokens_details=PromptTokensDetails(),
+        completion_tokens_details=CompletionTokensDetails(),
+    )
+
+
+async def _stream(
+    chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk],
+) -> AsyncGenerator[
+    PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk, None
+]:
+    for chunk in chunks:
+        yield chunk
+
+
+def _parse_data_events(lines: list[str]) -> list[dict[str, Any]]:
+    """Decode each SSE ``data:`` payload in ``lines``, skipping ``[DONE]``."""
+    return [
+        json.loads(row.removeprefix("data: "))
+        for row in "\n".join(lines).split("\n")
+        if row.startswith("data: ") and not row.endswith("[DONE]")
+    ]
+
+
+def _assert_delta_spec_compliant(delta: dict[str, Any]) -> None:
+    """Fail on the first null field that is not in _NULLABLE_DELTA_FIELDS."""
+    null_keys = (name for name, val in delta.items() if val is None)
+    for key in null_keys:
+        if key in _NULLABLE_DELTA_FIELDS:
+            continue
+        msg = f"delta.{key} is null but spec requires it to be absent or a value; "
+        raise AssertionError(msg + f"full delta={delta!r}")
+
+
+class TestTokenStreamDeltaShape:
+    async def test_token_chunk_delta_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello",
+                usage=None,
+            ),
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=2,
+                text=" world",
+                usage=_make_usage(),
+                finish_reason="stop",
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-token"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 2
+        for event in events:
+            delta = event["choices"][0]["delta"]
+            _assert_delta_spec_compliant(delta)
+            assert "tool_calls" not in delta or isinstance(delta["tool_calls"], list)
+            assert "function_call" not in delta
+            assert "name" not in delta
+            assert "tool_call_id" not in delta
+
+    async def test_thinking_chunk_delta_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hmm",
+                usage=None,
+                is_thinking=True,
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-thinking"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        delta = events[0]["choices"][0]["delta"]
+        _assert_delta_spec_compliant(delta)
+        assert delta.get("reasoning_content") == "Hmm"
+        assert "content" not in delta
+
+
+class TestToolCallStreamDeltaShape:
+    async def test_tool_call_chunk_delta_has_array_tool_calls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            ToolCallChunk(
+                model=_TEST_MODEL,
+                tool_calls=[
+                    ToolCallItem(id="call_1", name="get_weather", arguments="{}"),
+                ],
+                usage=_make_usage(),
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-tool"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        delta = events[0]["choices"][0]["delta"]
+        _assert_delta_spec_compliant(delta)
+        assert isinstance(delta["tool_calls"], list)
+        assert delta["tool_calls"][0]["function"]["name"] == "get_weather"
+
+
+class TestErrorStreamShape:
+    async def test_error_chunk_response_has_no_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            ErrorChunk(model=_TEST_MODEL, error_message="boom"),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-err"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        assert events[0]["error"]["message"] == "boom"
+        for value in events[0]["error"].values():
+            assert value is not None
+
+
+class TestNonStreamingResponseShape:
+    async def test_collected_response_message_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello",
+                usage=_make_usage(),
+                finish_reason="stop",
+            ),
+        ]
+        parts: list[str] = []
+        async for part in collect_chat_response(
+            CommandId("test-cmd-nonstream"), _stream(chunks)
+        ):
+            parts.append(part)
+
+        assert len(parts) == 1
+        payload = json.loads(parts[0])
+        message = payload["choices"][0]["message"]
+        for key, value in message.items():
+            if value is None:
+                assert key in {"content", "refusal", "reasoning_content"}, (
+                    f"non-streaming message.{key} is null but spec disallows it"
+                )
+        assert "function_call" not in message
+        assert "name" not in message
+        assert "tool_call_id" not in message
diff --git a/src/exo/api/tests/test_openai_responses_api.py b/src/exo/api/tests/test_openai_responses_api.py
index 7165df8e0..3a3195819 100644
--- a/src/exo/api/tests/test_openai_responses_api.py
+++ b/src/exo/api/tests/test_openai_responses_api.py
@@ -4,14 +4,30 @@ ResponsesRequest is the API wire type for the Responses endpoint.
 The responses adapter converts it to TextGenerationTaskParams for the pipeline.
 """
 
+import json
+from collections.abc import AsyncGenerator
+from typing import Any, cast
+
 import pydantic
 import pytest
 
+from exo.api.adapters.responses import (
+    collect_responses_response,
+    generate_responses_stream,
+)
+from exo.api.types import CompletionTokensDetails, PromptTokensDetails, Usage
 from exo.api.types.openai_responses import (
+    InputTokensDetails,
+    OutputTokensDetails,
     ResponseInputMessage,
     ResponsesRequest,
+    ResponsesResponse,
+    ResponseUsage,
 )
-from exo.shared.types.common import ModelId
+from exo.shared.types.chunks import TokenChunk
+from exo.shared.types.common import CommandId, ModelId
+
+_TEST_MODEL = ModelId("test-model")
 
 
 class TestResponsesRequestValidation:
@@ -46,3 +62,210 @@ class TestResponsesRequestValidation:
             input=[ResponseInputMessage(role="user", content="Hello")],
         )
         assert len(request.input) == 1
+
+
+class TestResponseUsage:
+    """Tests for ResponseUsage with input_tokens_details and output_tokens_details."""
+
+    def test_usage_defaults_to_zero_details(self):
+        usage = ResponseUsage(
+            input_tokens=10,
+            input_tokens_details=InputTokensDetails(),  # default cached_tokens=0
+            output_tokens=20,
+            output_tokens_details=OutputTokensDetails(),  # default reasoning_tokens=0
+            total_tokens=30,
+        )
+        assert usage.input_tokens_details.cached_tokens == 0
+        assert usage.output_tokens_details.reasoning_tokens == 0
+
+    def test_usage_with_reasoning_tokens(self):
+        usage = ResponseUsage(
+            input_tokens=10,
+            input_tokens_details=InputTokensDetails(),
+            output_tokens=20,
+            output_tokens_details=OutputTokensDetails(reasoning_tokens=5),
+            total_tokens=30,
+        )
+        assert usage.output_tokens_details.reasoning_tokens == 5
+
+    def test_usage_with_cached_tokens(self):
+        usage = ResponseUsage(
+            input_tokens=10,
+            input_tokens_details=InputTokensDetails(cached_tokens=7),
+            output_tokens=20,
+            output_tokens_details=OutputTokensDetails(),
+            total_tokens=30,
+        )
+        assert usage.input_tokens_details.cached_tokens == 7
+
+    def test_usage_serialization(self):
+        usage = ResponseUsage(
+            input_tokens=10,
+            input_tokens_details=InputTokensDetails(cached_tokens=3),
+            output_tokens=20,
+            output_tokens_details=OutputTokensDetails(reasoning_tokens=5),
+            total_tokens=30,
+        )
+        data = usage.model_dump()  # nested detail models become plain dicts
+        assert data["input_tokens_details"] == {"cached_tokens": 3}
+        assert data["output_tokens_details"] == {"reasoning_tokens": 5}
+
+    def test_usage_serialization_zero_details(self):
+        usage = ResponseUsage(
+            input_tokens=10,
+            input_tokens_details=InputTokensDetails(),
+            output_tokens=20,
+            output_tokens_details=OutputTokensDetails(),
+            total_tokens=30,
+        )
+        data = usage.model_dump()  # zero-valued details are still emitted
+        assert data["input_tokens_details"] == {"cached_tokens": 0}
+        assert data["output_tokens_details"] == {"reasoning_tokens": 0}
+
+
+def _make_usage(
+    prompt_tokens: int, completion_tokens: int, reasoning_tokens: int = 0
+) -> Usage:
+    """Build a test Usage; total_tokens is prompt_tokens + completion_tokens."""
+    output_details = CompletionTokensDetails(reasoning_tokens=reasoning_tokens)
+    input_details = PromptTokensDetails()
+    return Usage(
+        prompt_tokens=prompt_tokens,
+        completion_tokens=completion_tokens,
+        total_tokens=prompt_tokens + completion_tokens,
+        prompt_tokens_details=input_details,
+        completion_tokens_details=output_details,
+    )
+
+
+async def _token_chunks(
+    chunks: list[TokenChunk],
+) -> AsyncGenerator[TokenChunk, None]:
+    for item in chunks:  # replay the canned chunks, in order, as an async stream
+        yield item
+
+
+class TestCollectResponsesResponseReasoningTokens:
+    """Non-streaming path: chat reasoning_tokens must reach output_tokens_details."""
+
+    async def test_non_streaming_includes_reasoning_tokens(self):
+        usage = _make_usage(10, 25, reasoning_tokens=8)
+        chunks = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=0,
+                text="thinking...",
+                is_thinking=True,
+                usage=None,  # only the final chunk carries usage
+            ),
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello world",
+                is_thinking=False,
+                usage=usage,
+            ),
+        ]
+        command_id = CommandId("test-cmd-001")
+        result_parts: list[str] = []
+        async for part in collect_responses_response(
+            command_id, "test-model", _token_chunks(chunks)
+        ):
+            result_parts.append(part)
+        assert len(result_parts) == 1  # the whole response is one JSON document
+        response = ResponsesResponse.model_validate_json(result_parts[0])
+        assert response.usage is not None
+        assert response.usage.input_tokens_details.cached_tokens == 0
+        assert response.usage.output_tokens_details.reasoning_tokens == 8
+
+    async def test_non_streaming_zero_reasoning_tokens(self):
+        usage = _make_usage(10, 20, reasoning_tokens=0)
+        chunks = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=0,
+                text="Hello world",
+                is_thinking=False,
+                usage=usage,
+            ),
+        ]
+        command_id = CommandId("test-cmd-002")
+        result_parts: list[str] = []
+        async for part in collect_responses_response(
+            command_id, "test-model", _token_chunks(chunks)
+        ):
+            result_parts.append(part)
+        assert len(result_parts) == 1  # the whole response is one JSON document
+        response = ResponsesResponse.model_validate_json(result_parts[0])
+        assert response.usage is not None
+        assert response.usage.output_tokens_details.reasoning_tokens == 0
+        assert response.usage.input_tokens_details.cached_tokens == 0
+
+
+class TestGenerateResponsesStreamReasoningTokens:
+    """Streaming path: the final response.completed event must carry usage details."""
+
+    async def test_streaming_includes_reasoning_tokens(self):
+        usage = _make_usage(10, 25, reasoning_tokens=8)
+        chunks = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=0,
+                text="thinking...",
+                is_thinking=True,
+                usage=None,  # only the final chunk carries usage
+            ),
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello world",
+                is_thinking=False,
+                usage=usage,
+            ),
+        ]
+        command_id = CommandId("test-cmd-003")
+        events: list[str] = []
+        async for event in generate_responses_stream(
+            command_id, "test-model", _token_chunks(chunks)
+        ):
+            events.append(event)
+
+        # The last event should be response.completed
+        last_event = events[-1]
+        # Parse the JSON payload from the event's first "data: " line
+        data_line = [
+            line for line in last_event.strip().split("\n") if line.startswith("data: ")
+        ][0]
+        data = cast(dict[str, Any], json.loads(data_line.removeprefix("data: ")))
+        assert data["type"] == "response.completed"
+        response_usage = cast(dict[str, Any], data["response"]["usage"])
+        assert response_usage["input_tokens_details"] == {"cached_tokens": 0}
+        assert response_usage["output_tokens_details"] == {"reasoning_tokens": 8}
+
+    async def test_streaming_zero_reasoning_tokens(self):
+        usage = _make_usage(10, 20, reasoning_tokens=0)
+        chunks = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=0,
+                text="Hello world",
+                is_thinking=False,
+                usage=usage,
+            ),
+        ]
+        command_id = CommandId("test-cmd-004")
+        events: list[str] = []
+        async for event in generate_responses_stream(
+            command_id, "test-model", _token_chunks(chunks)
+        ):
+            events.append(event)
+
+        last_event = events[-1]  # response.completed is expected to be last
+        data_line = [
+            line for line in last_event.strip().split("\n") if line.startswith("data: ")
+        ][0]
+        data = cast(dict[str, Any], json.loads(data_line.removeprefix("data: ")))
+        assert data["type"] == "response.completed"
+        response_usage = cast(dict[str, Any], data["response"]["usage"])
+        assert response_usage["input_tokens_details"] == {"cached_tokens": 0}
+        assert response_usage["output_tokens_details"] == {"reasoning_tokens": 0}
diff --git a/src/exo/api/types/__init__.py b/src/exo/api/types/__init__.py
index 3a61c1cd3..9cb2f834f 100644
--- a/src/exo/api/types/__init__.py
+++ b/src/exo/api/types/__init__.py
@@ -34,6 +34,8 @@ from .api import ImageGenerationTaskParams as ImageGenerationTaskParams
 from .api import ImageListItem as ImageListItem
 from .api import ImageListResponse as ImageListResponse
 from .api import ImageSize as ImageSize
+from .api import InstanceLinkBody as InstanceLinkBody
+from .api import InstanceLinkResponse as InstanceLinkResponse
 from .api import Logprobs as Logprobs
 from .api import LogprobsContentItem as LogprobsContentItem
 from .api import ModelList as ModelList
diff --git a/src/exo/api/types/api.py b/src/exo/api/types/api.py
index 709ad144f..8cfa10dd1 100644
--- a/src/exo/api/types/api.py
+++ b/src/exo/api/types/api.py
@@ -8,10 +8,10 @@ from pydantic import BaseModel, Field, field_validator
 from exo.shared.models.model_cards import ModelCard, ModelId
 from exo.shared.types.common import CommandId, NodeId
 from exo.shared.types.memory import Memory
-from exo.shared.types.text_generation import ReasoningEffort
+from exo.shared.types.text_generation import ReasoningDialect, ReasoningEffort
 from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
 from exo.shared.types.worker.shards import Sharding, ShardMetadata
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 
 FinishReason = Literal[
     "stop", "length", "tool_calls", "content_filter", "function_call", "error"
@@ -48,6 +48,7 @@ class ModelListModel(BaseModel):
     quantization: str = Field(default="")
     base_model: str = Field(default="")
     capabilities: list[str] = Field(default_factory=list)
+    reasoning_dialect: ReasoningDialect = "none"
 
 
 class ModelList(BaseModel):
@@ -165,6 +166,7 @@ class GenerationStats(BaseModel):
     prompt_tokens: int
     generation_tokens: int
     peak_memory_usage: Memory
+    prefix_cache_hit: Literal["none", "partial", "exact"] = "none"
 
 
 class ImageGenerationStats(BaseModel):
@@ -232,7 +234,7 @@ class ChatCompletionRequest(BaseModel):
 
 
 class BenchChatCompletionRequest(ChatCompletionRequest):
-    pass
+    use_prefix_cache: bool = False
 
 
 class AddCustomModelParams(BaseModel):
@@ -294,6 +296,16 @@ class CancelCommandResponse(BaseModel):
     command_id: CommandId
 
 
+class InstanceLinkBody(BaseModel):
+    prefill_instances: list[InstanceId]
+    decode_instances: list[InstanceId]
+
+
+class InstanceLinkResponse(BaseModel):
+    message: str
+    command_id: CommandId
+
+
 ImageSize = Literal[
     "auto",
     "512x512",
@@ -417,29 +429,29 @@ class ImageListResponse(BaseModel, frozen=True):
     data: list[ImageListItem]
 
 
-class StartDownloadParams(CamelCaseModel):
+class StartDownloadParams(FrozenModel):
     target_node_id: NodeId
     shard_metadata: ShardMetadata
 
 
-class StartDownloadResponse(CamelCaseModel):
+class StartDownloadResponse(FrozenModel):
     command_id: CommandId
 
 
-class DeleteDownloadResponse(CamelCaseModel):
+class DeleteDownloadResponse(FrozenModel):
     command_id: CommandId
 
 
-class CancelDownloadParams(CamelCaseModel):
+class CancelDownloadParams(FrozenModel):
     target_node_id: NodeId
     model_id: ModelId
 
 
-class CancelDownloadResponse(CamelCaseModel):
+class CancelDownloadResponse(FrozenModel):
     command_id: CommandId
 
 
-class TraceEventResponse(CamelCaseModel):
+class TraceEventResponse(FrozenModel):
     name: str
     start_us: int
     duration_us: int
@@ -447,12 +459,12 @@ class TraceEventResponse(CamelCaseModel):
     category: str
 
 
-class TraceResponse(CamelCaseModel):
+class TraceResponse(FrozenModel):
     task_id: str
     traces: list[TraceEventResponse]
 
 
-class TraceCategoryStats(CamelCaseModel):
+class TraceCategoryStats(FrozenModel):
     total_us: int
     count: int
     min_us: int
@@ -460,31 +472,31 @@ class TraceCategoryStats(CamelCaseModel):
     avg_us: float
 
 
-class TraceRankStats(CamelCaseModel):
+class TraceRankStats(FrozenModel):
     by_category: dict[str, TraceCategoryStats]
 
 
-class TraceStatsResponse(CamelCaseModel):
+class TraceStatsResponse(FrozenModel):
     task_id: str
     total_wall_time_us: int
     by_category: dict[str, TraceCategoryStats]
     by_rank: dict[int, TraceRankStats]
 
 
-class TraceListItem(CamelCaseModel):
+class TraceListItem(FrozenModel):
     task_id: str
     created_at: str
     file_size: int
 
 
-class TraceListResponse(CamelCaseModel):
+class TraceListResponse(FrozenModel):
     traces: list[TraceListItem]
 
 
-class DeleteTracesRequest(CamelCaseModel):
+class DeleteTracesRequest(FrozenModel):
     task_ids: list[str]
 
 
-class DeleteTracesResponse(CamelCaseModel):
+class DeleteTracesResponse(FrozenModel):
     deleted: list[str]
     not_found: list[str]
diff --git a/src/exo/api/types/ollama_api.py b/src/exo/api/types/ollama_api.py
index 58a54ac02..c122a5dca 100644
--- a/src/exo/api/types/ollama_api.py
+++ b/src/exo/api/types/ollama_api.py
@@ -11,6 +11,16 @@ from exo.shared.models.model_cards import ModelId
 
 OllamaRole = Literal["system", "user", "assistant", "tool"]
 OllamaDoneReason = Literal["stop", "length", "tool_call", "error"]
+OllamaCapability = Literal[
+    "completion",
+    "tools",
+    "insert",
+    "vision",
+    "embedding",
+    "thinking",
+    "image",
+    "audio",
+]
 
 
 class OllamaToolFunction(BaseModel, frozen=True):
@@ -133,6 +143,7 @@ class OllamaShowResponse(BaseModel, frozen=True, strict=True):
     template: str | None = None
     details: OllamaModelDetails | None = None
     model_info: dict[str, Any] | None = None
+    capabilities: list[OllamaCapability] = []
 
 
 class OllamaPsModel(BaseModel, frozen=True, strict=True):
diff --git a/src/exo/api/types/openai_responses.py b/src/exo/api/types/openai_responses.py
index 0b7476483..753b57d0b 100644
--- a/src/exo/api/types/openai_responses.py
+++ b/src/exo/api/types/openai_responses.py
@@ -407,11 +407,25 @@ class ResponseReasoningItem(BaseModel, frozen=True):
 ResponseItem = ResponseMessageItem | ResponseFunctionCallItem | ResponseReasoningItem
 
 
+class InputTokensDetails(BaseModel, frozen=True):
+    """Breakdown of input token counts in Responses API response."""
+
+    cached_tokens: int = 0  # input tokens retrieved from cache (OpenAI usage field)
+
+
+class OutputTokensDetails(BaseModel, frozen=True):
+    """Breakdown of output token counts in Responses API response."""
+
+    reasoning_tokens: int = 0  # output tokens spent on reasoning (OpenAI usage field)
+
+
 class ResponseUsage(BaseModel, frozen=True):
     """Token usage in Responses API response."""
 
     input_tokens: int
+    input_tokens_details: InputTokensDetails
     output_tokens: int
+    output_tokens_details: OutputTokensDetails
     total_tokens: int
 
 
diff --git a/src/exo/download/coordinator.py b/src/exo/download/coordinator.py
index bfb9ddf8b..de9c4722e 100644
--- a/src/exo/download/coordinator.py
+++ b/src/exo/download/coordinator.py
@@ -16,7 +16,8 @@ from exo.download.download_utils import (
 )
 from exo.download.shard_downloader import ShardDownloader
 from exo.shared.constants import EXO_DEFAULT_MODELS_DIR, EXO_MODELS_READ_ONLY_DIRS
-from exo.shared.models.model_cards import ModelId, get_model_cards
+from exo.shared.models import model_cards
+from exo.shared.models.model_cards import ModelId
 from exo.shared.types.commands import (
     CancelDownload,
     DeleteDownload,
@@ -88,7 +89,9 @@ class DownloadCoordinator:
 
         try:
             if progress.status == "complete":
-                found = await to_thread.run_sync(resolve_existing_model, model_id)
+                found = await to_thread.run_sync(
+                    resolve_existing_model, model_id, callback_shard.model_card
+                )
                 if found is not None:
                     completed = self._completed_from_path(
                         callback_shard, found, progress.total
@@ -193,7 +196,9 @@ class DownloadCoordinator:
                 return
 
         # Check all model directories for pre-existing complete models
-        found_path = await to_thread.run_sync(resolve_existing_model, model_id)
+        found_path = await to_thread.run_sync(
+            resolve_existing_model, model_id, shard.model_card
+        )
         if found_path is not None:
             logger.info(f"DownloadCoordinator: Model {model_id} found at {found_path}")
             completed = self._completed_from_path(
@@ -220,7 +225,9 @@ class DownloadCoordinator:
         )
 
         if initial_progress.status == "complete":
-            found = await to_thread.run_sync(resolve_existing_model, model_id)
+            found = await to_thread.run_sync(
+                resolve_existing_model, model_id, shard.model_card
+            )
             if found is not None:
                 completed = self._completed_from_path(
                     shard, found, initial_progress.total
@@ -351,7 +358,9 @@ class DownloadCoordinator:
 
                     if progress.status == "complete":
                         found = await to_thread.run_sync(
-                            resolve_existing_model, model_id
+                            resolve_existing_model,
+                            model_id,
+                            progress.shard.model_card,
                         )
                         if found is not None:
                             status: DownloadProgress = self._completed_from_path(
@@ -365,7 +374,30 @@ class DownloadCoordinator:
                                 model_directory=self._default_model_dir(model_id),
                             )
                     elif progress.status in ["in_progress", "not_started"]:
-                        if progress.downloaded_this_session.in_bytes == 0:
+                        # TODO(ciaran): temporary solution
+                        # Don't downgrade a model that is already confirmed complete.
+                        if isinstance(
+                            self.download_status.get(model_id), DownloadCompleted
+                        ):
+                            continue
+                        # The per-file size check compares local files against
+                        # the latest HF "main" revision, which is a moving
+                        # target.  When HF updates text files (README, YAML,
+                        # jinja) in a new commit, the cached file list has new
+                        # sizes while local files still match the old revision.
+                        # Fall back to the authoritative completeness check
+                        # (is_model_directory_complete) which validates that all
+                        # safetensors weight files are present.
+                        found = await to_thread.run_sync(
+                            resolve_existing_model,
+                            model_id,
+                            progress.shard.model_card,
+                        )
+                        if found is not None:
+                            status = self._completed_from_path(
+                                progress.shard, found, progress.total
+                            )
+                        elif progress.downloaded_this_session.in_bytes == 0:
                             status = DownloadPending(
                                 node_id=self.node_id,
                                 shard_metadata=progress.shard,
@@ -391,7 +423,7 @@ class DownloadCoordinator:
                     )
                 # Scan read-only directories for pre-downloaded models
                 if EXO_MODELS_READ_ONLY_DIRS:
-                    for card in await get_model_cards():
+                    for card in await model_cards.card_cache.list_all():
                         mid = card.model_id
                         if mid in self.active_downloads:
                             continue
@@ -400,7 +432,9 @@ class DownloadCoordinator:
                             (DownloadCompleted, DownloadOngoing, DownloadFailed),
                         ):
                             continue
-                        found = await to_thread.run_sync(resolve_existing_model, mid)
+                        found = await to_thread.run_sync(
+                            resolve_existing_model, mid, card
+                        )
                         if found is not None and is_read_only_model_dir(found):
                             path_shard = PipelineShardMetadata(
                                 model_card=card,
diff --git a/src/exo/download/download_utils.py b/src/exo/download/download_utils.py
index 818efca3b..1e81f6d09 100644
--- a/src/exo/download/download_utils.py
+++ b/src/exo/download/download_utils.py
@@ -1,11 +1,12 @@
 import asyncio
 import hashlib
 import os
+import random
 import shutil
 import ssl
 import time
 import traceback
-from collections.abc import Awaitable
+from collections.abc import Awaitable, Mapping
 from datetime import timedelta
 from pathlib import Path
 from typing import Callable, Literal
@@ -35,7 +36,7 @@ from exo.shared.constants import (
     EXO_MODELS_DIRS,
     EXO_MODELS_READ_ONLY_DIRS,
 )
-from exo.shared.models.model_cards import ModelTask
+from exo.shared.models.model_cards import ModelCard, ModelTask
 from exo.shared.types.common import ModelId
 from exo.shared.types.memory import Memory
 from exo.shared.types.worker.downloads import (
@@ -55,6 +56,36 @@ class HuggingFaceAuthenticationError(Exception):
 class HuggingFaceRateLimitError(Exception):
     """429 Huggingface code"""
 
+    def __init__(self, msg: str, retry_after: float | None = None) -> None:
+        super().__init__(msg)
+        self.retry_after = retry_after  # seconds to wait before retrying, if known
+
+
+def _parse_retry_after(headers: Mapping[str, str]) -> float | None:
+    """Parse seconds-to-reset from HF's RateLimit header.
+
+    HF sends e.g. ``ratelimit: "api";r=0;t=52`` on 429s; ``t`` is the wait.
+    Returns ``None`` if the header or ``t`` is missing, or ``t`` is not a
+    non-negative number (NaN and negative waits would break the backoff sleep).
+    """
+    raw = headers.get("RateLimit") or headers.get("ratelimit") or ""
+    for part in raw.split(";"):
+        key, _, val = part.strip().partition("=")
+        if key.strip() == "t":
+            try:
+                seconds = float(val)
+            except ValueError:
+                return None
+            return seconds if seconds >= 0 else None
+    return None
+
+
+# Cap on a single rate-limit sleep; HF's reset window is 5 min.
+_RATE_LIMIT_MAX_SLEEP_SECS = 300.0
+
+# Cache TTL: 24h. Manually clear the cache (or `delete_model`) to force a refresh.
+_FILE_LIST_CACHE_TTL_SECS = 24 * 60 * 60
+
 
 async def _build_auth_error_message(status_code: int, model_id: ModelId) -> str:
     token = await get_hf_token()
@@ -118,7 +149,9 @@ class InsufficientDiskSpaceError(Exception):
     """Raised when no writable model directory has enough free space."""
 
 
-def resolve_existing_model(model_id: ModelId) -> Path | None:
+def resolve_existing_model(
+    model_id: ModelId, card: ModelCard | None = None
+) -> Path | None:
     """Search all model directories for a complete, pre-existing model.
 
     Checks read-only directories first, then writable directories.
@@ -128,7 +161,7 @@ def resolve_existing_model(model_id: ModelId) -> Path | None:
     normalized = model_id.normalize()
     for search_dir in (*EXO_MODELS_READ_ONLY_DIRS, *EXO_MODELS_DIRS):
         candidate = search_dir / normalized
-        if candidate.is_dir() and is_model_directory_complete(candidate):
+        if candidate.is_dir() and is_model_directory_complete(candidate, card):
             return candidate
     return None
 
@@ -165,6 +198,29 @@ def select_download_dir(required_bytes: int) -> Path:
     )
 
 
+async def select_download_dir_for_shard(
+    model_id: ModelId,
+    filtered_file_list: list[FileListEntry],
+    total_size: int,
+) -> Path:
+    """Prefer a writable dir that already has this model's folder and room for
+    the bytes not yet downloaded; else fall back to ``select_download_dir``."""
+    normalized = model_id.normalize()
+    for models_dir in EXO_MODELS_DIRS:
+        partial = models_dir / normalized
+        if not await aios.path.isdir(partial):
+            continue
+        on_disk = 0
+        for entry in filtered_file_list:
+            on_disk += await get_downloaded_size(partial / entry.path)
+        try:
+            if shutil.disk_usage(models_dir).free >= max(total_size - on_disk, 0):
+                return models_dir
+        except OSError:
+            continue
+    return select_download_dir(total_size)
+
+
 async def resolve_model_dir(model_id: ModelId) -> Path:
     """Return the directory for a model's files, creating it if needed.
 
@@ -279,10 +335,26 @@ def _scan_model_directory(
     return list(entries_by_path.values())
 
 
-def is_model_directory_complete(model_dir: Path) -> bool:
-    """Check if a model directory contains all required weight files."""
+def is_model_directory_complete(model_dir: Path, card: ModelCard | None = None) -> bool:
+    """Return True when ``model_dir`` has every required weight file.
+    If ``card`` has vision weights in another repo, that repo is required too.
+    """
     file_list = _scan_model_directory(model_dir, recursive=True)
-    return file_list is not None and all(f.size is not None for f in file_list)
+    if file_list is None or any(f.size is None for f in file_list):
+        return False
+    # Without a card, or without vision weights, the scan above is decisive.
+    if card is None or card.vision is None:
+        return True
+    weights_repo = card.vision.weights_repo
+    if weights_repo == str(card.model_id):
+        return True
+    # Vision weights come from a sibling repo: it must also be complete in
+    # at least one of the read-only or writable model directories.
+    sibling = ModelId(weights_repo).normalize()
+    return any(
+        (root / sibling).is_dir() and is_model_directory_complete(root / sibling)
+        for root in (*EXO_MODELS_READ_ONLY_DIRS, *EXO_MODELS_DIRS)
+    )
 
 
 async def _build_file_list_from_local_directory(
@@ -307,9 +379,6 @@ async def _build_file_list_from_local_directory(
     return None
 
 
-_fetched_file_lists_this_session: set[str] = set()
-
-
 async def fetch_file_list_with_cache(
     model_id: ModelId,
     revision: str = "main",
@@ -319,13 +388,16 @@ async def fetch_file_list_with_cache(
 ) -> list[FileListEntry]:
     target_dir = await ensure_cache_dir(model_id)
     cache_file = target_dir / f"{model_id.normalize()}--{revision}--file_list.json"
-    cache_key = f"{model_id.normalize()}--{revision}"
 
-    if cache_key in _fetched_file_lists_this_session and await aios.path.exists(
-        cache_file
-    ):
-        async with aiofiles.open(cache_file, "r") as f:
-            return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
+    # cache survives process restarts so cold starts don't re-burst HF
+    if await aios.path.exists(cache_file):
+        try:
+            cache_age = time.time() - (await aios.stat(cache_file)).st_mtime
+        except OSError:
+            cache_age = float("inf")
+        if cache_age < _FILE_LIST_CACHE_TTL_SECS:
+            async with aiofiles.open(cache_file, "r") as f:
+                return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
 
     if skip_internet:
         if await aios.path.exists(cache_file):
@@ -354,7 +426,6 @@ async def fetch_file_list_with_cache(
             await f.write(
                 TypeAdapter(list[FileListEntry]).dump_json(file_list).decode()
             )
-        _fetched_file_lists_this_session.add(cache_key)
         return file_list
     except Exception as e:
         logger.opt(exception=e).warning(
@@ -385,17 +456,29 @@ async def fetch_file_list_with_retry(
     recursive: bool = False,
     on_connection_lost: Callable[[], None] = lambda: None,
 ) -> list[FileListEntry]:
-    n_attempts = 3
+    n_attempts = 5
     for attempt in range(n_attempts):
         try:
             return await _fetch_file_list(model_id, revision, path, recursive)
         except HuggingFaceAuthenticationError:
             raise
+        except HuggingFaceRateLimitError as e:
+            if attempt == n_attempts - 1:
+                raise
+            sleep_for = e.retry_after if e.retry_after is not None else 2.0**attempt
+            sleep_for = min(sleep_for, _RATE_LIMIT_MAX_SLEEP_SECS) + random.uniform(
+                0, 1
+            )
+            logger.warning(
+                f"Rate limited by HuggingFace fetching file list for {model_id}; "
+                f"sleeping {sleep_for:.1f}s before retry {attempt + 2}/{n_attempts}"
+            )
+            await asyncio.sleep(sleep_for)
         except Exception as e:
             on_connection_lost()
             if attempt == n_attempts - 1:
                 raise e
-            await asyncio.sleep(2.0**attempt)
+            await asyncio.sleep(2.0**attempt + random.uniform(0, 1))
     raise Exception(
         f"Failed to fetch file list for {model_id=} {revision=} {path=} {recursive=}"
     )
@@ -406,6 +489,9 @@ async def _fetch_file_list(
 ) -> list[FileListEntry]:
     api_url = f"{get_hf_endpoint()}/api/models/{model_id}/tree/{revision}"
     url = f"{api_url}/{path}" if path else api_url
+    # ?recursive=true returns the whole subtree in one request
+    if recursive:
+        url = f"{url}?recursive=true"
 
     headers = await get_download_headers()
     async with (
@@ -417,7 +503,8 @@ async def _fetch_file_list(
             raise HuggingFaceAuthenticationError(msg)
         elif response.status == 429:
             raise HuggingFaceRateLimitError(
-                f"Couldn't download {model_id} because of HuggingFace rate limit."
+                f"HuggingFace rate limit hit fetching file list for {model_id}",
+                retry_after=_parse_retry_after(response.headers),
             )
         elif response.status == 200:
             data_json = await response.text()
@@ -427,10 +514,14 @@ async def _fetch_file_list(
                 if item.type == "file":
                     files.append(FileListEntry.model_validate(item))
                 elif item.type == "directory" and recursive:
-                    subfiles = await _fetch_file_list(
-                        model_id, revision, item.path, recursive
-                    )
-                    files.extend(subfiles)
+                    # already inlined by ?recursive=true
+                    continue
+            if recursive and len(data) >= 1000:
+                # HF tree endpoint paginates at 1000; we don't follow cursors
+                logger.warning(
+                    f"File list for {model_id} hit the 1000-entry page cap "
+                    "and may be truncated; cursor pagination is not implemented"
+                )
             return files
         else:
             raise Exception(f"Failed to fetch file list: {response.status}")
@@ -511,6 +602,11 @@ async def file_meta(
         if r.status in [401, 403]:
             msg = await _build_auth_error_message(r.status, model_id)
             raise HuggingFaceAuthenticationError(msg)
+        if r.status == 429:
+            raise HuggingFaceRateLimitError(
+                f"HuggingFace rate limit hit fetching metadata for {model_id}/{path}",
+                retry_after=_parse_retry_after(r.headers),
+            )
         content_length = int(
             r.headers.get("x-linked-size") or r.headers.get("content-length") or 0
         )
@@ -530,7 +626,7 @@ async def download_file_with_retry(
     on_connection_lost: Callable[[], None] = lambda: None,
     skip_internet: bool = False,
 ) -> Path:
-    n_attempts = 3
+    n_attempts = 5
     for attempt in range(n_attempts):
         try:
             return await _download_file(
@@ -542,12 +638,16 @@ async def download_file_with_retry(
             raise
         except HuggingFaceRateLimitError as e:
             if attempt == n_attempts - 1:
-                raise e
-            logger.error(
-                f"Download error on attempt {attempt}/{n_attempts} for {model_id=} {revision=} {path=} {target_dir=}"
+                raise
+            sleep_for = e.retry_after if e.retry_after is not None else 2.0**attempt
+            sleep_for = min(sleep_for, _RATE_LIMIT_MAX_SLEEP_SECS) + random.uniform(
+                0, 1
             )
-            logger.error(traceback.format_exc())
-            await asyncio.sleep(2.0**attempt)
+            logger.warning(
+                f"Rate limited by HuggingFace downloading {model_id}/{path}; "
+                f"sleeping {sleep_for:.1f}s before retry {attempt + 2}/{n_attempts}"
+            )
+            await asyncio.sleep(sleep_for)
         except Exception as e:
             if attempt == n_attempts - 1:
                 on_connection_lost()
@@ -556,7 +656,7 @@ async def download_file_with_retry(
                 f"Download error on attempt {attempt + 1}/{n_attempts} for {model_id=} {revision=} {path=} {target_dir=}"
             )
             logger.error(traceback.format_exc())
-            await asyncio.sleep(2.0**attempt)
+            await asyncio.sleep(2.0**attempt + random.uniform(0, 1))
     raise Exception(
         f"Failed to download file {model_id=} {revision=} {path=} {target_dir=}"
     )
@@ -624,6 +724,11 @@ async def _download_file(
             if r.status in [401, 403]:
                 msg = await _build_auth_error_message(r.status, model_id)
                 raise HuggingFaceAuthenticationError(msg)
+            if r.status == 429:
+                raise HuggingFaceRateLimitError(
+                    f"HuggingFace rate limit hit downloading {model_id}/{path}",
+                    retry_after=_parse_retry_after(r.headers),
+                )
             assert r.status in [200, 206], (
                 f"Failed to download {path} from {url}: {r.status}"
             )
@@ -824,13 +929,20 @@ async def download_shard(
             if "/" in f.path or not f.path.endswith(".safetensors")
         ]
 
-    # Pick a writable directory with enough free space
+    # Pick a writable directory with enough free space.
     total_size = sum(f.size or 0 for f in filtered_file_list)
-    models_dir = (
-        select_download_dir(total_size) if not skip_download else EXO_DEFAULT_MODELS_DIR
-    )
-    target_dir = models_dir / model_id.normalize()
-    if not skip_download:
+    if skip_download:
+        existing = resolve_existing_model(model_id)
+        target_dir = (
+            existing
+            if existing is not None
+            else EXO_DEFAULT_MODELS_DIR / model_id.normalize()
+        )
+    else:
+        models_dir = await select_download_dir_for_shard(
+            model_id, filtered_file_list, total_size
+        )
+        target_dir = models_dir / model_id.normalize()
         await aios.makedirs(target_dir, exist_ok=True)
     file_progress: dict[str, RepoFileDownloadProgress] = {}
 
diff --git a/src/exo/download/impl_shard_downloader.py b/src/exo/download/impl_shard_downloader.py
index 0820380f1..a76d2cd4d 100644
--- a/src/exo/download/impl_shard_downloader.py
+++ b/src/exo/download/impl_shard_downloader.py
@@ -11,11 +11,11 @@ from exo.download.download_utils import (
     download_shard,
 )
 from exo.download.shard_downloader import ShardDownloader
+from exo.shared.models import model_cards
 from exo.shared.models.model_cards import (
     ModelCard,
     ModelId,
     ModelTask,
-    get_model_cards,
 )
 from exo.shared.types.memory import Memory
 from exo.shared.types.worker.shards import (
@@ -117,40 +117,39 @@ class ResumableShardDownloader(ShardDownloader):
     ) -> Path:
         allow_patterns = ["config.json"] if config_only else None
 
+        has_vision_sibling = (
+            not config_only
+            and not self.offline
+            and shard.model_card.vision is not None
+            and shard.model_card.vision.weights_repo != str(shard.model_card.model_id)
+        )
+
+        async def main_progress(
+            cb_shard: ShardMetadata, progress: RepoDownloadProgress
+        ) -> None:
+            if has_vision_sibling and progress.status == "complete":
+                return
+            await self.on_progress_wrapper(cb_shard, progress)
+
         target_dir, _ = await download_shard(
             shard,
-            self.on_progress_wrapper,
+            main_progress,
             max_parallel_downloads=self.max_parallel_downloads,
             allow_patterns=allow_patterns,
             skip_internet=self.offline,
         )
 
-        if (
-            not config_only
-            and not self.offline
-            and shard.model_card.vision
-            and shard.model_card.vision.weights_repo != str(shard.model_card.model_id)
-        ):
-            vision_repo = shard.model_card.vision.weights_repo
-            vision_card = ModelCard(
-                model_id=ModelId(vision_repo),
-                storage_size=Memory.from_bytes(0),
-                n_layers=1,
-                hidden_size=1,
-                supports_tensor=False,
-                tasks=[ModelTask.TextGeneration],
-            )
-            vision_shard = PipelineShardMetadata(
-                model_card=vision_card,
-                device_rank=0,
-                world_size=1,
-                start_layer=0,
-                end_layer=1,
-                n_layers=1,
-            )
+        if has_vision_sibling:
+            vision_shard = self._build_vision_shard(shard)
+
+            async def vision_progress(
+                _cb_shard: ShardMetadata, progress: RepoDownloadProgress
+            ) -> None:
+                await self.on_progress_wrapper(shard, progress)
+
             await download_shard(
                 vision_shard,
-                self.on_progress_wrapper,
+                vision_progress,
                 max_parallel_downloads=self.max_parallel_downloads,
                 allow_patterns=["*.safetensors", "config.json"],
                 skip_internet=self.offline,
@@ -158,6 +157,88 @@ class ResumableShardDownloader(ShardDownloader):
 
         return target_dir
 
+    async def _status_for_shard(
+        self, shard: ShardMetadata
+    ) -> tuple[Path, RepoDownloadProgress]:
+        async def _noop(
+            _cb_shard: ShardMetadata, _progress: RepoDownloadProgress
+        ) -> None:
+            return
+
+        path, main_progress = await download_shard(
+            shard,
+            _noop,
+            skip_download=True,
+            skip_internet=self.offline,
+        )
+
+        has_vision_sibling = (
+            shard.model_card.vision is not None
+            and shard.model_card.vision.weights_repo != str(shard.model_card.model_id)
+        )
+        if not has_vision_sibling:
+            return path, main_progress
+
+        vision_shard = self._build_vision_shard(shard)
+        _, vision_progress = await download_shard(
+            vision_shard,
+            _noop,
+            skip_download=True,
+            skip_internet=self.offline,
+        )
+        combined = self._combine_progress(shard, main_progress, vision_progress)
+        return path, combined
+
+    @staticmethod
+    def _build_vision_shard(shard: ShardMetadata) -> PipelineShardMetadata:
+        assert shard.model_card.vision is not None
+        vision_card = ModelCard(
+            model_id=ModelId(shard.model_card.vision.weights_repo),
+            storage_size=Memory.from_bytes(0),
+            n_layers=1,
+            hidden_size=1,
+            supports_tensor=False,
+            tasks=[ModelTask.TextGeneration],
+            backends=shard.model_card.backends,
+        )
+        return PipelineShardMetadata(
+            model_card=vision_card,
+            device_rank=0,
+            world_size=1,
+            start_layer=0,
+            end_layer=1,
+            n_layers=1,
+        )
+
+    @staticmethod
+    def _combine_progress(
+        shard: ShardMetadata,
+        main: RepoDownloadProgress,
+        vision: RepoDownloadProgress,
+    ) -> RepoDownloadProgress:
+        status_rank = {"not_started": 0, "in_progress": 1, "complete": 2}
+        combined_status = min(
+            (main.status, vision.status), key=lambda s: status_rank[s]
+        )
+        file_progress = dict(main.file_progress)
+        for file_path, fp in vision.file_progress.items():
+            file_progress[f"{vision.repo_id}/{file_path}"] = fp
+        return RepoDownloadProgress(
+            repo_id=main.repo_id,
+            repo_revision=main.repo_revision,
+            shard=shard,
+            completed_files=main.completed_files + vision.completed_files,
+            total_files=main.total_files + vision.total_files,
+            downloaded=main.downloaded + vision.downloaded,
+            downloaded_this_session=main.downloaded_this_session
+            + vision.downloaded_this_session,
+            total=main.total + vision.total,
+            overall_speed=main.overall_speed + vision.overall_speed,
+            overall_eta=max(main.overall_eta, vision.overall_eta),
+            status=combined_status,
+            file_progress=file_progress,
+        )
+
     async def get_shard_download_status(
         self,
     ) -> AsyncIterator[tuple[Path, RepoDownloadProgress]]:
@@ -166,12 +247,7 @@ class ResumableShardDownloader(ShardDownloader):
         ) -> tuple[Path, RepoDownloadProgress]:
             """Helper coroutine that builds the shard for a model and gets its download status."""
             shard = await build_full_shard(model_id)
-            return await download_shard(
-                shard,
-                self.on_progress_wrapper,
-                skip_download=True,
-                skip_internet=self.offline,
-            )
+            return await self._status_for_shard(shard)
 
         semaphore = asyncio.Semaphore(self.max_parallel_downloads)
 
@@ -183,7 +259,7 @@ class ResumableShardDownloader(ShardDownloader):
 
         tasks = [
             create_task(download_with_semaphore(model_card))
-            for model_card in await get_model_cards()
+            for model_card in await model_cards.card_cache.list_all()
         ]
 
         for task in asyncio.as_completed(tasks):
@@ -195,10 +271,5 @@ class ResumableShardDownloader(ShardDownloader):
     async def get_shard_download_status_for_shard(
         self, shard: ShardMetadata
     ) -> RepoDownloadProgress:
-        _, progress = await download_shard(
-            shard,
-            self.on_progress_wrapper,
-            skip_download=True,
-            skip_internet=self.offline,
-        )
+        _, progress = await self._status_for_shard(shard)
         return progress
diff --git a/src/exo/download/shard_downloader.py b/src/exo/download/shard_downloader.py
index 2addda80c..fa3c9a4ba 100644
--- a/src/exo/download/shard_downloader.py
+++ b/src/exo/download/shard_downloader.py
@@ -7,6 +7,7 @@ from typing import AsyncIterator, Callable
 
 from exo.download.download_utils import RepoDownloadProgress
 from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.memory import Memory
 from exo.shared.types.worker.shards import (
     PipelineShardMetadata,
@@ -93,6 +94,7 @@ NOOP_DOWNLOAD_PROGRESS = RepoDownloadProgress(
             hidden_size=1,
             supports_tensor=False,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
         device_rank=0,
         world_size=1,
diff --git a/src/exo/download/tests/test_cancel_download.py b/src/exo/download/tests/test_cancel_download.py
index 3d02d65fe..6546bb368 100644
--- a/src/exo/download/tests/test_cancel_download.py
+++ b/src/exo/download/tests/test_cancel_download.py
@@ -12,6 +12,7 @@ from exo.download.download_utils import RepoDownloadProgress
 from exo.download.impl_shard_downloader import SingletonShardDownloader
 from exo.download.shard_downloader import ShardDownloader
 from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.commands import (
     CancelDownload,
     ForwarderDownloadCommand,
@@ -37,6 +38,7 @@ def _make_shard(model_id: ModelId = MODEL_ID) -> ShardMetadata:
             hidden_size=1024,
             supports_tensor=False,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
         device_rank=0,
         world_size=1,
diff --git a/src/exo/download/tests/test_download_status_not_lost.py b/src/exo/download/tests/test_download_status_not_lost.py
new file mode 100644
index 000000000..c9e9cc609
--- /dev/null
+++ b/src/exo/download/tests/test_download_status_not_lost.py
@@ -0,0 +1,283 @@
+"""Regression tests for #1918: download status must not revert from completed to pending.
+
+The periodic rescan in _emit_existing_download_progress compares local file
+sizes against HuggingFace API sizes.  Text files (README, YAML, Jinja) can
+have different local vs remote sizes due to encoding changes.  This must NOT
+cause a completed download to be downgraded.
+"""
+
+import asyncio
+import contextlib
+from collections.abc import AsyncIterator, Awaitable
+from datetime import timedelta
+from pathlib import Path
+from typing import Callable, Literal
+from unittest.mock import patch
+
+from exo.download.coordinator import DownloadCoordinator
+from exo.download.download_utils import RepoDownloadProgress
+from exo.download.impl_shard_downloader import SingletonShardDownloader
+from exo.download.shard_downloader import ShardDownloader
+from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
+from exo.shared.types.backends import Backend
+from exo.shared.types.commands import ForwarderDownloadCommand
+from exo.shared.types.common import NodeId
+from exo.shared.types.events import Event, NodeDownloadProgress
+from exo.shared.types.memory import Memory
+from exo.shared.types.worker.downloads import (
+    DownloadCompleted,
+    DownloadPending,
+)
+from exo.shared.types.worker.shards import PipelineShardMetadata, ShardMetadata
+from exo.utils.channels import Receiver, Sender, channel
+
+NODE_ID = NodeId("aaaaaaaa-aaaa-4aaa-8aaa-aaaaaaaaaaaa")
+MODEL_ID = ModelId("test-org/test-model")
+MODEL_DIR = Path("/fake/models/test-org--test-model")
+
+
+def _make_shard(model_id: ModelId = MODEL_ID) -> ShardMetadata:
+    return PipelineShardMetadata(
+        model_card=ModelCard(
+            model_id=model_id,
+            storage_size=Memory.from_mb(100),
+            n_layers=28,
+            hidden_size=1024,
+            supports_tensor=False,
+            tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
+        ),
+        device_rank=0,
+        world_size=1,
+        start_layer=0,
+        end_layer=28,
+        n_layers=28,
+    )
+
+
+SHARD = _make_shard()
+
+
+class FakeShardDownloader(ShardDownloader):
+    """Fake downloader that yields a single model with configurable status."""
+
+    def __init__(
+        self, status: Literal["not_started", "in_progress", "complete"] = "not_started"
+    ) -> None:
+        self._status: Literal["not_started", "in_progress", "complete"] = status
+        self._progress_callbacks: list[
+            Callable[[ShardMetadata, RepoDownloadProgress], Awaitable[None]]
+        ] = []
+
+    def on_progress(
+        self,
+        callback: Callable[[ShardMetadata, RepoDownloadProgress], Awaitable[None]],
+    ) -> None:
+        self._progress_callbacks.append(callback)
+
+    async def ensure_shard(
+        self,
+        shard: ShardMetadata,
+        config_only: bool = False,  # noqa: ARG002
+    ) -> Path:
+        return MODEL_DIR  # pragma: no cover
+
+    async def get_shard_download_status(
+        self,
+    ) -> AsyncIterator[tuple[Path, RepoDownloadProgress]]:
+        yield (
+            MODEL_DIR,
+            RepoDownloadProgress(
+                repo_id=str(MODEL_ID),
+                repo_revision="main",
+                shard=SHARD,
+                completed_files=10,
+                total_files=13,
+                downloaded=Memory.from_mb(95),
+                downloaded_this_session=Memory.from_bytes(0),
+                total=Memory.from_mb(100),
+                overall_speed=0,
+                overall_eta=timedelta(seconds=0),
+                status=self._status,
+            ),
+        )
+
+    async def get_shard_download_status_for_shard(
+        self,
+        shard: ShardMetadata,
+    ) -> RepoDownloadProgress:
+        return RepoDownloadProgress(
+            repo_id=str(shard.model_card.model_id),
+            repo_revision="main",
+            shard=shard,
+            completed_files=0,
+            total_files=0,
+            downloaded=Memory.from_bytes(0),
+            downloaded_this_session=Memory.from_bytes(0),
+            total=Memory.from_bytes(0),
+            overall_speed=0,
+            overall_eta=timedelta(seconds=0),
+            status="not_started",
+        )
+
+
+def _setup_coordinator(
+    downloader: ShardDownloader,
+) -> tuple[
+    DownloadCoordinator,
+    Sender[ForwarderDownloadCommand],
+    Receiver[Event],
+]:
+    cmd_send, cmd_recv = channel[ForwarderDownloadCommand]()
+    event_send, event_recv = channel[Event]()
+    wrapped = SingletonShardDownloader(downloader)
+    coordinator = DownloadCoordinator(
+        node_id=NODE_ID,
+        shard_downloader=wrapped,
+        download_command_receiver=cmd_recv,
+        event_sender=event_send,
+    )
+    return coordinator, cmd_send, event_recv
+
+
+async def _collect_events(
+    event_recv: Receiver[Event], timeout: float = 1.0
+) -> list[Event]:
+    """Drain events until timeout."""
+    events: list[Event] = []
+    try:
+        async with asyncio.timeout(timeout):
+            while True:
+                events.append(await event_recv.receive())
+    except TimeoutError:
+        pass
+    return events
+
+
+async def test_completed_status_not_downgraded_by_rescan() -> None:
+    """A model already marked DownloadCompleted must not revert to
+    DownloadPending when the periodic rescan reports a non-complete
+    file-size status (regression test for #1918)."""
+    downloader = FakeShardDownloader(status="not_started")
+    coordinator, _cmd_send, event_recv = _setup_coordinator(downloader)
+
+    # Pre-seed the coordinator with a completed status for the model
+    completed = DownloadCompleted(
+        node_id=NODE_ID,
+        shard_metadata=SHARD,
+        total=Memory.from_mb(100),
+        model_directory=str(MODEL_DIR),
+    )
+    coordinator.download_status[MODEL_ID] = completed
+
+    # Run the coordinator (the rescan loop fires immediately)
+    coordinator_task = asyncio.create_task(coordinator.run())
+    try:
+        # Wait for the rescan to process (it should skip the completed model)
+        events = await _collect_events(event_recv, timeout=1.5)
+
+        # The model must still be DownloadCompleted — not downgraded
+        assert isinstance(coordinator.download_status[MODEL_ID], DownloadCompleted), (
+            f"Expected DownloadCompleted but got {type(coordinator.download_status[MODEL_ID]).__name__}"
+        )
+
+        # No DownloadPending event should have been emitted for this model
+        pending_events = [
+            e
+            for e in events
+            if isinstance(e, NodeDownloadProgress)
+            and isinstance(e.download_progress, DownloadPending)
+            and e.download_progress.shard_metadata.model_card.model_id == MODEL_ID
+        ]
+        assert len(pending_events) == 0, (
+            f"Expected no DownloadPending events for completed model, got {len(pending_events)}"
+        )
+    finally:
+        await coordinator.shutdown()
+        coordinator_task.cancel()
+        with contextlib.suppress(asyncio.CancelledError):
+            await coordinator_task
+
+
+async def test_incomplete_model_with_files_present_detected_as_complete() -> None:
+    """When the per-file size check says not_started but resolve_existing_model
+    confirms the model directory is complete, the model should be marked
+    DownloadCompleted (regression test for #1918 — initial scan case)."""
+    downloader = FakeShardDownloader(status="not_started")
+    coordinator, _cmd_send, event_recv = _setup_coordinator(downloader)
+
+    # Mock resolve_existing_model to return a valid path (model is on disk)
+    with patch(
+        "exo.download.coordinator.resolve_existing_model",
+        return_value=MODEL_DIR,
+    ):
+        coordinator_task = asyncio.create_task(coordinator.run())
+        try:
+            events = await _collect_events(event_recv, timeout=1.5)
+
+            # The model should be DownloadCompleted (resolve_existing_model confirmed it)
+            assert isinstance(
+                coordinator.download_status.get(MODEL_ID), DownloadCompleted
+            ), (
+                f"Expected DownloadCompleted but got "
+                f"{type(coordinator.download_status.get(MODEL_ID)).__name__}"
+            )
+
+            # Should have emitted a DownloadCompleted event
+            completed_events = [
+                e
+                for e in events
+                if isinstance(e, NodeDownloadProgress)
+                and isinstance(e.download_progress, DownloadCompleted)
+                and e.download_progress.shard_metadata.model_card.model_id == MODEL_ID
+            ]
+            assert len(completed_events) > 0, (
+                "Expected at least one DownloadCompleted event"
+            )
+        finally:
+            await coordinator.shutdown()
+            coordinator_task.cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await coordinator_task
+
+
+async def test_genuinely_incomplete_model_stays_pending() -> None:
+    """When the per-file size check says not_started and resolve_existing_model
+    returns None (model truly not complete), the model should correctly be
+    DownloadPending."""
+    downloader = FakeShardDownloader(status="not_started")
+    coordinator, _cmd_send, event_recv = _setup_coordinator(downloader)
+
+    # Mock resolve_existing_model to return None (model not on disk)
+    with patch(
+        "exo.download.coordinator.resolve_existing_model",
+        return_value=None,
+    ):
+        coordinator_task = asyncio.create_task(coordinator.run())
+        try:
+            events = await _collect_events(event_recv, timeout=1.5)
+
+            # The model should be DownloadPending
+            assert isinstance(
+                coordinator.download_status.get(MODEL_ID), DownloadPending
+            ), (
+                f"Expected DownloadPending but got "
+                f"{type(coordinator.download_status.get(MODEL_ID)).__name__}"
+            )
+
+            # Should have emitted a DownloadPending event
+            pending_events = [
+                e
+                for e in events
+                if isinstance(e, NodeDownloadProgress)
+                and isinstance(e.download_progress, DownloadPending)
+                and e.download_progress.shard_metadata.model_card.model_id == MODEL_ID
+            ]
+            assert len(pending_events) > 0, (
+                "Expected at least one DownloadPending event"
+            )
+        finally:
+            await coordinator.shutdown()
+            coordinator_task.cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await coordinator_task
diff --git a/src/exo/download/tests/test_offline_mode.py b/src/exo/download/tests/test_offline_mode.py
index 9a94e5205..29a69275a 100644
--- a/src/exo/download/tests/test_offline_mode.py
+++ b/src/exo/download/tests/test_offline_mode.py
@@ -1,5 +1,7 @@
 """Tests for offline/air-gapped mode."""
 
+import os
+import time
 from collections.abc import AsyncIterator
 from pathlib import Path
 from unittest.mock import AsyncMock, patch
@@ -231,3 +233,64 @@ class TestFetchFileListOffline:
         raise FileNotFoundError."""
         with pytest.raises(FileNotFoundError, match="No internet"):
             await fetch_file_list_with_cache(model_id, "main", skip_internet=True)
+
+
+class TestFileListCacheTTL:
+    async def test_uses_fresh_cache_without_fetching(
+        self, model_id: ModelId, temp_models_dir: Path
+    ) -> None:
+        from pydantic import TypeAdapter
+
+        cache_dir = temp_models_dir / "caches" / model_id.normalize()
+        await aios.makedirs(cache_dir, exist_ok=True)
+
+        cached_list = [
+            FileListEntry(type="file", path="model.safetensors", size=1000),
+        ]
+        cache_file = cache_dir / f"{model_id.normalize()}--main--file_list.json"
+        async with aiofiles.open(cache_file, "w") as f:
+            await f.write(
+                TypeAdapter(list[FileListEntry]).dump_json(cached_list).decode()
+            )
+
+        with patch(
+            "exo.download.download_utils.fetch_file_list_with_retry",
+            new_callable=AsyncMock,
+        ) as mock_fetch:
+            result = await fetch_file_list_with_cache(model_id, "main")
+
+        assert result == cached_list
+        mock_fetch.assert_not_called()
+
+    async def test_refetches_when_cache_older_than_ttl(
+        self, model_id: ModelId, temp_models_dir: Path
+    ) -> None:
+        from pydantic import TypeAdapter
+
+        from exo.download.download_utils import (
+            _FILE_LIST_CACHE_TTL_SECS,  # pyright: ignore[reportPrivateUsage]
+        )
+
+        cache_dir = temp_models_dir / "caches" / model_id.normalize()
+        await aios.makedirs(cache_dir, exist_ok=True)
+
+        stale_list = [FileListEntry(type="file", path="stale.bin", size=1)]
+        cache_file = cache_dir / f"{model_id.normalize()}--main--file_list.json"
+        async with aiofiles.open(cache_file, "w") as f:
+            await f.write(
+                TypeAdapter(list[FileListEntry]).dump_json(stale_list).decode()
+            )
+
+        old_mtime = time.time() - _FILE_LIST_CACHE_TTL_SECS - 60
+        os.utime(cache_file, (old_mtime, old_mtime))
+
+        fresh_list = [FileListEntry(type="file", path="fresh.bin", size=2)]
+        with patch(
+            "exo.download.download_utils.fetch_file_list_with_retry",
+            new_callable=AsyncMock,
+            return_value=fresh_list,
+        ) as mock_fetch:
+            result = await fetch_file_list_with_cache(model_id, "main")
+
+        assert result == fresh_list
+        mock_fetch.assert_called_once()
diff --git a/src/exo/download/tests/test_rate_limit_handling.py b/src/exo/download/tests/test_rate_limit_handling.py
new file mode 100644
index 000000000..3af71334c
--- /dev/null
+++ b/src/exo/download/tests/test_rate_limit_handling.py
@@ -0,0 +1,355 @@
+"""Tests for HuggingFace 429 rate-limit handling in download_utils."""
+
+from collections.abc import AsyncIterator
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import aiofiles.os as aios
+import pytest
+
+from exo.download.download_utils import (
+    HuggingFaceRateLimitError,
+    _download_file,  # pyright: ignore[reportPrivateUsage]
+    _fetch_file_list,  # pyright: ignore[reportPrivateUsage]
+    _parse_retry_after,  # pyright: ignore[reportPrivateUsage]
+    download_file_with_retry,
+    fetch_file_list_with_retry,
+    file_meta,
+)
+from exo.shared.types.common import ModelId
+
+# captured from a real HF 429 on 2026-04-30 (headers are lowercased by CloudFront)
+REAL_HF_429_HEADERS_2026_04_30 = {
+    "ratelimit": '"api";r=0;t=52',
+    "ratelimit-policy": '"fixed window";"api";q=500;w=300',
+}
+
+
+class TestParseRetryAfter:
+    def test_parses_documented_format(self) -> None:
+        assert _parse_retry_after({"RateLimit": '"api";r=0;t=243'}) == 243.0
+
+    def test_parses_real_hf_response(self) -> None:
+        assert _parse_retry_after(REAL_HF_429_HEADERS_2026_04_30) == 52.0
+
+    def test_parses_resolvers_bucket(self) -> None:
+        assert _parse_retry_after({"ratelimit": '"resolvers";r=0;t=120'}) == 120.0
+
+    def test_parses_pages_bucket(self) -> None:
+        assert _parse_retry_after({"ratelimit": '"pages";r=0;t=10'}) == 10.0
+
+    def test_returns_none_when_header_missing(self) -> None:
+        assert _parse_retry_after({}) is None
+
+    def test_returns_none_when_only_retry_after_present(self) -> None:
+        assert _parse_retry_after({"Retry-After": "60"}) is None
+
+    def test_returns_none_when_format_unrecognised(self) -> None:
+        assert _parse_retry_after({"ratelimit": "garbage"}) is None
+
+    def test_handles_extra_whitespace(self) -> None:
+        assert _parse_retry_after({"ratelimit": '"api"; r=0; t=42'}) == 42.0
+
+
+class TestFetchFileListRetry:
+    async def test_uses_retry_after_from_error(self) -> None:
+        sleeps: list[float] = []
+
+        async def fake_sleep(seconds: float) -> None:
+            sleeps.append(seconds)
+
+        async def fake_fetch(*args: object, **kwargs: object) -> list[object]:
+            if not sleeps:
+                raise HuggingFaceRateLimitError("rate limited", retry_after=2.0)
+            return []
+
+        with (
+            patch(
+                "exo.download.download_utils._fetch_file_list", side_effect=fake_fetch
+            ),
+            patch("exo.download.download_utils.asyncio.sleep", side_effect=fake_sleep),
+        ):
+            result = await fetch_file_list_with_retry(ModelId("test/model"))
+
+        assert result == []
+        assert len(sleeps) == 1
+        assert 2.0 <= sleeps[0] < 3.0  # retry_after + jitter[0,1)
+
+    async def test_falls_back_to_exp_backoff_when_no_retry_after(self) -> None:
+        sleeps: list[float] = []
+
+        async def fake_sleep(seconds: float) -> None:
+            sleeps.append(seconds)
+
+        async def fake_fetch(*args: object, **kwargs: object) -> list[object]:
+            if not sleeps:
+                raise HuggingFaceRateLimitError("rate limited", retry_after=None)
+            return []
+
+        with (
+            patch(
+                "exo.download.download_utils._fetch_file_list", side_effect=fake_fetch
+            ),
+            patch("exo.download.download_utils.asyncio.sleep", side_effect=fake_sleep),
+        ):
+            await fetch_file_list_with_retry(ModelId("test/model"))
+
+        assert len(sleeps) == 1
+        assert 1.0 <= sleeps[0] < 2.0  # 2**0 + jitter[0,1)
+
+    async def test_caps_sleep_at_max_window(self) -> None:
+        sleeps: list[float] = []
+
+        async def fake_sleep(seconds: float) -> None:
+            sleeps.append(seconds)
+
+        async def fake_fetch(*args: object, **kwargs: object) -> list[object]:
+            if not sleeps:
+                raise HuggingFaceRateLimitError("rate limited", retry_after=10_000.0)
+            return []
+
+        with (
+            patch(
+                "exo.download.download_utils._fetch_file_list", side_effect=fake_fetch
+            ),
+            patch("exo.download.download_utils.asyncio.sleep", side_effect=fake_sleep),
+        ):
+            await fetch_file_list_with_retry(ModelId("test/model"))
+
+        assert len(sleeps) == 1
+        assert 300.0 <= sleeps[0] < 301.0  # cap + jitter[0,1)
+
+    async def test_retries_up_to_five_times(self) -> None:
+        sleeps: list[float] = []  # one entry per backoff between attempts
+
+        async def fake_sleep(seconds: float) -> None:
+            sleeps.append(seconds)  # record the delay instead of waiting
+
+        async def fake_fetch(*args: object, **kwargs: object) -> list[object]:
+            raise HuggingFaceRateLimitError("rate limited", retry_after=1.0)
+
+        with (
+            patch(
+                "exo.download.download_utils._fetch_file_list", side_effect=fake_fetch
+            ),
+            patch("exo.download.download_utils.asyncio.sleep", side_effect=fake_sleep),
+            pytest.raises(HuggingFaceRateLimitError),  # fetch never succeeds
+        ):
+            await fetch_file_list_with_retry(ModelId("test/model"))
+
+        assert len(sleeps) == 4  # 5 attempts -> 4 sleeps before giving up
+
+
+class TestDownloadFileRetry:  # backoff tests for download_file_with_retry
+    @pytest.fixture
+    async def target_dir(self, tmp_path: Path) -> AsyncIterator[Path]:
+        target = tmp_path / "downloads"
+        await aios.makedirs(target, exist_ok=True)
+        yield target  # directory under pytest's tmp_path
+
+    async def test_uses_retry_after_from_error(self, target_dir: Path) -> None:
+        sleeps: list[float] = []  # delays passed to the patched asyncio.sleep
+        results: list[Path] = [target_dir / "file.bin"]
+
+        async def fake_sleep(seconds: float) -> None:
+            sleeps.append(seconds)  # record the delay instead of waiting
+
+        async def fake_download(*args: object, **kwargs: object) -> Path:
+            if not sleeps:  # no backoff recorded yet -> first attempt
+                raise HuggingFaceRateLimitError("rate limited", retry_after=5.0)
+            return results[0]  # any attempt after a sleep succeeds
+
+        with (
+            patch(
+                "exo.download.download_utils._download_file",
+                side_effect=fake_download,
+            ),
+            patch("exo.download.download_utils.asyncio.sleep", side_effect=fake_sleep),
+        ):
+            result = await download_file_with_retry(
+                ModelId("test/model"), "main", "file.bin", target_dir
+            )
+
+        assert result == results[0]  # path from the successful attempt
+        assert len(sleeps) == 1
+        assert 5.0 <= sleeps[0] < 6.0  # 5.0 from the error, < 1s extra
+
+    async def test_caps_sleep_at_max_window(self, target_dir: Path) -> None:
+        sleeps: list[float] = []  # delays passed to the patched asyncio.sleep
+        results: list[Path] = [target_dir / "file.bin"]
+
+        async def fake_sleep(seconds: float) -> None:
+            sleeps.append(seconds)  # record the delay instead of waiting
+
+        async def fake_download(*args: object, **kwargs: object) -> Path:
+            if not sleeps:  # no backoff recorded yet -> first attempt
+                raise HuggingFaceRateLimitError("rate limited", retry_after=99_999.0)
+            return results[0]  # any attempt after a sleep succeeds
+
+        with (
+            patch(
+                "exo.download.download_utils._download_file",
+                side_effect=fake_download,
+            ),
+            patch("exo.download.download_utils.asyncio.sleep", side_effect=fake_sleep),
+        ):
+            await download_file_with_retry(
+                ModelId("test/model"), "main", "file.bin", target_dir
+            )
+
+        assert len(sleeps) == 1
+        assert 300.0 <= sleeps[0] < 301.0  # capped despite retry_after=99_999.0
+
+    async def test_retries_up_to_five_times(self, target_dir: Path) -> None:
+        sleeps: list[float] = []  # one entry per backoff between attempts
+
+        async def fake_sleep(seconds: float) -> None:
+            sleeps.append(seconds)  # record the delay instead of waiting
+
+        with (
+            patch(
+                "exo.download.download_utils._download_file",
+                new_callable=AsyncMock,
+                side_effect=HuggingFaceRateLimitError("rate limited", retry_after=1.0),
+            ),
+            patch("exo.download.download_utils.asyncio.sleep", side_effect=fake_sleep),
+            pytest.raises(HuggingFaceRateLimitError),  # download never succeeds
+        ):
+            await download_file_with_retry(
+                ModelId("test/model"), "main", "file.bin", target_dir
+            )
+
+        assert len(sleeps) == 4  # 5 attempts -> 4 sleeps
+
+
+def _make_mock_session_returning(
+    response_attrs: dict[str, object], method: str = "get"
+) -> MagicMock:
+    """Build a MagicMock that mimics ``create_http_session`` returning a
+    response whose ``status`` / ``headers`` are set from ``response_attrs``.
+
+    Mocks the chain ``create_http_session().__aenter__() -> session``, and
+    ``session.<method>().__aenter__() -> response``.
+    """
+    mock_response = MagicMock()
+    for k, v in response_attrs.items():
+        setattr(mock_response, k, v)  # e.g. status / headers
+
+    mock_session = MagicMock()
+    method_mock = getattr(mock_session, method)  # pyright: ignore[reportAny]
+    method_mock.return_value.__aenter__ = AsyncMock(  # pyright: ignore[reportAny]
+        return_value=mock_response
+    )
+    method_mock.return_value.__aexit__ = AsyncMock(  # pyright: ignore[reportAny]
+        return_value=None
+    )
+
+    mock_factory = MagicMock()
+    mock_factory.return_value.__aenter__ = AsyncMock(  # pyright: ignore[reportAny]
+        return_value=mock_session
+    )
+    mock_factory.return_value.__aexit__ = AsyncMock(  # pyright: ignore[reportAny]
+        return_value=None
+    )
+    return mock_factory  # callers patch this in for create_http_session
+
+
+REAL_HF_429_HEADER_DICT = {"ratelimit": '"api";r=0;t=52'}  # t=52 -> retry_after 52.0
+
+
+class TestRateLimitAtHttpCallSites:
+    """Verify each HF call site translates an HTTP 429 into a
+    ``HuggingFaceRateLimitError`` carrying the parsed ``retry_after``.
+
+    These tests would catch regressions where (a) the 429 branch is
+    deleted, (b) ``_parse_retry_after`` stops being called, or
+    (c) the wrong header object is passed to it.
+    """
+
+    async def test_fetch_file_list_maps_429_to_rate_limit_error(self) -> None:
+        mock_factory = _make_mock_session_returning(
+            {"status": 429, "headers": REAL_HF_429_HEADER_DICT}
+        )
+        with (
+            patch("exo.download.download_utils.create_http_session", mock_factory),
+            pytest.raises(HuggingFaceRateLimitError) as exc_info,
+        ):
+            await _fetch_file_list(ModelId("test/model"), "main")
+        assert exc_info.value.retry_after == 52.0  # matches t=52 in the header
+
+    async def test_file_meta_maps_429_to_rate_limit_error(self) -> None:
+        mock_factory = _make_mock_session_returning(
+            {"status": 429, "headers": REAL_HF_429_HEADER_DICT}, method="head"
+        )
+        with (
+            patch("exo.download.download_utils.create_http_session", mock_factory),
+            pytest.raises(HuggingFaceRateLimitError) as exc_info,
+        ):
+            await file_meta(ModelId("test/model"), "main", "weights.safetensors")
+        assert exc_info.value.retry_after == 52.0  # matches t=52 in the header
+
+    async def test_file_meta_maps_429_after_307_redirect(self) -> None:
+        """When the initial HEAD 307s and the redirected HEAD then 429s,
+        the 429 must still surface as ``HuggingFaceRateLimitError``."""
+        # First HEAD -> 307 with a Location header pointing somewhere new.
+        first_response = MagicMock()
+        first_response.status = 307
+        first_response.headers = {"location": "/redirected/url"}
+
+        # Second HEAD (the recursive call) -> 429 with the real-HF header.
+        second_response = MagicMock()
+        second_response.status = 429
+        second_response.headers = REAL_HF_429_HEADER_DICT
+
+        responses = iter([first_response, second_response])  # one per __aenter__
+
+        mock_session = MagicMock()
+        mock_session.head.return_value.__aenter__ = AsyncMock(  # pyright: ignore[reportAny]
+            side_effect=lambda: next(responses)
+        )
+        mock_session.head.return_value.__aexit__ = AsyncMock(  # pyright: ignore[reportAny]
+            return_value=None
+        )
+
+        mock_factory = MagicMock()
+        mock_factory.return_value.__aenter__ = AsyncMock(  # pyright: ignore[reportAny]
+            return_value=mock_session
+        )
+        mock_factory.return_value.__aexit__ = AsyncMock(  # pyright: ignore[reportAny]
+            return_value=None
+        )
+
+        with (
+            patch("exo.download.download_utils.create_http_session", mock_factory),
+            pytest.raises(HuggingFaceRateLimitError) as exc_info,
+        ):
+            await file_meta(ModelId("test/model"), "main", "weights.safetensors")
+        assert exc_info.value.retry_after == 52.0  # taken from second_response
+
+    async def test_download_file_maps_429_to_rate_limit_error(
+        self, tmp_path: Path
+    ) -> None:
+        target_dir = tmp_path / "downloads"
+        await aios.makedirs(target_dir, exist_ok=True)
+        # No local file -> _download_file goes straight to file_meta then GET.
+        # We need both calls to succeed enough to reach the GET branch:
+        #   - file_meta returns a non-429 (size, etag) so we proceed.
+        #   - the GET then 429s.
+        with (
+            patch(
+                "exo.download.download_utils.file_meta",
+                new_callable=AsyncMock,
+                return_value=(100, "abc123"),  # (size, etag)
+            ),
+            patch(
+                "exo.download.download_utils.create_http_session",
+                _make_mock_session_returning(
+                    {"status": 429, "headers": REAL_HF_429_HEADER_DICT}
+                ),
+            ),
+            pytest.raises(HuggingFaceRateLimitError) as exc_info,
+        ):
+            await _download_file(
+                ModelId("test/model"), "main", "weights.safetensors", target_dir
+            )
+        assert exc_info.value.retry_after == 52.0  # matches t=52 in the header
diff --git a/src/exo/download/tests/test_re_download.py b/src/exo/download/tests/test_re_download.py
index 756179977..7b456540c 100644
--- a/src/exo/download/tests/test_re_download.py
+++ b/src/exo/download/tests/test_re_download.py
@@ -13,6 +13,7 @@ from exo.download.download_utils import RepoDownloadProgress
 from exo.download.impl_shard_downloader import SingletonShardDownloader
 from exo.download.shard_downloader import ShardDownloader
 from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.commands import (
     DeleteDownload,
     ForwarderDownloadCommand,
@@ -38,6 +39,7 @@ def _make_shard(model_id: ModelId = MODEL_ID) -> ShardMetadata:
             hidden_size=1024,
             supports_tensor=False,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
         device_rank=0,
         world_size=1,
diff --git a/src/exo/download/tests/test_safetensors_index.py b/src/exo/download/tests/test_safetensors_index.py
new file mode 100644
index 000000000..6425501c0
--- /dev/null
+++ b/src/exo/download/tests/test_safetensors_index.py
@@ -0,0 +1,35 @@
+"""Tests for safetensors index file parsing."""
+
+from exo.shared.types.worker.downloads import ModelSafetensorsIndex
+
+
+def test_safetensors_index_missing_total_size():
+    """Index files from image models (e.g. FLUX/mflux) have metadata without
+    a total_size field — they use quantization_level and mflux_version instead.
+
+    This must parse successfully — a previous bug caused Pydantic validation to
+    fail silently (total_size was a required PositiveInt), which made
+    _scan_model_directory skip weight-map checks and report incomplete models
+    as complete.
+    """
+    raw = '{"metadata": {"quantization_level": "4", "mflux_version": "0.3.0"}, "weight_map": {"layer.safetensors": "layer.safetensors"}}'
+    index = ModelSafetensorsIndex.model_validate_json(raw)
+    assert index.metadata is not None  # metadata object is present
+    assert index.metadata.total_size is None  # not in raw -> None
+    assert index.weight_map == {"layer.safetensors": "layer.safetensors"}
+
+
+def test_safetensors_index_valid_total_size():
+    """Standard text model index files with a valid total_size should continue
+    to parse correctly."""
+    raw = '{"metadata": {"total_size": 12345}, "weight_map": {"a.safetensors": "a.safetensors"}}'
+    index = ModelSafetensorsIndex.model_validate_json(raw)
+    assert index.metadata is not None  # metadata object is present
+    assert index.metadata.total_size == 12345  # value given in raw
+
+
+def test_safetensors_index_null_metadata():
+    """Index files with null metadata should parse correctly."""
+    raw = '{"metadata": null, "weight_map": {"a.safetensors": "a.safetensors"}}'
+    index = ModelSafetensorsIndex.model_validate_json(raw)
+    assert index.metadata is None  # JSON null -> None
diff --git a/src/exo/main.py b/src/exo/main.py
index 49e21895f..8e3220ebd 100644
--- a/src/exo/main.py
+++ b/src/exo/main.py
@@ -3,6 +3,7 @@ import multiprocessing as mp
 import os
 import resource
 import signal
+import sys
 from dataclasses import dataclass, field
 from typing import Self
 
@@ -17,12 +18,14 @@ from exo.download.impl_shard_downloader import exo_shard_downloader
 from exo.master.main import Master
 from exo.routing.event_router import EventRouter
 from exo.routing.router import Router, get_node_id_keypair
-from exo.shared.constants import EXO_LOG
+from exo.shared.constants import EXO_DEFAULT_MODELS_DIR, EXO_LOG
 from exo.shared.election import Election, ElectionResult
 from exo.shared.logging import logger_cleanup, logger_setup
 from exo.shared.types.common import NodeId, SessionId
 from exo.utils.channels import Receiver, channel
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.daemon import detach_stdio_to_devnull
+from exo.utils.pidfile import PidfileLockError, acquire_exo_pidfile
+from exo.utils.pydantic_ext import FrozenModel
 from exo.utils.task_group import TaskGroup
 from exo.worker.main import Worker
 
@@ -68,6 +71,9 @@ class Node:
 
         logger.info(f"Starting node {node_id}")
 
+        # Without this, the very first run of exo errors because the models dir doesn't exist yet
+        EXO_DEFAULT_MODELS_DIR.mkdir(parents=True, exist_ok=True)
+
         # Create DownloadCoordinator (unless --no-downloads)
         if not args.no_downloads:
             download_coordinator = DownloadCoordinator(
@@ -192,7 +198,6 @@ class Node:
                         self.router.receiver(topics.GLOBAL_EVENTS),
                         self.router.sender(topics.LOCAL_EVENTS),
                     )
-                    self._tg.start_soon(self.event_router.run)
 
                 if (
                     result.session_id.master_node_id == self.node_id
@@ -258,20 +263,33 @@ class Node:
                         self._tg.start_soon(self.worker.run)
                     if self.api:
                         self.api.reset(result.won_clock, self.event_router.receiver())
+                    self._tg.start_soon(self.event_router.run)
                 else:
                     if self.api:
                         self.api.unpause(result.won_clock)
 
 
 def main():
+    # Exit early if the PID file lock can't be acquired (not compatible with double-fork daemonization yet)
+    try:
+        pidfile = acquire_exo_pidfile()
+    except PidfileLockError as exception:
+        print(exception, file=sys.stderr)
+        raise SystemExit(1) from exception
+
     args = Args.parse()
     soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
     target = min(max(soft, 65535), hard)
     resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
 
     mp.set_start_method("spawn", force=True)
+
     # TODO: Refactor the current verbosity system
     logger_setup(EXO_LOG, args.verbosity)
+    if args.no_stdio:
+        detach_stdio_to_devnull()
+        logger.info("Detached stdio to /dev/null")
+
     logger.info(f"{'=' * 40}")
     logger.info(f"Starting EXO | pid={os.getpid()}")
     logger.info(f"{'=' * 40}")
@@ -306,9 +324,10 @@ def main():
     finally:
         logger.info("EXO Shutdown complete")
         logger_cleanup()
+        del pidfile
 
 
-class Args(CamelCaseModel):
+class Args(FrozenModel):
     verbosity: int = 0
     force_master: bool = False
     spawn_api: bool = False
@@ -319,6 +338,7 @@ class Args(CamelCaseModel):
     offline: bool = os.getenv("EXO_OFFLINE", "false").lower() == "true"
     no_batch: bool = False
     fast_synch: bool | None = None  # None = auto, True = force on, False = force off
+    no_stdio: bool = False
     bootstrap_peers: list[str] = []
     libp2p_port: int
 
@@ -378,6 +398,11 @@ class Args(CamelCaseModel):
             action="store_true",
             help="Disable continuous batching, use sequential generation",
         )
+        parser.add_argument(
+            "--no-stdio",
+            action="store_true",
+            help="Detach stdin/stdout/stderr to /dev/null after logging is configured",
+        )
         parser.add_argument(
             "--bootstrap-peers",
             type=lambda s: [p for p in s.split(",") if p],
diff --git a/src/exo/master/main.py b/src/exo/master/main.py
index 1511897ea..655e6556e 100644
--- a/src/exo/master/main.py
+++ b/src/exo/master/main.py
@@ -10,6 +10,7 @@ from exo.master.placement import (
     get_transition_events,
     place_instance,
 )
+from exo.master.placement_utils import find_ip_prioritised
 from exo.shared.apply import apply
 from exo.shared.constants import EXO_EVENT_LOG_DIR, EXO_TRACING_ENABLED
 from exo.shared.types.commands import (
@@ -17,6 +18,7 @@ from exo.shared.types.commands import (
     CreateInstance,
     DeleteCustomModelCard,
     DeleteInstance,
+    DeleteInstanceLink,
     ForwarderCommand,
     ForwarderDownloadCommand,
     ImageEdits,
@@ -24,6 +26,7 @@ from exo.shared.types.commands import (
     PlaceInstance,
     RequestEventLog,
     SendInputChunk,
+    SetInstanceLink,
     TaskCancelled,
     TaskFinished,
     TestCommand,
@@ -38,6 +41,8 @@ from exo.shared.types.events import (
     IndexedEvent,
     InputChunkReceived,
     InstanceDeleted,
+    InstanceLinkCreated,
+    InstanceLinkDeleted,
     LocalForwarderEvent,
     NodeGatheredInfo,
     NodeTimedOut,
@@ -48,6 +53,7 @@ from exo.shared.types.events import (
     TracesCollected,
     TracesMerged,
 )
+from exo.shared.types.instance_link import InstanceLink
 from exo.shared.types.state import State
 from exo.shared.types.tasks import (
     ImageEdits as ImageEditsTask,
@@ -69,6 +75,46 @@ from exo.utils.event_buffer import MultiSourceBuffer
 from exo.utils.task_group import TaskGroup
 
 
+def _prefill_endpoint_for(state: State, decode_instance_id: InstanceId) -> str | None:
+    decode = state.instances.get(decode_instance_id)
+    if decode is None:
+        return None  # decode instance not present in state
+    decode_node = next(iter(decode.shard_assignments.node_to_runner.keys()), None)
+    if decode_node is None:
+        return None  # decode instance has no node -> runner entries
+
+    sources: set[InstanceId] = set()  # linked prefill instances
+    for link in state.instance_links.values():
+        if decode_instance_id in link.decode_instances:
+            sources.update(link.prefill_instances)
+    sources.discard(decode_instance_id)  # never pick the decode instance itself
+
+    in_flight = {TaskStatus.Pending, TaskStatus.Running}
+    task_counts: dict[InstanceId, int] = {  # in-flight tasks per candidate
+        src_id: sum(
+            1
+            for task in state.tasks.values()
+            if task.instance_id == src_id and task.task_status in in_flight
+        )
+        for src_id in sources
+    }
+    for src_id in sorted(sources, key=lambda sid: task_counts[sid]):
+        instance = state.instances.get(src_id)
+        if instance is None:
+            continue  # linked instance not present in state
+        for node_id, runner_id in instance.shard_assignments.node_to_runner.items():
+            port = state.prefill_server_ports.get(runner_id)
+            if port is None:
+                continue  # no prefill server port recorded for this runner
+            ip = find_ip_prioritised(
+                decode_node, node_id, state.topology, state.node_network, ring=True
+            )
+            if ip is None:
+                continue  # no IP found between decode_node and node_id
+            return f"{ip}:{port}"  # first candidate with both port and IP
+    return None  # no linked prefill instance is usable
+
+
 class Master:
     def __init__(
         self,
@@ -128,15 +174,24 @@ class Master:
                         case TestCommand():
                             pass
                         case TextGeneration():
+                            prefill_only: set[InstanceId] = set()
+                            for link in self.state.instance_links.values():
+                                prefill_only.update(link.prefill_instances)
+                            for link in self.state.instance_links.values():
+                                prefill_only.difference_update(link.decode_instances)
+
                             for instance in self.state.instances.values():
                                 if (
                                     instance.shard_assignments.model_id
                                     == command.task_params.model
+                                    and instance.instance_id not in prefill_only
                                 ):
+                                    in_flight = {TaskStatus.Pending, TaskStatus.Running}
                                     task_count = sum(
                                         1
                                         for task in self.state.tasks.values()
                                         if task.instance_id == instance.instance_id
+                                        and task.task_status in in_flight
                                     )
                                     instance_task_counts[instance.instance_id] = (
                                         task_count
@@ -154,20 +209,27 @@ class Master:
                                 ],
                             )
 
+                            decode_instance_id = available_instance_ids[0]
                             task_id = TaskId()
+                            params = command.task_params.model_copy(
+                                update={
+                                    "prefill_endpoint": _prefill_endpoint_for(
+                                        self.state, decode_instance_id
+                                    ),
+                                }
+                            )
                             generated_events.append(
                                 TaskCreated(
                                     task_id=task_id,
                                     task=TextGenerationTask(
                                         task_id=task_id,
                                         command_id=command.command_id,
-                                        instance_id=available_instance_ids[0],
+                                        instance_id=decode_instance_id,
                                         task_status=TaskStatus.Pending,
-                                        task_params=command.task_params,
+                                        task_params=params,
                                     ),
                                 )
                             )
-
                             self.command_task_mapping[command.command_id] = task_id
                         case ImageGeneration():
                             for instance in self.state.instances.values():
@@ -175,10 +237,12 @@ class Master:
                                     instance.shard_assignments.model_id
                                     == command.task_params.model
                                 ):
+                                    in_flight = {TaskStatus.Pending, TaskStatus.Running}
                                     task_count = sum(
                                         1
                                         for task in self.state.tasks.values()
                                         if task.instance_id == instance.instance_id
+                                        and task.task_status in in_flight
                                     )
                                     instance_task_counts[instance.instance_id] = (
                                         task_count
@@ -229,10 +293,12 @@ class Master:
                                     instance.shard_assignments.model_id
                                     == command.task_params.model
                                 ):
+                                    in_flight = {TaskStatus.Pending, TaskStatus.Running}
                                     task_count = sum(
                                         1
                                         for task in self.state.tasks.values()
                                         if task.instance_id == instance.instance_id
+                                        and task.task_status in in_flight
                                     )
                                     instance_task_counts[instance.instance_id] = (
                                         task_count
@@ -298,7 +364,9 @@ class Master:
                                 self.state.instances,
                                 self.state.node_memory,
                                 self.state.node_network,
+                                self.state.node_backends,
                                 download_status=self.state.downloads,
+                                node_rdma_ctl=self.state.node_rdma_ctl,
                             )
                             transition_events = get_transition_events(
                                 self.state.instances, placement, self.state.tasks
@@ -357,6 +425,21 @@ class Master:
                             generated_events.append(
                                 CustomModelCardDeleted(model_id=command.model_id)
                             )
+                        case SetInstanceLink():
+                            link = InstanceLink(
+                                link_id=command.link_id,
+                                prefill_instances=list(
+                                    dict.fromkeys(command.prefill_instances)
+                                ),
+                                decode_instances=list(
+                                    dict.fromkeys(command.decode_instances)
+                                ),
+                            )
+                            generated_events.append(InstanceLinkCreated(link=link))
+                        case DeleteInstanceLink():
+                            generated_events.append(
+                                InstanceLinkDeleted(link_id=command.link_id)
+                            )
                         case RequestEventLog():
                             # We should just be able to send everything, since other buffers will ignore old messages
                             # rate limit to 1000 at a time
@@ -410,13 +493,18 @@ class Master:
                         continue
 
                     logger.debug(f"Master indexing event: {str(event)[:100]}")
+
+                    event = event.model_copy(
+                        update={"_master_time_stamp": datetime.now(tz=timezone.utc)}
+                    )
+                    if isinstance(event, NodeGatheredInfo):
+                        event = event.model_copy(
+                            update={"when": str(datetime.now(tz=timezone.utc))}
+                        )
+
                     indexed = IndexedEvent(event=event, idx=len(self._event_log))
                     self.state = apply(self.state, indexed)
 
-                    event._master_time_stamp = datetime.now(tz=timezone.utc)  # pyright: ignore[reportPrivateUsage]
-                    if isinstance(event, NodeGatheredInfo):
-                        event.when = str(datetime.now(tz=timezone.utc))
-
                     self._event_log.append(event)
                     await self._send_event(indexed)
 
diff --git a/src/exo/master/placement.py b/src/exo/master/placement.py
index f9380693f..0ed634e2e 100644
--- a/src/exo/master/placement.py
+++ b/src/exo/master/placement.py
@@ -1,4 +1,3 @@
-import random
 from collections.abc import Mapping
 from copy import deepcopy
 from typing import Sequence
@@ -14,6 +13,7 @@ from exo.master.placement_utils import (
 )
 from exo.shared.models.model_cards import ModelId
 from exo.shared.topology import Topology
+from exo.shared.types.backends import Backend
 from exo.shared.types.commands import (
     CancelDownload,
     CreateInstance,
@@ -29,7 +29,7 @@ from exo.shared.types.events import (
     TaskStatusUpdated,
 )
 from exo.shared.types.memory import Memory
-from exo.shared.types.profiling import MemoryUsage, NodeNetworkInfo
+from exo.shared.types.profiling import MemoryUsage, NodeNetworkInfo, NodeRdmaCtlStatus
 from exo.shared.types.tasks import Task, TaskId, TaskStatus
 from exo.shared.types.worker.downloads import (
     DownloadCompleted,
@@ -46,11 +46,12 @@ from exo.shared.types.worker.instances import (
     MlxRingInstance,
 )
 from exo.shared.types.worker.shards import Sharding
+from exo.utils.ports import random_ephemeral_port
 
-
-def random_ephemeral_port() -> int:
-    port = random.randint(49153, 65535)
-    return port - 1 if port <= 52415 else port
+INSTANCE_META_BACKENDS: dict[InstanceMeta, list[Backend]] = {
+    InstanceMeta.MlxRing: [Backend.MlxMetal, Backend.MlxCuda, Backend.MlxCpu],
+    InstanceMeta.MlxJaccl: [Backend.MlxMetal],
+}
 
 
 def add_instance_to_placements(
@@ -108,8 +109,10 @@ def place_instance(
     current_instances: Mapping[InstanceId, Instance],
     node_memory: Mapping[NodeId, MemoryUsage],
     node_network: Mapping[NodeId, NodeNetworkInfo],
+    node_backends: Mapping[NodeId, list[Backend]],
     required_nodes: set[NodeId] | None = None,
     download_status: Mapping[NodeId, Sequence[DownloadProgress]] | None = None,
+    node_rdma_ctl: Mapping[NodeId, NodeRdmaCtlStatus] | None = None,
 ) -> dict[InstanceId, Instance]:
     cycles = topology.get_cycles()
     candidate_cycles = list(filter(lambda it: len(it) >= command.min_nodes, cycles))
@@ -133,12 +136,16 @@ def place_instance(
                 f"Requested Tensor sharding but this model does not support tensor parallelism: {command.model_card.model_id}"
             )
         # TODO: the condition here for tensor parallel is not correct, but it works good enough for now.
+        # DeepSeek V4 is MQA (num_key_value_heads=1) but its sharding strategy
+        # head-parallelises wq_b/wo_a and shards MoE experts instead of splitting
+        # KV heads, so the kv-head divisibility check doesn't apply.
+        is_deepseek_v4 = command.model_card.base_model.startswith("DeepSeek V4")
         kv_heads = command.model_card.num_key_value_heads
         cycles_with_sufficient_memory = [
             cycle
             for cycle in cycles_with_sufficient_memory
             if command.model_card.hidden_size % len(cycle) == 0
-            and (kv_heads is None or kv_heads % len(cycle) == 0)
+            and (is_deepseek_v4 or kv_heads is None or kv_heads % len(cycle) == 0)
         ]
         if not cycles_with_sufficient_memory:
             raise ValueError(
@@ -153,11 +160,55 @@ def place_instance(
         raise ValueError(
             "Pipeline parallelism is not supported for DeepSeek V3.1 (8-bit)"
         )
+    if (
+        command.sharding == Sharding.Pipeline
+        and command.model_card.base_model.startswith("Gemma 4")
+    ):
+        cycles_with_sufficient_memory = [
+            cycle for cycle in cycles_with_sufficient_memory if len(cycle) == 1
+        ]
+        if not cycles_with_sufficient_memory:
+            raise ValueError(
+                "Pipeline parallelism is not supported for Gemma 4; use tensor parallelism instead."
+            )
 
     smallest_cycles = get_smallest_cycles(cycles_with_sufficient_memory)
 
+    required_backends = set(INSTANCE_META_BACKENDS[command.instance_meta]) & set(
+        command.model_card.backends
+    )
+    if not required_backends:
+        raise ValueError(
+            f"Model {command.model_card.model_id} backends "
+            f"{sorted(b.value for b in command.model_card.backends)} cannot satisfy engine "
+            f"{command.instance_meta.value} which requires "
+            f"{sorted(b.value for b in INSTANCE_META_BACKENDS[command.instance_meta])}"
+        )
+    smallest_cycles = [
+        cycle
+        for cycle in smallest_cycles
+        if all(
+            set(node_backends.get(node_id, [])) & required_backends for node_id in cycle
+        )
+    ]
+    if not smallest_cycles:
+        raise ValueError(
+            f"No cycle where every node supports a backend in "
+            f"{sorted(b.value for b in required_backends)} for {command.model_card.model_id}"
+        )
+
+    rdma_ctl_status = node_rdma_ctl or {}
+
+    def _all_rdma_ctl_enabled(cycle: Cycle) -> bool:
+        return all(
+            ((status := rdma_ctl_status.get(node_id)) is not None and status.enabled)
+            for node_id in cycle
+        )
+
     smallest_rdma_cycles = [
-        cycle for cycle in smallest_cycles if topology.is_rdma_cycle(cycle)
+        cycle
+        for cycle in smallest_cycles
+        if topology.is_rdma_cycle(cycle) and _all_rdma_ctl_enabled(cycle)
     ]
 
     if command.instance_meta == InstanceMeta.MlxJaccl:
@@ -193,8 +244,12 @@ def place_instance(
 
     # Single-node: force Pipeline/Ring (Tensor and Jaccl require multi-node)
     if len(selected_cycle) == 1:
-        command.instance_meta = InstanceMeta.MlxRing
-        command.sharding = Sharding.Pipeline
+        command = command.model_copy(
+            update={
+                "instance_meta": InstanceMeta.MlxRing,
+                "sharding": Sharding.Pipeline,
+            }
+        )
 
     shard_assignments = get_shard_assignments(
         command.model_card, selected_cycle, command.sharding, node_memory
diff --git a/src/exo/master/placement_utils.py b/src/exo/master/placement_utils.py
index eb78c8fa5..0375e97e0 100644
--- a/src/exo/master/placement_utils.py
+++ b/src/exo/master/placement_utils.py
@@ -336,7 +336,7 @@ def _find_connection_ip(
             yield connection.sink_multiaddr.ip_address
 
 
-def _find_ip_prioritised(
+def find_ip_prioritised(
     node_id: NodeId,
     other_node_id: NodeId,
     cycle_digraph: Topology,
@@ -413,7 +413,7 @@ def get_mlx_ring_hosts_by_node(
                 hosts_for_node.append(Host(ip="198.51.100.1", port=0))
                 continue
 
-            connection_ip = _find_ip_prioritised(
+            connection_ip = find_ip_prioritised(
                 node_id, other_node_id, cycle_digraph, node_network, ring=True
             )
             if connection_ip is None:
@@ -445,7 +445,7 @@ def get_mlx_jaccl_coordinators(
         if n == coordinator:
             return "0.0.0.0"
 
-        ip = _find_ip_prioritised(
+        ip = find_ip_prioritised(
             n, coordinator, cycle_digraph, node_network, ring=False
         )
         if ip is not None:
diff --git a/src/exo/master/tests/test_master.py b/src/exo/master/tests/test_master.py
index c4a1cff0c..d370a1e72 100644
--- a/src/exo/master/tests/test_master.py
+++ b/src/exo/master/tests/test_master.py
@@ -8,6 +8,7 @@ from loguru import logger
 from exo.master.main import Master
 from exo.routing.router import get_node_id_keypair
 from exo.shared.models.model_cards import ModelCard, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.commands import (
     CommandId,
     ForwarderCommand,
@@ -43,6 +44,7 @@ from exo.shared.types.worker.instances import (
 )
 from exo.shared.types.worker.shards import PipelineShardMetadata, Sharding
 from exo.utils.channels import channel
+from exo.utils.info_gatherer.info_gatherer import NodeBackends
 
 
 @pytest.mark.asyncio
@@ -120,6 +122,20 @@ async def test_master():
                 ),
             )
         )
+        await local_event_sender.send(
+            LocalForwarderEvent(
+                origin_idx=1,
+                origin=SystemId("Worker"),
+                session=session_id,
+                event=(
+                    NodeGatheredInfo(
+                        when=str(datetime.now(tz=timezone.utc)),
+                        node_id=node_id,
+                        info=NodeBackends(backends=[Backend.MlxMetal]),
+                    )
+                ),
+            )
+        )
 
         # wait for initial topology event
         logger.info("wait for initial topology event")
@@ -127,6 +143,8 @@ async def test_master():
             await anyio.sleep(0.001)
         while len(master.state.node_memory) == 0:
             await anyio.sleep(0.001)
+        while len(master.state.node_backends) == 0:
+            await anyio.sleep(0.001)
 
         logger.info("inject a CreateInstance Command")
         await command_sender.send(
@@ -142,6 +160,7 @@ async def test_master():
                             hidden_size=7168,
                             supports_tensor=True,
                             tasks=[ModelTask.TextGeneration],
+                            backends=[Backend.MlxMetal],
                         ),
                         sharding=Sharding.Pipeline,
                         instance_meta=InstanceMeta.MlxRing,
@@ -173,17 +192,19 @@ async def test_master():
                 ),
             )
         )
-        while len(_get_events()) < 3:
+        while len(_get_events()) < 4:
             await anyio.sleep(0.01)
 
         events = _get_events()
-        assert len(events) == 3
+        assert len(events) == 4
         assert events[0].idx == 0
         assert events[1].idx == 1
         assert events[2].idx == 2
+        assert events[3].idx == 3
         assert isinstance(events[0].event, NodeGatheredInfo)
-        assert isinstance(events[1].event, InstanceCreated)
-        created_instance = events[1].event.instance
+        assert isinstance(events[1].event, NodeGatheredInfo)
+        assert isinstance(events[2].event, InstanceCreated)
+        created_instance = events[2].event.instance
         assert isinstance(created_instance, MlxRingInstance)
         runner_id = list(created_instance.shard_assignments.runner_to_shard.keys())[0]
         # Validate the shard assignments
@@ -201,6 +222,7 @@ async def test_master():
                         hidden_size=7168,
                         supports_tensor=True,
                         tasks=[ModelTask.TextGeneration],
+                        backends=[Backend.MlxMetal],
                     ),
                     device_rank=0,
                     world_size=1,
@@ -215,10 +237,10 @@ async def test_master():
         assert len(created_instance.hosts_by_node[node_id]) == 1
         assert created_instance.hosts_by_node[node_id][0].ip == "0.0.0.0"
         assert created_instance.ephemeral_port > 0
-        assert isinstance(events[2].event, TaskCreated)
-        assert events[2].event.task.task_status == TaskStatus.Pending
-        assert isinstance(events[2].event.task, TextGenerationTask)
-        assert events[2].event.task.task_params == TextGenerationTaskParams(
+        assert isinstance(events[3].event, TaskCreated)
+        assert events[3].event.task.task_status == TaskStatus.Pending
+        assert isinstance(events[3].event.task, TextGenerationTask)
+        assert events[3].event.task.task_params == TextGenerationTaskParams(
             model=ModelId("llama-3.2-1b"),
             input=[
                 InputMessage(
diff --git a/src/exo/master/tests/test_placement.py b/src/exo/master/tests/test_placement.py
index 40530ad28..b89118726 100644
--- a/src/exo/master/tests/test_placement.py
+++ b/src/exo/master/tests/test_placement.py
@@ -1,3 +1,5 @@
+from collections.abc import Mapping
+
 import pytest
 
 from exo.master.placement import (
@@ -12,6 +14,7 @@ from exo.master.tests.conftest import (
 )
 from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
 from exo.shared.topology import Topology
+from exo.shared.types.backends import Backend
 from exo.shared.types.commands import PlaceInstance
 from exo.shared.types.common import CommandId, NodeId
 from exo.shared.types.events import (
@@ -21,7 +24,11 @@ from exo.shared.types.events import (
 )
 from exo.shared.types.memory import Memory
 from exo.shared.types.multiaddr import Multiaddr
-from exo.shared.types.profiling import NetworkInterfaceInfo, NodeNetworkInfo
+from exo.shared.types.profiling import (
+    NetworkInterfaceInfo,
+    NodeNetworkInfo,
+    NodeRdmaCtlStatus,
+)
 from exo.shared.types.tasks import TaskId, TaskStatus, TextGeneration
 from exo.shared.types.text_generation import (
     InputMessage,
@@ -67,9 +74,16 @@ def model_card() -> ModelCard:
         hidden_size=30,
         supports_tensor=True,
         tasks=[ModelTask.TextGeneration],
+        backends=[Backend.MlxMetal],
     )
 
 
+def _metal_only(node_memory: Mapping[NodeId, object]) -> dict[NodeId, list[Backend]]:
+    """Map every node id found in ``node_memory`` to an MlxMetal-only backend list."""
+    metal = Backend.MlxMetal
+    return {node: [metal] for node in node_memory}
+
+
 def place_instance_command(model_card: ModelCard) -> PlaceInstance:
     return PlaceInstance(
         command_id=CommandId(),
@@ -95,10 +109,14 @@ def test_get_instance_placements_create_instance(
     model_card: ModelCard,
 ):
     # arrange
-    model_card.n_layers = total_layers
-    model_card.storage_size = Memory.from_bytes(
-        sum(available_memory)
-    )  # make it exactly fit across all nodes
+    model_card = model_card.model_copy(
+        update={
+            "n_layers": total_layers,
+            "storage_size": Memory.from_bytes(
+                sum(available_memory)
+            ),  # make it exactly fit across all nodes
+        }
+    )
     topology = Topology()
 
     cic = place_instance_command(model_card)
@@ -147,7 +165,9 @@ def test_get_instance_placements_create_instance(
     topology.add_connection(conn_b_a)
 
     # act
-    placements = place_instance(cic, topology, {}, node_memory, node_network)
+    placements = place_instance(
+        cic, topology, {}, node_memory, node_network, _metal_only(node_memory)
+    )
 
     # assert
     assert len(placements) == 1
@@ -187,9 +207,12 @@ def test_get_instance_placements_one_node_exact_fit() -> None:
             hidden_size=1000,
             supports_tensor=True,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
     )
-    placements = place_instance(cic, topology, {}, node_memory, node_network)
+    placements = place_instance(
+        cic, topology, {}, node_memory, node_network, _metal_only(node_memory)
+    )
 
     assert len(placements) == 1
     instance_id = list(placements.keys())[0]
@@ -214,9 +237,12 @@ def test_get_instance_placements_one_node_fits_with_extra_memory() -> None:
             hidden_size=1000,
             supports_tensor=True,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
     )
-    placements = place_instance(cic, topology, {}, node_memory, node_network)
+    placements = place_instance(
+        cic, topology, {}, node_memory, node_network, _metal_only(node_memory)
+    )
 
     assert len(placements) == 1
     instance_id = list(placements.keys())[0]
@@ -241,11 +267,14 @@ def test_get_instance_placements_one_node_not_fit() -> None:
             hidden_size=1000,
             supports_tensor=True,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
     )
 
     with pytest.raises(ValueError, match="No cycles found with sufficient memory"):
-        place_instance(cic, topology, {}, node_memory, node_network)
+        place_instance(
+            cic, topology, {}, node_memory, node_network, _metal_only(node_memory)
+        )
 
 
 def test_get_transition_events_no_change(instance: Instance):
@@ -296,7 +325,7 @@ def test_placement_selects_leaf_nodes(
     # arrange
     topology = Topology()
 
-    model_card.storage_size = Memory.from_bytes(1000)
+    model_card = model_card.model_copy(update={"storage_size": Memory.from_bytes(1000)})
 
     node_id_a = NodeId()
     node_id_b = NodeId()
@@ -344,7 +373,9 @@ def test_placement_selects_leaf_nodes(
     cic = place_instance_command(model_card=model_card)
 
     # act
-    placements = place_instance(cic, topology, {}, node_memory, node_network)
+    placements = place_instance(
+        cic, topology, {}, node_memory, node_network, _metal_only(node_memory)
+    )
 
     # assert
     assert len(placements) == 1
@@ -364,8 +395,12 @@ def test_tensor_rdma_backend_connectivity_matrix(
 ):
     # arrange
     topology = Topology()
-    model_card.n_layers = 12
-    model_card.storage_size = Memory.from_bytes(1500)
+    model_card = model_card.model_copy(
+        update={
+            "n_layers": 12,
+            "storage_size": Memory.from_bytes(1500),
+        }
+    )
 
     node_a = NodeId()
     node_b = NodeId()
@@ -431,8 +466,22 @@ def test_tensor_rdma_backend_connectivity_matrix(
         min_nodes=1,
     )
 
+    node_rdma_ctl = {
+        node_a: NodeRdmaCtlStatus(enabled=True),
+        node_b: NodeRdmaCtlStatus(enabled=True),
+        node_c: NodeRdmaCtlStatus(enabled=True),
+    }
+
     # act
-    placements = place_instance(cic, topology, {}, node_memory, node_network)
+    placements = place_instance(
+        cic,
+        topology,
+        {},
+        node_memory,
+        node_network,
+        _metal_only(node_memory),
+        node_rdma_ctl=node_rdma_ctl,
+    )
 
     # assert
     assert len(placements) == 1
@@ -474,6 +523,133 @@ def test_tensor_rdma_backend_connectivity_matrix(
             assert len(ip_part.split(".")) == 4
 
 
+def _build_three_node_rdma_topology() -> tuple[
+    Topology, NodeId, NodeId, NodeId, dict[NodeId, NodeNetworkInfo]
+]:
+    """Build a three-node topology with RDMA and socket edges between every pair.
+
+    Returns the topology, the three node ids, and a mapping from each node id to
+    its network info.
+    """
+    topology = Topology()
+    node_a, node_b, node_c = NodeId(), NodeId(), NodeId()
+    nodes = (node_a, node_b, node_c)
+    for node in nodes:
+        topology.add_node(node)
+
+    # Every node advertises the same en0 interface.
+    en0 = NetworkInterfaceInfo(name="en0", ip_address="10.0.0.1")
+    node_network = {node: NodeNetworkInfo(interfaces=[en0]) for node in nodes}
+
+    # One RDMA edge per direction for each pair of nodes.
+    for src, sink, iface in [
+        (node_a, node_b, 3),
+        (node_b, node_a, 3),
+        (node_b, node_c, 4),
+        (node_c, node_b, 4),
+        (node_a, node_c, 5),
+        (node_c, node_a, 5),
+    ]:
+        topology.add_connection(
+            Connection(source=src, sink=sink, edge=create_rdma_connection(iface))
+        )
+
+    # One socket edge per direction, all reusing a single connection object.
+    socket_edge = SocketConnection(
+        sink_multiaddr=Multiaddr(address="/ip4/10.0.0.1/tcp/8000")
+    )
+    for src, sink in [
+        (node_a, node_b),
+        (node_b, node_c),
+        (node_c, node_a),
+        (node_a, node_c),
+        (node_b, node_a),
+        (node_c, node_b),
+    ]:
+        topology.add_connection(Connection(source=src, sink=sink, edge=socket_edge))
+
+    return topology, node_a, node_b, node_c, node_network
+
+
+def test_place_mlx_jaccl_rejects_when_a_node_has_rdma_ctl_disabled(
+    model_card: ModelCard,
+):
+    """MlxJaccl placement must raise when one node reports rdma_ctl as disabled."""
+    # arrange
+    topology, node_a, node_b, node_c, node_network = _build_three_node_rdma_topology()
+    node_memory = {n: create_node_memory(500) for n in (node_a, node_b, node_c)}
+    # node_c is the only node whose rdma_ctl status is disabled
+    node_rdma_ctl = {
+        node_a: NodeRdmaCtlStatus(enabled=True),
+        node_b: NodeRdmaCtlStatus(enabled=True),
+        node_c: NodeRdmaCtlStatus(enabled=False),
+    }
+
+    sized_card = model_card.model_copy(
+        update={"n_layers": 12, "storage_size": Memory.from_bytes(1500)}
+    )
+    cic = PlaceInstance(
+        command_id=CommandId(),
+        model_card=sized_card,
+        sharding=Sharding.Tensor,
+        instance_meta=InstanceMeta.MlxJaccl,
+        min_nodes=3,
+    )
+    node_backends = _metal_only(node_memory)
+
+    # act / assert
+    with pytest.raises(
+        ValueError, match="Requested RDMA \\(MlxJaccl\\) but no RDMA-connected cycles"
+    ):
+        place_instance(
+            cic,
+            topology,
+            {},
+            node_memory,
+            node_network,
+            node_backends,
+            node_rdma_ctl=node_rdma_ctl,
+        )
+
+
+def test_place_mlx_jaccl_rejects_when_node_rdma_ctl_missing(model_card: ModelCard):
+    """A node with no observed rdma_ctl status must not participate in RDMA placement."""
+    # arrange
+    model_card = model_card.model_copy(
+        update={"n_layers": 12, "storage_size": Memory.from_bytes(1500)}
+    )
+    topology, node_a, node_b, node_c, node_network = _build_three_node_rdma_topology()
+    node_memory = {n: create_node_memory(500) for n in (node_a, node_b, node_c)}
+    # node_c has no rdma_ctl entry at all
+    node_rdma_ctl = {
+        node_a: NodeRdmaCtlStatus(enabled=True),
+        node_b: NodeRdmaCtlStatus(enabled=True),
+    }
+    cic = PlaceInstance(
+        sharding=Sharding.Tensor,
+        instance_meta=InstanceMeta.MlxJaccl,
+        command_id=CommandId(),
+        model_card=model_card,
+        min_nodes=3,
+    )
+
+    # act / assert
+    # Pin the message: an unrelated ValueError must not satisfy this test, and a
+    # missing status is filtered out of RDMA cycles just like a disabled one.
+    with pytest.raises(
+        ValueError, match="Requested RDMA \\(MlxJaccl\\) but no RDMA-connected cycles"
+    ):
+        place_instance(
+            cic,
+            topology,
+            {},
+            node_memory,
+            node_network,
+            _metal_only(node_memory),
+            node_rdma_ctl=node_rdma_ctl,
+        )
+
+
 def _make_task(
     instance_id: InstanceId,
     status: TaskStatus = TaskStatus.Running,
@@ -605,7 +781,7 @@ def test_placement_prefers_cycle_with_downloaded_model(
     """When two cycles are otherwise equal, prefer the one with the model already downloaded."""
     topology = Topology()
 
-    model_card.storage_size = Memory.from_bytes(500)
+    model_card = model_card.model_copy(update={"storage_size": Memory.from_bytes(500)})
 
     node_a = NodeId()
     node_b = NodeId()
@@ -638,7 +814,13 @@ def test_placement_prefers_cycle_with_downloaded_model(
 
     cic = place_instance_command(model_card)
     placements = place_instance(
-        cic, topology, {}, node_memory, node_network, download_status=download_status
+        cic,
+        topology,
+        {},
+        node_memory,
+        node_network,
+        _metal_only(node_memory),
+        download_status=download_status,
     )
 
     assert len(placements) == 1
@@ -653,7 +835,7 @@ def test_placement_prefers_cycle_with_higher_download_progress(
     """When two cycles are otherwise equal, prefer the one with more download progress."""
     topology = Topology()
 
-    model_card.storage_size = Memory.from_bytes(1000)
+    model_card = model_card.model_copy(update={"storage_size": Memory.from_bytes(1000)})
 
     node_a = NodeId()
     node_b = NodeId()
@@ -710,7 +892,13 @@ def test_placement_prefers_cycle_with_higher_download_progress(
 
     cic = place_instance_command(model_card)
     placements = place_instance(
-        cic, topology, {}, node_memory, node_network, download_status=download_status
+        cic,
+        topology,
+        {},
+        node_memory,
+        node_network,
+        _metal_only(node_memory),
+        download_status=download_status,
     )
 
     assert len(placements) == 1
@@ -725,7 +913,7 @@ def test_placement_does_not_prefer_cycle_with_failed_download(
     """A failed download should count as 0% — not preferred over a node with no download history."""
     topology = Topology()
 
-    model_card.storage_size = Memory.from_bytes(500)
+    model_card = model_card.model_copy(update={"storage_size": Memory.from_bytes(500)})
 
     node_a = NodeId()
     node_b = NodeId()
@@ -758,7 +946,13 @@ def test_placement_does_not_prefer_cycle_with_failed_download(
 
     cic = place_instance_command(model_card)
     placements = place_instance(
-        cic, topology, {}, node_memory, node_network, download_status=download_status
+        cic,
+        topology,
+        {},
+        node_memory,
+        node_network,
+        _metal_only(node_memory),
+        download_status=download_status,
     )
 
     assert len(placements) == 1
@@ -766,3 +960,99 @@ def test_placement_does_not_prefer_cycle_with_failed_download(
     assigned_nodes = set(instance.shard_assignments.node_to_runner.keys())
     # node_a should win on RAM tiebreaker since failed download scores 0.0
     assert assigned_nodes == {node_a}
+
+
+def test_placement_rejects_when_model_backends_disjoint_from_engine(
+    model_card: ModelCard,
+):
+    """Placement must raise when the model card lists Vllm as its only backend."""
+    topology = Topology()
+    lone_node = NodeId()
+    topology.add_node(lone_node)
+    node_memory = {lone_node: create_node_memory(1000 * 1024)}
+    node_network = {lone_node: create_node_network()}
+    # The single node itself only reports MlxMetal.
+    node_backends = _metal_only(node_memory)
+
+    vllm_only_card = model_card.model_copy(update={"backends": [Backend.Vllm]})
+    cic = place_instance_command(vllm_only_card)
+
+    with pytest.raises(ValueError, match="cannot satisfy engine MlxRing"):
+        place_instance(cic, topology, {}, node_memory, node_network, node_backends)
+
+
+def test_placement_rejects_when_only_some_nodes_support_backend(
+    model_card: ModelCard,
+):
+    """Placement must raise when one of three nodes lacks the model's backend."""
+    topology = Topology()
+    node_a, node_b, node_c = NodeId(), NodeId(), NodeId()
+    nodes = (node_a, node_b, node_c)
+    for node in nodes:
+        topology.add_node(node)
+
+    shared_edge = create_socket_connection(1)
+    directed_pairs = [
+        (node_a, node_b),
+        (node_b, node_c),
+        (node_c, node_a),
+        (node_a, node_c),
+        (node_c, node_b),
+        (node_b, node_a),
+    ]
+    for src, dst in directed_pairs:
+        topology.add_connection(Connection(source=src, sink=dst, edge=shared_edge))
+
+    node_memory = {node: create_node_memory(500 * 1024) for node in nodes}
+    node_network = {node: create_node_network() for node in nodes}
+    node_backends = {
+        node_a: [Backend.MlxMetal],
+        node_b: [Backend.MlxMetal],
+        node_c: [Backend.MlxCuda],  # the lone CUDA-only node breaks the cycle
+    }
+
+    metal_card = model_card.model_copy(
+        update={
+            "backends": [Backend.MlxMetal],
+            "n_layers": 12,
+            "storage_size": Memory.from_kb(1500),
+        }
+    )
+    cic = place_instance_command(metal_card)
+
+    with pytest.raises(ValueError, match="No cycle where every node supports"):
+        place_instance(cic, topology, {}, node_memory, node_network, node_backends)
+
+
+def test_mlx_jaccl_rejects_cuda_only_cycle(model_card: ModelCard):
+    """MlxJaccl placement must raise when every node supports only MlxCuda."""
+    topology, *nodes, node_network = _build_three_node_rdma_topology()
+    node_memory = {node: create_node_memory(500) for node in nodes}
+    node_rdma_ctl = {node: NodeRdmaCtlStatus(enabled=True) for node in nodes}
+    node_backends = {node: [Backend.MlxCuda] for node in nodes}
+
+    dual_backend_card = model_card.model_copy(
+        update={
+            "backends": [Backend.MlxMetal, Backend.MlxCuda],
+            "n_layers": 12,
+            "storage_size": Memory.from_bytes(1500),
+        }
+    )
+    cic = PlaceInstance(
+        sharding=Sharding.Tensor,
+        instance_meta=InstanceMeta.MlxJaccl,
+        command_id=CommandId(),
+        model_card=dual_backend_card,
+        min_nodes=3,
+    )
+
+    with pytest.raises(ValueError, match="No cycle where every node supports"):
+        place_instance(
+            cic,
+            topology,
+            {},
+            node_memory,
+            node_network,
+            node_backends,
+            node_rdma_ctl=node_rdma_ctl,
+        )
diff --git a/src/exo/master/tests/test_placement_utils.py b/src/exo/master/tests/test_placement_utils.py
index 245c4fd7e..b2f647294 100644
--- a/src/exo/master/tests/test_placement_utils.py
+++ b/src/exo/master/tests/test_placement_utils.py
@@ -14,6 +14,7 @@ from exo.master.tests.conftest import (
 )
 from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
 from exo.shared.topology import Topology
+from exo.shared.types.backends import Backend
 from exo.shared.types.common import NodeId
 from exo.shared.types.memory import Memory
 from exo.shared.types.profiling import (
@@ -243,6 +244,7 @@ def test_get_shard_assignments(
         hidden_size=1000,
         supports_tensor=True,
         tasks=[ModelTask.TextGeneration],
+        backends=[Backend.MlxMetal],
     )
 
     cycles = topology.get_cycles()
@@ -484,6 +486,7 @@ def test_get_shard_assignments_insufficient_memory_raises():
         hidden_size=1000,
         supports_tensor=True,
         tasks=[ModelTask.TextGeneration],
+        backends=[Backend.MlxMetal],
     )
     cycles = topology.get_cycles()
     selected_cycle = cycles[0]
@@ -533,6 +536,7 @@ class TestCfgParallelPlacement:
             supports_tensor=False,
             uses_cfg=True,
             tasks=[ModelTask.TextToImage],
+            backends=[Backend.MlxMetal],
         )
 
         assignments = get_shard_assignments_for_pipeline_parallel(
@@ -576,6 +580,7 @@ class TestCfgParallelPlacement:
             supports_tensor=False,
             uses_cfg=True,
             tasks=[ModelTask.TextToImage],
+            backends=[Backend.MlxMetal],
         )
 
         assignments = get_shard_assignments_for_pipeline_parallel(
@@ -625,6 +630,7 @@ class TestCfgParallelPlacement:
             supports_tensor=False,
             uses_cfg=True,
             tasks=[ModelTask.TextToImage],
+            backends=[Backend.MlxMetal],
         )
 
         assignments = get_shard_assignments_for_pipeline_parallel(
@@ -660,6 +666,7 @@ class TestCfgParallelPlacement:
             supports_tensor=False,
             uses_cfg=False,  # Non-CFG model
             tasks=[ModelTask.TextToImage],
+            backends=[Backend.MlxMetal],
         )
 
         assignments = get_shard_assignments_for_pipeline_parallel(
diff --git a/src/exo/routing/connection_message.py b/src/exo/routing/connection_message.py
index 3cc0362dc..b00891124 100644
--- a/src/exo/routing/connection_message.py
+++ b/src/exo/routing/connection_message.py
@@ -1,12 +1,12 @@
 from exo_pyo3_bindings import PyFromSwarm
 
 from exo.shared.types.common import NodeId
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 
 """Serialisable types for Connection Updates/Messages"""
 
 
-class ConnectionMessage(CamelCaseModel):
+class ConnectionMessage(FrozenModel):
     node_id: NodeId
     connected: bool
 
diff --git a/src/exo/routing/event_router.py b/src/exo/routing/event_router.py
index 9fcda57b4..4f99c1525 100644
--- a/src/exo/routing/event_router.py
+++ b/src/exo/routing/event_router.py
@@ -72,6 +72,7 @@ class EventRouter:
         return send
 
     def receiver(self) -> Receiver[IndexedEvent]:
+        assert not self._tg.is_running()
         send, recv = channel[IndexedEvent]()
         self.internal_outbound.append(send)
         return recv
diff --git a/src/exo/routing/router.py b/src/exo/routing/router.py
index 9447d6aa2..a9341d10c 100644
--- a/src/exo/routing/router.py
+++ b/src/exo/routing/router.py
@@ -25,7 +25,7 @@ from loguru import logger
 
 from exo.shared.constants import EXO_NODE_ID_KEYPAIR
 from exo.utils.channels import Receiver, Sender, channel
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 from exo.utils.task_group import TaskGroup
 
 from .connection_message import ConnectionMessage
@@ -36,7 +36,7 @@ from .topics import CONNECTION_MESSAGES, PublishPolicy, TypedTopic
 # of preventing feedback, as it does not ask for a system id so cannot tell
 # which message is coming/going to which system.
 # This is currently only relevant for Election
-class TopicRouter[T: CamelCaseModel]:
+class TopicRouter[T: FrozenModel]:
     def __init__(
         self,
         topic: TypedTopic[T],
@@ -114,7 +114,7 @@ class Router:
         )
 
     def __init__(self, handle: NetworkingHandle):
-        self.topic_routers: dict[str, TopicRouter[CamelCaseModel]] = {}
+        self.topic_routers: dict[str, TopicRouter[FrozenModel]] = {}
         send, recv = channel[tuple[str, bytes]]()
         self.networking_receiver: Receiver[tuple[str, bytes]] = recv
         self._net: NetworkingHandle = handle
@@ -122,18 +122,18 @@ class Router:
         self._id_count = count()
         self._tg: TaskGroup = TaskGroup()
 
-    async def register_topic[T: CamelCaseModel](self, topic: TypedTopic[T]):
+    async def register_topic[T: FrozenModel](self, topic: TypedTopic[T]):
         send = self._tmp_networking_sender
         if send:
             self._tmp_networking_sender = None
         else:
             send = self.networking_receiver.clone_sender()
         router = TopicRouter[T](topic, send)
-        self.topic_routers[topic.topic] = cast(TopicRouter[CamelCaseModel], router)
+        self.topic_routers[topic.topic] = cast(TopicRouter[FrozenModel], router)
         if self._tg.is_running():
             await self._networking_subscribe(topic.topic)
 
-    def sender[T: CamelCaseModel](self, topic: TypedTopic[T]) -> Sender[T]:
+    def sender[T: FrozenModel](self, topic: TypedTopic[T]) -> Sender[T]:
         router = self.topic_routers.get(topic.topic, None)
         # There's gotta be a way to do this without THIS many asserts
         assert router is not None
@@ -141,7 +141,7 @@ class Router:
         sender = cast(TopicRouter[T], router).new_sender()
         return sender
 
-    def receiver[T: CamelCaseModel](self, topic: TypedTopic[T]) -> Receiver[T]:
+    def receiver[T: FrozenModel](self, topic: TypedTopic[T]) -> Receiver[T]:
         router = self.topic_routers.get(topic.topic, None)
         # There's gotta be a way to do this without THIS many asserts
 
@@ -150,7 +150,7 @@ class Router:
         assert router.topic.model_type == topic.model_type
 
         send, recv = channel[T]()
-        router.senders.add(cast(Sender[CamelCaseModel], send))
+        router.senders.add(cast(Sender[FrozenModel], send))
 
         return recv
 
diff --git a/src/exo/routing/topics.py b/src/exo/routing/topics.py
index 5d95a2a11..9776e5424 100644
--- a/src/exo/routing/topics.py
+++ b/src/exo/routing/topics.py
@@ -8,7 +8,7 @@ from exo.shared.types.events import (
     GlobalForwarderEvent,
     LocalForwarderEvent,
 )
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 
 
 class PublishPolicy(str, Enum):
@@ -21,7 +21,7 @@ class PublishPolicy(str, Enum):
 
 
 @dataclass  # (frozen=True)
-class TypedTopic[T: CamelCaseModel]:
+class TypedTopic[T: FrozenModel]:
     topic: str
     publish_policy: PublishPolicy
 
diff --git a/src/exo/shared/apply.py b/src/exo/shared/apply.py
index fb9f6d9ab..5f72247ba 100644
--- a/src/exo/shared/apply.py
+++ b/src/exo/shared/apply.py
@@ -4,7 +4,8 @@ from datetime import datetime
 
 from loguru import logger
 
-from exo.shared.types.common import NodeId
+from exo.shared.models.model_cards import ModelCard
+from exo.shared.types.common import ModelId, NodeId
 from exo.shared.types.events import (
     ChunkGenerated,
     CustomModelCardAdded,
@@ -14,6 +15,8 @@ from exo.shared.types.events import (
     InputChunkReceived,
     InstanceCreated,
     InstanceDeleted,
+    InstanceLinkCreated,
+    InstanceLinkDeleted,
     NodeDownloadProgress,
     NodeGatheredInfo,
     NodeTimedOut,
@@ -29,6 +32,7 @@ from exo.shared.types.events import (
     TracesCollected,
     TracesMerged,
 )
+from exo.shared.types.instance_link import InstanceLink, InstanceLinkId
 from exo.shared.types.profiling import (
     NodeIdentity,
     NodeNetworkInfo,
@@ -41,13 +45,19 @@ from exo.shared.types.tasks import Task, TaskId, TaskStatus
 from exo.shared.types.topology import Connection, RDMAConnection
 from exo.shared.types.worker.downloads import DownloadProgress
 from exo.shared.types.worker.instances import Instance, InstanceId
-from exo.shared.types.worker.runners import RunnerId, RunnerShutdown, RunnerStatus
+from exo.shared.types.worker.runners import (
+    RunnerId,
+    RunnerReady,
+    RunnerShutdown,
+    RunnerStatus,
+)
 from exo.utils.info_gatherer.info_gatherer import (
     MacmonMetrics,
     MacThunderboltConnections,
     MacThunderboltIdentifiers,
     MemoryUsage,
     MiscData,
+    NodeBackends,
     NodeConfig,
     NodeDiskUsage,
     NodeNetworkInterfaces,
@@ -57,6 +67,18 @@ from exo.utils.info_gatherer.info_gatherer import (
 )
 
 
+def _is_rdma_ctl_enabled(
+    node_id: NodeId, node_rdma_ctl: Mapping[NodeId, NodeRdmaCtlStatus]
+) -> bool:
+    """Report whether ``node_id`` has an observed ``rdma_ctl`` status that is enabled.
+
+    Nodes missing from ``node_rdma_ctl`` count as disabled (not RDMA-capable).
+    """
+    if (observed := node_rdma_ctl.get(node_id)) is None:
+        return False
+    return observed.enabled
+
+
 def event_apply(event: Event, state: State) -> State:
     """Apply an event to state."""
     match event:
@@ -67,10 +89,12 @@ def event_apply(event: Event, state: State) -> State:
             | InputChunkReceived()
             | TracesCollected()
             | TracesMerged()
-            | CustomModelCardAdded()
-            | CustomModelCardDeleted()
         ):  # Pass-through events that don't modify state
             return state
+        case CustomModelCardAdded():
+            return apply_custom_model_card_added(event, state)
+        case CustomModelCardDeleted():
+            return apply_custom_model_card_deleted(event, state)
         case InstanceCreated():
             return apply_instance_created(event, state)
         case InstanceDeleted():
@@ -95,6 +119,10 @@ def event_apply(event: Event, state: State) -> State:
             return apply_topology_edge_created(event, state)
         case TopologyEdgeDeleted():
             return apply_topology_edge_deleted(event, state)
+        case InstanceLinkCreated():
+            return apply_instance_link_created(event, state)
+        case InstanceLinkDeleted():
+            return apply_instance_link_deleted(event, state)
 
 
 def apply(state: State, event: IndexedEvent) -> State:
@@ -194,7 +222,38 @@ def apply_instance_deleted(event: InstanceDeleted, state: State) -> State:
     new_instances: Mapping[InstanceId, Instance] = {
         iid: inst for iid, inst in state.instances.items() if iid != event.instance_id
     }
-    return state.model_copy(update={"instances": new_instances})
+    new_links: dict[InstanceLinkId, InstanceLink] = {}
+    for link_id, link in state.instance_links.items():
+        prefill = [i for i in link.prefill_instances if i != event.instance_id]
+        decode = [i for i in link.decode_instances if i != event.instance_id]
+        if not prefill or not decode:
+            continue
+        if prefill == list(link.prefill_instances) and decode == list(
+            link.decode_instances
+        ):
+            new_links[link_id] = link
+        else:
+            new_links[link_id] = link.model_copy(
+                update={"prefill_instances": prefill, "decode_instances": decode}
+            )
+    return state.model_copy(
+        update={"instances": new_instances, "instance_links": new_links}
+    )
+
+
+def apply_instance_link_created(event: InstanceLinkCreated, state: State) -> State:
+    """Return a state copy holding ``event.link`` under its ``link_id``."""
+    # Copy first so the input mapping is not mutated; a same-id entry is replaced.
+    links_after: dict[InstanceLinkId, InstanceLink] = dict(state.instance_links)
+    links_after[event.link.link_id] = event.link
+    return state.model_copy(update={"instance_links": links_after})
+
+
+def apply_instance_link_deleted(event: InstanceLinkDeleted, state: State) -> State:
+    """Return a copy of ``state`` without the link whose id is ``event.link_id``."""
+    links_after: dict[InstanceLinkId, InstanceLink] = dict(state.instance_links)
+    links_after.pop(event.link_id, None)
+    return state.model_copy(update={"instance_links": links_after})
 
 
 def apply_runner_status_updated(event: RunnerStatusUpdated, state: State) -> State:
@@ -202,12 +261,28 @@ def apply_runner_status_updated(event: RunnerStatusUpdated, state: State) -> Sta
         new_runners: Mapping[RunnerId, RunnerStatus] = {
             rid: rs for rid, rs in state.runners.items() if rid != event.runner_id
         }
-        return state.model_copy(update={"runners": new_runners})
+        new_ports: Mapping[RunnerId, int] = {
+            rid: p
+            for rid, p in state.prefill_server_ports.items()
+            if rid != event.runner_id
+        }
+        return state.model_copy(
+            update={"runners": new_runners, "prefill_server_ports": new_ports}
+        )
     new_runners = {
         **state.runners,
         event.runner_id: event.runner_status,
     }
-    return state.model_copy(update={"runners": new_runners})
+    update: dict[str, object] = {"runners": new_runners}
+    if (
+        isinstance(event.runner_status, RunnerReady)
+        and event.runner_status.prefill_server_port is not None
+    ):
+        update["prefill_server_ports"] = {
+            **state.prefill_server_ports,
+            event.runner_id: event.runner_status.prefill_server_port,
+        }
+    return state.model_copy(update=update)
 
 
 def apply_node_timed_out(event: NodeTimedOut, state: State) -> State:
@@ -338,6 +413,9 @@ def apply_node_gathered_info(event: NodeGatheredInfo, state: State) -> State:
                 for nid in state.node_thunderbolt
                 for tb_ident in state.node_thunderbolt[nid].interfaces
             }
+            source_is_rdma_enabled = _is_rdma_ctl_enabled(
+                event.node_id, state.node_rdma_ctl
+            )
             as_rdma_conns = [
                 Connection(
                     source=event.node_id,
@@ -350,6 +428,10 @@ def apply_node_gathered_info(event: NodeGatheredInfo, state: State) -> State:
                 for tb_conn in info.conns
                 if tb_conn.source_uuid in conn_map
                 if tb_conn.sink_uuid in conn_map
+                if source_is_rdma_enabled
+                and _is_rdma_ctl_enabled(
+                    conn_map[tb_conn.sink_uuid][0], state.node_rdma_ctl
+                )
             ]
             topology.replace_all_out_rdma_connections(event.node_id, as_rdma_conns)
         case ThunderboltBridgeInfo():
@@ -373,6 +455,17 @@ def apply_node_gathered_info(event: NodeGatheredInfo, state: State) -> State:
                 **state.node_rdma_ctl,
                 event.node_id: NodeRdmaCtlStatus(enabled=info.enabled),
             }
+            # If RDMA just got disabled on this node, drop any RDMA edges touching it
+            # so placement / topology consumers cannot pick a disabled node for an
+            # RDMA-backed instance. (Edges will repopulate on the next
+            # MacThunderboltConnections poll once both endpoints are enabled again.)
+            if not info.enabled:
+                topology.remove_all_rdma_connections_touching(event.node_id)
+        case NodeBackends():
+            update["node_backends"] = {
+                **state.node_backends,
+                event.node_id: info.backends,
+            }
 
     return state.model_copy(update=update)
 
@@ -388,3 +481,22 @@ def apply_topology_edge_deleted(event: TopologyEdgeDeleted, state: State) -> Sta
     topology.remove_connection(event.conn)
     # TODO: Clean up removing the reverse connection
     return state.model_copy(update={"topology": topology})
+
+
+def apply_custom_model_card_added(event: CustomModelCardAdded, state: State) -> State:
+    """Return a state copy holding ``event.model_card`` under its ``model_id``."""
+    # Copy first so the input mapping is not mutated; a same-id card is replaced.
+    cards_after: dict[ModelId, ModelCard] = dict(state.custom_model_cards)
+    cards_after[event.model_card.model_id] = event.model_card
+    return state.model_copy(update={"custom_model_cards": cards_after})
+
+
+def apply_custom_model_card_deleted(
+    event: CustomModelCardDeleted, state: State
+) -> State:
+    """Return a copy of ``state`` lacking the card keyed by ``event.model_id``."""
+    # Work on a copy so the input mapping is not mutated; popping with a
+    # default makes an absent id a no-op.
+    remaining: dict[ModelId, ModelCard] = dict(state.custom_model_cards)
+    remaining.pop(event.model_id, None)
+    return state.model_copy(update={"custom_model_cards": remaining})
diff --git a/src/exo/shared/constants.py b/src/exo/shared/constants.py
index 01f315bc3..fd0869c46 100644
--- a/src/exo/shared/constants.py
+++ b/src/exo/shared/constants.py
@@ -68,7 +68,12 @@ DASHBOARD_DIR = (
 # Log files (data/logs or cache)
 EXO_LOG_DIR = EXO_CACHE_HOME / "exo_log"
 EXO_LOG = EXO_LOG_DIR / "exo.log"
+EXO_RUNNER_LOG_DIR = EXO_LOG_DIR / "runner_log"
+EXO_RUNNER_STDOUT_LOG = EXO_RUNNER_LOG_DIR / "stdout.log"
+EXO_RUNNER_STDERR_LOG = EXO_RUNNER_LOG_DIR / "stderr.log"
+
 EXO_TEST_LOG = EXO_CACHE_HOME / "exo_test.log"
+EXO_PID_FILE = EXO_CACHE_HOME / "exo.pid"
 
 # Identity (config)
 EXO_NODE_ID_KEYPAIR = EXO_CONFIG_HOME / "node_id.keypair"
@@ -96,6 +101,8 @@ EXO_OFFLINE = os.getenv("EXO_OFFLINE", "false").lower() == "true"
 
 EXO_TRACING_ENABLED = os.getenv("EXO_TRACING_ENABLED", "false").lower() == "true"
 
+ENABLE_DISAGGREGATION = os.getenv("ENABLE_DISAGGREGATION", "false").lower() == "true"
+
 EXO_MAX_CONCURRENT_REQUESTS = int(os.getenv("EXO_MAX_CONCURRENT_REQUESTS", "8"))
 
 EXO_MAX_INSTANCE_RETRIES = 5
diff --git a/src/exo/shared/election.py b/src/exo/shared/election.py
index 6f6e1f8f0..958a83d2f 100644
--- a/src/exo/shared/election.py
+++ b/src/exo/shared/election.py
@@ -12,13 +12,13 @@ from exo.routing.connection_message import ConnectionMessage
 from exo.shared.types.commands import ForwarderCommand
 from exo.shared.types.common import NodeId, SessionId
 from exo.utils.channels import Receiver, Sender
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 from exo.utils.task_group import TaskGroup
 
 DEFAULT_ELECTION_TIMEOUT = 3.0
 
 
-class ElectionMessage(CamelCaseModel):
+class ElectionMessage(FrozenModel):
     clock: int
     seniority: int
     proposed_session: SessionId
@@ -39,7 +39,7 @@ class ElectionMessage(CamelCaseModel):
             )
 
 
-class ElectionResult(CamelCaseModel):
+class ElectionResult(FrozenModel):
     session_id: SessionId
     won_clock: int
     is_new_master: bool
diff --git a/src/exo/shared/models/model_cards.py b/src/exo/shared/models/model_cards.py
index bdbe3f636..8911b9323 100644
--- a/src/exo/shared/models/model_cards.py
+++ b/src/exo/shared/models/model_cards.py
@@ -26,9 +26,11 @@ from exo.shared.constants import (
     EXO_MODELS_DIRS,
     RESOURCES_DIR,
 )
+from exo.shared.types.backends import Backend
 from exo.shared.types.common import ModelId
 from exo.shared.types.memory import Memory
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.shared.types.text_generation import ReasoningDialect
+from exo.utils.pydantic_ext import FrozenModel
 
 # kinda ugly...
 # TODO: load search path from config.toml
@@ -38,10 +40,62 @@ _BUILTIN_CARD_DIRS = [
     Path(RESOURCES_DIR) / "image_model_cards",
 ]
 
-_card_cache: dict[ModelId, "ModelCard"] = {}
+
+class _CardCache:  # in-memory ModelCard store keyed by model id
+    def __init__(self):
+        self.cc: dict[ModelId, "ModelCard"] = {}
+
+    def get(self, model_id: ModelId) -> "ModelCard | None":
+        return self.cc.get(model_id)
+
+    async def save(self, card: "ModelCard"):
+        self.cc[card.model_id] = card  # cached even if the disk write fails
+        try:
+            await card.save_to_custom_dir()
+        except OSError as e:
+            logger.warning(f"failed to save custom model card ({e.strerror})")
+
+    async def pop(self, model_id: ModelId) -> "ModelCard | None":
+        """Delete a custom card file; return the evicted cached card, or None."""
+        card_path = _custom_cards_dir / (ModelId(model_id).normalize() + ".toml")
+        try:
+            if await card_path.exists():
+                await card_path.unlink()
+                return self.cc.pop(model_id, None)
+        except OSError as e:
+            logger.warning(f"failed to delete custom model card ({e.strerror})")
+
+    async def list_all(self) -> list["ModelCard"]:
+        if len(self.cc) == 0:  # empty cache: load cards from disk first
+            await self.refresh()
+        if EXO_ENABLE_IMAGE_MODELS:
+            return list(self.cc.values())
+        return [c for c in self.cc.values() if not _is_image_card(c)]
+
+    async def _load_cards_from_dir(self, directory: Path, *, is_custom: bool) -> None:
+        """Load TOML cards under ``directory``; ids already cached are not replaced."""
+        async for toml_file in directory.rglob("*.toml"):
+            try:
+                card = await ModelCard.load_from_path(toml_file)
+                if is_custom:
+                    card = card.model_copy(update={"is_custom": True})
+                if self.get(card.model_id) is None:
+                    self.cc[card.model_id] = card
+            except (ValidationError, TOMLKitError) as e:
+                logger.opt(exception=e).warning(
+                    f"failed to validate model card at {toml_file}"
+                )
+
+    async def refresh(self) -> None:
+        for path in _BUILTIN_CARD_DIRS:
+            await self._load_cards_from_dir(path, is_custom=False)
+        await self._load_cards_from_dir(_custom_cards_dir, is_custom=True)
 
 
-def _detect_vision_from_config(model_id: ModelId) -> "VisionCardConfig | None":
+card_cache = _CardCache()
+
+
+def detect_vision_from_config(model_id: ModelId) -> "VisionCardConfig | None":
     normalized = model_id.normalize()
     for model_dir in [d / normalized for d in EXO_MODELS_DIRS]:
         config_path = model_dir / "config.json"
@@ -58,53 +112,17 @@ def _detect_vision_from_config(model_id: ModelId) -> "VisionCardConfig | None":
     return None
 
 
-async def _load_cards_from_dir(directory: Path, *, is_custom: bool) -> None:
-    """Load all TOML model cards from a directory into the cache."""
-    async for toml_file in directory.rglob("*.toml"):
-        try:
-            card = await ModelCard.load_from_path(toml_file)
-            if is_custom:
-                card = card.model_copy(update={"is_custom": True})
-            if card.vision is None:
-                vision = _detect_vision_from_config(card.model_id)
-                if vision is not None:
-                    card = card.model_copy(update={"vision": vision})
-            if card.model_id not in _card_cache:
-                _card_cache[card.model_id] = card
-        except (ValidationError, TOMLKitError):
-            pass
-
-
-async def _refresh_card_cache() -> None:
-    for path in _BUILTIN_CARD_DIRS:
-        await _load_cards_from_dir(path, is_custom=False)
-    await _load_cards_from_dir(_custom_cards_dir, is_custom=True)
-
-
 def _is_image_card(card: "ModelCard") -> bool:
     return any(t in (ModelTask.TextToImage, ModelTask.ImageToImage) for t in card.tasks)
 
 
-def get_card(model_id: ModelId) -> "ModelCard | None":
-    """Look up a single model card from the cache by ID."""
-    return _card_cache.get(model_id)
-
-
-async def get_model_cards() -> list["ModelCard"]:
-    if len(_card_cache) == 0:
-        await _refresh_card_cache()
-    if EXO_ENABLE_IMAGE_MODELS:
-        return list(_card_cache.values())
-    return [c for c in _card_cache.values() if not _is_image_card(c)]
-
-
 class ModelTask(str, Enum):
     TextGeneration = "TextGeneration"
     TextToImage = "TextToImage"
     ImageToImage = "ImageToImage"
 
 
-class ComponentInfo(CamelCaseModel):
+class ComponentInfo(FrozenModel):
     component_name: str
     component_path: str
     storage_size: Memory
@@ -113,7 +131,7 @@ class ComponentInfo(CamelCaseModel):
     safetensors_index_filename: str | None = None
 
 
-class VisionCardConfig(CamelCaseModel):
+class VisionCardConfig(FrozenModel):
     image_token_id: int
     model_type: str
     weights_repo: str = ""
@@ -121,7 +139,22 @@ class VisionCardConfig(CamelCaseModel):
     processor_repo: str | None = None
 
 
-class ModelCard(CamelCaseModel):
+class SamplingValues(FrozenModel):  # sampling knobs, all optional (None)
+    temperature: float | None = None
+    top_p: float | None = None
+    top_k: int | None = None
+    min_p: float | None = None
+    repetition_penalty: float | None = None
+    presence_penalty: float | None = None
+    frequency_penalty: float | None = None
+
+
+class SamplingDefaults(SamplingValues):  # adds two optional nested SamplingValues
+    thinking: SamplingValues | None = None
+    non_thinking: SamplingValues | None = None
+
+
+class ModelCard(FrozenModel):
     model_id: ModelId
     storage_size: Memory
     n_layers: PositiveInt
@@ -134,11 +167,22 @@ class ModelCard(CamelCaseModel):
     quantization: str = ""
     base_model: str = ""
     capabilities: list[str] = []
+    backends: list[Backend]
+    reasoning_dialect: ReasoningDialect = "none"
     context_length: int = 0
     uses_cfg: bool = False
     trust_remote_code: bool = True
     is_custom: bool = False
     vision: VisionCardConfig | None = None
+    sampling_defaults: SamplingDefaults = Field(default_factory=SamplingDefaults)
+
+    @model_validator(mode="after")
+    def _autodetect_vision(self) -> "ModelCard":
+        """Fill ``vision`` from the model's config when the card does not set it."""
+        if self.vision is None:
+            if (found := detect_vision_from_config(self.model_id)) is not None:
+                object.__setattr__(self, "vision", found)
+        return self
 
     @model_validator(mode="after")
     def _fill_vision_weights_repo(self) -> "ModelCard":
@@ -155,6 +199,11 @@ class ModelCard(CamelCaseModel):
     def _validate_tasks(cls, v: list[str | ModelTask]) -> list[ModelTask]:
         return [item if isinstance(item, ModelTask) else ModelTask(item) for item in v]
 
+    @field_validator("backends", mode="before")
+    @classmethod
+    def _validate_backends(cls, v: list[str | Backend]) -> list[Backend]:
+        return [Backend(item) for item in v]  # Enum lookup returns members unchanged
+
     async def save(self, path: Path) -> None:
         async with await open_file(path, "w") as f:
             py = self.model_dump(exclude_none=True, exclude={"is_custom"})
@@ -174,14 +223,13 @@ class ModelCard(CamelCaseModel):
     # Is it okay that model card.load defaults to network access if the card doesn't exist? do we want to be more explicit here?
     @staticmethod
     async def load(model_id: ModelId) -> "ModelCard":
-        if model_id not in _card_cache:
-            await _refresh_card_cache()
-        if (mc := _card_cache.get(model_id)) is not None:
+        if card_cache.get(model_id) is None:
+            await card_cache.refresh()
+        if (mc := card_cache.get(model_id)) is not None:
             return mc
 
         mc = await ModelCard.fetch_from_hf(model_id)
         await mc.save_to_custom_dir()
-        _card_cache[model_id] = mc
         return mc
 
     @staticmethod
@@ -208,24 +256,12 @@ class ModelCard(CamelCaseModel):
             trust_remote_code=False,
             is_custom=True,
             vision=config_data.vision,
+            backends=list(
+                Backend
+            ),  # all backends — we don't know what an arbitrary HF model supports; let placement gate decide
         )
 
 
-def add_to_card_cache(card: "ModelCard") -> None:
-    """Add or update a model card in the in-memory cache."""
-    _card_cache[card.model_id] = card
-
-
-async def delete_custom_card(model_id: ModelId) -> bool:
-    """Delete a user-added custom model card. Returns True if deleted."""
-    card_path = _custom_cards_dir / (ModelId(model_id).normalize() + ".toml")
-    if await card_path.exists():
-        await card_path.unlink()
-        _card_cache.pop(model_id, None)
-        return True
-    return False
-
-
 class ConfigData(BaseModel):
     model_config = {"extra": "ignore"}  # Allow unknown fields
 
@@ -250,12 +286,14 @@ class ConfigData(BaseModel):
         return self.architectures in [
             ["Glm4MoeLiteForCausalLM"],
             ["GlmMoeDsaForCausalLM"],
+            ["DeepseekV4ForCausalLM"],
             ["DeepseekV32ForCausalLM"],
             ["DeepseekV3ForCausalLM"],
             ["Qwen3NextForCausalLM"],
             ["Qwen3MoeForCausalLM"],
             ["Qwen3_5MoeForConditionalGeneration"],
             ["Qwen3_5ForConditionalGeneration"],
+            ["Qwen3VLForConditionalGeneration"],
             ["MiniMaxM2ForCausalLM"],
             ["LlamaForCausalLM"],
             ["GptOssForCausalLM"],
@@ -346,7 +384,7 @@ async def fetch_safetensors_size(model_id: ModelId) -> Memory:
         index_data = ModelSafetensorsIndex.model_validate_json(await f.read())
 
     metadata = index_data.metadata
-    if metadata is not None:
+    if metadata is not None and metadata.total_size is not None:
         return Memory.from_bytes(metadata.total_size)
 
     info = model_info(model_id)
diff --git a/src/exo/shared/tests/conftest.py b/src/exo/shared/tests/conftest.py
index 0622cb0bc..b0ffb08d8 100644
--- a/src/exo/shared/tests/conftest.py
+++ b/src/exo/shared/tests/conftest.py
@@ -8,6 +8,7 @@ from _pytest.logging import LogCaptureFixture
 from loguru import logger
 
 from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.memory import Memory
 from exo.shared.types.worker.shards import PipelineShardMetadata, ShardMetadata
 
@@ -38,6 +39,7 @@ def get_pipeline_shard_metadata(
             hidden_size=1000,
             supports_tensor=True,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
         device_rank=device_rank,
         world_size=world_size,
diff --git a/src/exo/shared/tests/test_apply/test_apply_custom_model_cards.py b/src/exo/shared/tests/test_apply/test_apply_custom_model_cards.py
new file mode 100644
index 000000000..9d60c28fd
--- /dev/null
+++ b/src/exo/shared/tests/test_apply/test_apply_custom_model_cards.py
@@ -0,0 +1,46 @@
+from exo.shared.apply import apply
+from exo.shared.models.model_cards import ModelCard, ModelTask
+from exo.shared.types.backends import Backend
+from exo.shared.types.common import ModelId
+from exo.shared.types.events import (
+    CustomModelCardAdded,
+    CustomModelCardDeleted,
+    IndexedEvent,
+)
+from exo.shared.types.memory import Memory
+from exo.shared.types.state import State
+
+
+def _model_card(model_id: ModelId) -> ModelCard:
+    return ModelCard(
+        model_id=model_id,
+        n_layers=1,
+        storage_size=Memory.from_bytes(1),
+        hidden_size=1,
+        supports_tensor=True,
+        tasks=[ModelTask.TextGeneration],
+        backends=[Backend.MlxMetal],
+    )
+
+
+def test_custom_model_card_added_is_reduced_into_state() -> None:
+    """Applying CustomModelCardAdded to an empty State records the card by id."""
+    added = _model_card(ModelId("custom/model"))
+    event = IndexedEvent(idx=0, event=CustomModelCardAdded(model_card=added))
+    empty = State()
+
+    reduced = apply(empty, event)
+
+    assert reduced.custom_model_cards == {added.model_id: added}
+
+
+def test_custom_model_card_deleted_removes_card_from_state() -> None:
+    """Applying CustomModelCardDeleted drops the matching card from State."""
+    stored = _model_card(ModelId("custom/model"))
+    seeded = State(
+        custom_model_cards={stored.model_id: stored}, last_event_applied_idx=0
+    )
+    event = IndexedEvent(idx=1, event=CustomModelCardDeleted(model_id=stored.model_id))
+
+    reduced = apply(seeded, event)
+    assert reduced.custom_model_cards == {}
diff --git a/src/exo/shared/tests/test_apply/test_apply_instance_link.py b/src/exo/shared/tests/test_apply/test_apply_instance_link.py
new file mode 100644
index 000000000..0af2e888a
--- /dev/null
+++ b/src/exo/shared/tests/test_apply/test_apply_instance_link.py
@@ -0,0 +1,72 @@
+from exo.shared.apply import (
+    apply_instance_deleted,
+    apply_instance_link_created,
+    apply_instance_link_deleted,
+)
+from exo.shared.types.events import (
+    InstanceDeleted,
+    InstanceLinkCreated,
+    InstanceLinkDeleted,
+)
+from exo.shared.types.instance_link import InstanceLink, InstanceLinkId
+from exo.shared.types.state import State
+from exo.shared.types.worker.instances import InstanceId
+
+
+def _link(
+    prefill: list[InstanceId],
+    decode: list[InstanceId],
+    link_id: InstanceLinkId | None = None,
+) -> InstanceLink:
+    """Build an InstanceLink, default-constructing the id when none is given."""
+    chosen_id = link_id if link_id else InstanceLinkId()
+    return InstanceLink(
+        link_id=chosen_id, prefill_instances=prefill, decode_instances=decode
+    )
+
+
+def test_create_link() -> None:
+    """Creating a link on an empty State stores it under its own id."""
+    created = _link([InstanceId("a")], [InstanceId("b")])
+    result = apply_instance_link_created(InstanceLinkCreated(link=created), State())
+    assert result.instance_links == {created.link_id: created}
+
+
+def test_update_replaces_existing_link() -> None:
+    """Re-creating a link with an existing id overwrites the stored link."""
+    a, b, c = (InstanceId(name) for name in "abc")
+    original = _link([a], [b])
+    revised = original.model_copy(update={"decode_instances": [b, c]})
+    before = State(instance_links={original.link_id: original})
+    after = apply_instance_link_created(InstanceLinkCreated(link=revised), before)
+    assert set(after.instance_links[original.link_id].decode_instances) == {b, c}
+
+
+def test_delete_link() -> None:
+    """Deleting the only link leaves ``instance_links`` empty."""
+    doomed = _link([InstanceId("a")], [InstanceId("b")])
+    before = State(instance_links={doomed.link_id: doomed})
+    event = InstanceLinkDeleted(link_id=doomed.link_id)
+
+    after = apply_instance_link_deleted(event, before)
+    assert after.instance_links == {}
+
+
+def test_instance_deleted_strips_from_links() -> None:
+    """Deleting one prefill instance keeps the link with the remaining members."""
+    a, b, c = (InstanceId(name) for name in "abc")
+    shared = _link([a, c], [b])
+    before = State(instance_links={shared.link_id: shared})
+
+    after = apply_instance_deleted(InstanceDeleted(instance_id=a), before)
+    survivor = after.instance_links[shared.link_id]
+    assert (survivor.prefill_instances, survivor.decode_instances) == ([c], [b])
+
+
+def test_instance_deleted_drops_link_when_role_empties() -> None:
+    """A link whose prefill side becomes empty is removed entirely."""
+    a, b = (InstanceId(name) for name in "ab")
+    lonely = _link([a], [b])
+    before = State(instance_links={lonely.link_id: lonely})
+    after = apply_instance_deleted(InstanceDeleted(instance_id=a), before)
+    assert lonely.link_id not in after.instance_links
diff --git a/src/exo/shared/tests/test_apply/test_apply_rdma_gating.py b/src/exo/shared/tests/test_apply/test_apply_rdma_gating.py
new file mode 100644
index 000000000..492e3fc5e
--- /dev/null
+++ b/src/exo/shared/tests/test_apply/test_apply_rdma_gating.py
@@ -0,0 +1,231 @@
+from datetime import datetime, timezone
+
+from exo.shared.apply import apply_node_gathered_info
+from exo.shared.topology import Topology
+from exo.shared.types.common import NodeId
+from exo.shared.types.events import NodeGatheredInfo
+from exo.shared.types.profiling import (
+    NodeRdmaCtlStatus,
+    NodeThunderboltInfo,
+)
+from exo.shared.types.state import State
+from exo.shared.types.thunderbolt import ThunderboltConnection, ThunderboltIdentifier
+from exo.shared.types.topology import RDMAConnection
+from exo.utils.info_gatherer.info_gatherer import (
+    MacThunderboltConnections,
+    RdmaCtlStatus,
+)
+
+
+def _now() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _make_state_with_thunderbolt_idents(
+    *node_ids_and_uuids: tuple[NodeId, str, str],
+    rdma_ctl: dict[NodeId, NodeRdmaCtlStatus] | None = None,
+) -> State:
+    """Build a State with Thunderbolt identifiers per node so the apply MacThunderboltConnections
+    case can resolve uuid -> (node, iface)."""
+    node_thunderbolt = {
+        nid: NodeThunderboltInfo(
+            interfaces=[ThunderboltIdentifier(rdma_interface=iface, domain_uuid=uuid)]
+        )
+        for nid, uuid, iface in node_ids_and_uuids
+    }
+    return State(
+        node_thunderbolt=node_thunderbolt,
+        node_rdma_ctl=rdma_ctl or {},
+    )
+
+
+def _has_rdma_edge(topology: Topology, source: NodeId, sink: NodeId) -> bool:
+    """Report whether any source->sink edge in ``topology`` is an RDMAConnection."""
+    edges = topology.get_all_connections_between(source, sink)
+    # Edges of other kinds between the same pair are ignored.
+    return any(isinstance(edge, RDMAConnection) for edge in edges)
+
+
+def test_mac_thunderbolt_connections_emits_rdma_when_both_endpoints_enabled():
+    node_a = NodeId()
+    node_b = NodeId()
+    state = _make_state_with_thunderbolt_idents(
+        (node_a, "uuid-a", "rdma_en1"),
+        (node_b, "uuid-b", "rdma_en1"),
+        rdma_ctl={
+            node_a: NodeRdmaCtlStatus(enabled=True),
+            node_b: NodeRdmaCtlStatus(enabled=True),
+        },
+    )
+
+    event = NodeGatheredInfo(
+        node_id=node_a,
+        when=_now(),
+        info=MacThunderboltConnections(
+            conns=[ThunderboltConnection(source_uuid="uuid-a", sink_uuid="uuid-b")]
+        ),
+    )
+
+    new_state = apply_node_gathered_info(event, state)
+
+    assert _has_rdma_edge(new_state.topology, node_a, node_b)
+
+
+def test_mac_thunderbolt_connections_skips_rdma_when_source_rdma_ctl_disabled():
+    node_a = NodeId()
+    node_b = NodeId()
+    state = _make_state_with_thunderbolt_idents(
+        (node_a, "uuid-a", "rdma_en1"),
+        (node_b, "uuid-b", "rdma_en1"),
+        rdma_ctl={
+            node_a: NodeRdmaCtlStatus(enabled=False),
+            node_b: NodeRdmaCtlStatus(enabled=True),
+        },
+    )
+
+    event = NodeGatheredInfo(
+        node_id=node_a,
+        when=_now(),
+        info=MacThunderboltConnections(
+            conns=[ThunderboltConnection(source_uuid="uuid-a", sink_uuid="uuid-b")]
+        ),
+    )
+
+    new_state = apply_node_gathered_info(event, state)
+
+    assert not _has_rdma_edge(new_state.topology, node_a, node_b)
+
+
+def test_mac_thunderbolt_connections_skips_rdma_when_sink_rdma_ctl_disabled():
+    node_a = NodeId()
+    node_b = NodeId()
+    state = _make_state_with_thunderbolt_idents(
+        (node_a, "uuid-a", "rdma_en1"),
+        (node_b, "uuid-b", "rdma_en1"),
+        rdma_ctl={
+            node_a: NodeRdmaCtlStatus(enabled=True),
+            node_b: NodeRdmaCtlStatus(enabled=False),
+        },
+    )
+
+    event = NodeGatheredInfo(
+        node_id=node_a,
+        when=_now(),
+        info=MacThunderboltConnections(
+            conns=[ThunderboltConnection(source_uuid="uuid-a", sink_uuid="uuid-b")]
+        ),
+    )
+
+    new_state = apply_node_gathered_info(event, state)
+
+    assert not _has_rdma_edge(new_state.topology, node_a, node_b)
+
+
+def test_mac_thunderbolt_connections_skips_rdma_when_rdma_ctl_status_missing():
+    """Missing rdma_ctl status defaults to not-enabled — node is RDMA-incapable."""
+    node_a = NodeId()
+    node_b = NodeId()
+    state = _make_state_with_thunderbolt_idents(
+        (node_a, "uuid-a", "rdma_en1"),
+        (node_b, "uuid-b", "rdma_en1"),
+        rdma_ctl={
+            node_a: NodeRdmaCtlStatus(enabled=True),
+            # node_b intentionally absent
+        },
+    )
+
+    event = NodeGatheredInfo(
+        node_id=node_a,
+        when=_now(),
+        info=MacThunderboltConnections(
+            conns=[ThunderboltConnection(source_uuid="uuid-a", sink_uuid="uuid-b")]
+        ),
+    )
+
+    new_state = apply_node_gathered_info(event, state)
+
+    assert not _has_rdma_edge(new_state.topology, node_a, node_b)
+
+
+def test_rdma_ctl_status_disabled_purges_existing_rdma_edges():
+    """When a node reports rdma_ctl disabled, all RDMA edges touching it must be removed."""
+    node_a = NodeId()
+    node_b = NodeId()
+
+    # Start with both nodes RDMA-enabled and existing RDMA edges in the topology.
+    state = _make_state_with_thunderbolt_idents(
+        (node_a, "uuid-a", "rdma_en1"),
+        (node_b, "uuid-b", "rdma_en1"),
+        rdma_ctl={
+            node_a: NodeRdmaCtlStatus(enabled=True),
+            node_b: NodeRdmaCtlStatus(enabled=True),
+        },
+    )
+    state = apply_node_gathered_info(
+        NodeGatheredInfo(
+            node_id=node_a,
+            when=_now(),
+            info=MacThunderboltConnections(
+                conns=[ThunderboltConnection(source_uuid="uuid-a", sink_uuid="uuid-b")]
+            ),
+        ),
+        state,
+    )
+    state = apply_node_gathered_info(
+        NodeGatheredInfo(
+            node_id=node_b,
+            when=_now(),
+            info=MacThunderboltConnections(
+                conns=[ThunderboltConnection(source_uuid="uuid-b", sink_uuid="uuid-a")]
+            ),
+        ),
+        state,
+    )
+    assert _has_rdma_edge(state.topology, node_a, node_b)
+    assert _has_rdma_edge(state.topology, node_b, node_a)
+
+    # Now node_a flips to rdma_ctl disabled — both directions of RDMA edge must drop.
+    state = apply_node_gathered_info(
+        NodeGatheredInfo(
+            node_id=node_a, when=_now(), info=RdmaCtlStatus(enabled=False)
+        ),
+        state,
+    )
+
+    assert not _has_rdma_edge(state.topology, node_a, node_b)
+    assert not _has_rdma_edge(state.topology, node_b, node_a)
+    assert state.node_rdma_ctl[node_a].enabled is False
+
+
+def test_topology_remove_all_rdma_connections_touching_keeps_socket_edges():
+    """Purging RDMA edges for a disabled node must not affect non-RDMA edges."""
+    from exo.shared.types.multiaddr import Multiaddr
+    from exo.shared.types.topology import Connection, SocketConnection
+
+    topology = Topology()
+    node_a = NodeId()
+    node_b = NodeId()
+    topology.add_node(node_a)
+    topology.add_node(node_b)
+    topology.add_connection(
+        Connection(
+            source=node_a,
+            sink=node_b,
+            edge=RDMAConnection(
+                source_rdma_iface="rdma_en1", sink_rdma_iface="rdma_en1"
+            ),
+        )
+    )
+    socket_edge = SocketConnection(
+        sink_multiaddr=Multiaddr(address="/ip4/10.0.0.1/tcp/8000")
+    )
+    topology.add_connection(Connection(source=node_a, sink=node_b, edge=socket_edge))
+
+    topology.remove_all_rdma_connections_touching(node_a)
+
+    assert not _has_rdma_edge(topology, node_a, node_b)
+    # Socket edge survives.
+    assert any(
+        isinstance(edge, SocketConnection)
+        for edge in topology.get_all_connections_between(node_a, node_b)
+    )
diff --git a/src/exo/shared/topology.py b/src/exo/shared/topology.py
index 9d649a6f4..121d5af2d 100644
--- a/src/exo/shared/topology.py
+++ b/src/exo/shared/topology.py
@@ -169,6 +169,22 @@ class Topology:
         for conn in new_connections:
             self.add_connection(conn)
 
+    def remove_all_rdma_connections_touching(self, node_id: NodeId) -> None:
+        """Drop all RDMA edges that start or end at ``node_id``; other edges stay."""
+        if node_id not in self._vertex_indices:
+            return  # unknown node: nothing to purge
+        graph = self._graph
+        vertex = self._vertex_indices[node_id]
+        # Snapshot the incident edge indices first, then delete in a second pass.
+        incident = (*graph.out_edge_indices(vertex), *graph.in_edge_indices(vertex))
+        doomed = [
+            idx
+            for idx in incident
+            if isinstance(graph.get_edge_data_by_index(idx), RDMAConnection)
+        ]
+        for idx in doomed:
+            graph.remove_edge_from_index(idx)
+
     def remove_connection(self, conn: Connection) -> None:
         if (
             conn.source not in self._vertex_indices
diff --git a/src/exo/shared/types/backends.py b/src/exo/shared/types/backends.py
new file mode 100644
index 000000000..d1b9bc6c0
--- /dev/null
+++ b/src/exo/shared/types/backends.py
@@ -0,0 +1,8 @@
+from enum import Enum
+
+
+class Backend(str, Enum):  # str mixin: each member is also its string value
+    MlxMetal = "MlxMetal"
+    MlxCpu = "MlxCpu"
+    MlxCuda = "MlxCuda"
+    Vllm = "Vllm"
diff --git a/src/exo/shared/types/chunks.py b/src/exo/shared/types/chunks.py
index 204556e88..82425d9f9 100644
--- a/src/exo/shared/types/chunks.py
+++ b/src/exo/shared/types/chunks.py
@@ -11,6 +11,7 @@ from exo.api.types import (
 )
 from exo.shared.models.model_cards import ModelId
 from exo.utils.pydantic_ext import TaggedModel
+from exo.worker.runner.diagnostics import KnownRunnerDiagnostic
 
 from .common import CommandId
 
@@ -34,6 +35,10 @@ class ErrorChunk(BaseChunk):
     error_message: str
     finish_reason: Literal["error"] = "error"
 
+    # NOTE: this is a bad place to put this; it creates semantic overlap/confusion.
+    #       At some point someone should move this somewhere else, thanks :)
+    diagnostics: list[KnownRunnerDiagnostic] = []
+
 
 class ToolCallChunk(BaseChunk):
     tool_calls: list[ToolCallItem]
@@ -85,6 +90,6 @@ class PrefillProgressChunk(BaseChunk):
     total_tokens: int
 
 
-GenerationChunk = (
-    TokenChunk | ImageChunk | ToolCallChunk | ErrorChunk | PrefillProgressChunk
-)
+StatusChunk = PrefillProgressChunk
+GenerationChunk = TokenChunk | ImageChunk | ToolCallChunk | ErrorChunk
+Chunk = StatusChunk | GenerationChunk
diff --git a/src/exo/shared/types/commands.py b/src/exo/shared/types/commands.py
index a6c988a56..67d318b25 100644
--- a/src/exo/shared/types/commands.py
+++ b/src/exo/shared/types/commands.py
@@ -7,10 +7,11 @@ from exo.api.types import (
 from exo.shared.models.model_cards import ModelCard, ModelId
 from exo.shared.types.chunks import InputImageChunk
 from exo.shared.types.common import CommandId, NodeId, SystemId
+from exo.shared.types.instance_link import InstanceLinkId
 from exo.shared.types.text_generation import TextGenerationTaskParams
 from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
 from exo.shared.types.worker.shards import Sharding, ShardMetadata
-from exo.utils.pydantic_ext import CamelCaseModel, TaggedModel
+from exo.utils.pydantic_ext import FrozenModel, TaggedModel
 
 
 class BaseCommand(TaggedModel):
@@ -89,6 +90,16 @@ class DeleteCustomModelCard(BaseCommand):
     model_id: ModelId
 
 
+class SetInstanceLink(BaseCommand):  # carries the same three fields as InstanceLink
+    link_id: InstanceLinkId
+    prefill_instances: list[InstanceId]
+    decode_instances: list[InstanceId]
+
+
+class DeleteInstanceLink(BaseCommand):  # names the link by id only
+    link_id: InstanceLinkId
+
+
 DownloadCommand = StartDownload | DeleteDownload | CancelDownload
 
 
@@ -106,14 +117,16 @@ Command = (
     | SendInputChunk
     | AddCustomModelCard
     | DeleteCustomModelCard
+    | SetInstanceLink
+    | DeleteInstanceLink
 )
 
 
-class ForwarderCommand(CamelCaseModel):
+class ForwarderCommand(FrozenModel):
     origin: SystemId
     command: Command
 
 
-class ForwarderDownloadCommand(CamelCaseModel):
+class ForwarderDownloadCommand(FrozenModel):
     origin: SystemId
     command: DownloadCommand
diff --git a/src/exo/shared/types/common.py b/src/exo/shared/types/common.py
index e539386f0..097803d30 100644
--- a/src/exo/shared/types/common.py
+++ b/src/exo/shared/types/common.py
@@ -4,7 +4,7 @@ from uuid import uuid4
 from pydantic import GetCoreSchemaHandler, field_validator
 from pydantic_core import CoreSchema, core_schema
 
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 
 
 class Id(str):
@@ -59,12 +59,12 @@ class TruncatingString(str):
         )
 
 
-class SessionId(CamelCaseModel):
+class SessionId(FrozenModel):
     master_node_id: NodeId
     election_clock: int
 
 
-class Host(CamelCaseModel):
+class Host(FrozenModel):
     ip: str
     port: int
 
diff --git a/src/exo/shared/types/events.py b/src/exo/shared/types/events.py
index d9799313e..01aa0ce5d 100644
--- a/src/exo/shared/types/events.py
+++ b/src/exo/shared/types/events.py
@@ -5,14 +5,15 @@ from pydantic import Field
 
 from exo.shared.models.model_cards import ModelCard
 from exo.shared.topology import Connection
-from exo.shared.types.chunks import GenerationChunk, InputImageChunk
+from exo.shared.types.chunks import Chunk, InputImageChunk
 from exo.shared.types.common import CommandId, Id, ModelId, NodeId, SessionId, SystemId
+from exo.shared.types.instance_link import InstanceLink, InstanceLinkId
 from exo.shared.types.tasks import Task, TaskId, TaskStatus
 from exo.shared.types.worker.downloads import DownloadProgress
 from exo.shared.types.worker.instances import Instance, InstanceId
 from exo.shared.types.worker.runners import RunnerId, RunnerStatus
 from exo.utils.info_gatherer.info_gatherer import GatheredInfo
-from exo.utils.pydantic_ext import CamelCaseModel, FrozenModel, TaggedModel
+from exo.utils.pydantic_ext import FrozenModel, TaggedModel
 
 
 class EventId(Id):
@@ -91,7 +92,7 @@ class NodeDownloadProgress(BaseEvent):
 
 class ChunkGenerated(BaseEvent):
     command_id: CommandId
-    chunk: GenerationChunk
+    chunk: Chunk
 
 
 class InputChunkReceived(BaseEvent):
@@ -137,6 +138,14 @@ class TracesMerged(BaseEvent):
     traces: list[TraceEventData]
 
 
+class InstanceLinkCreated(BaseEvent):  # carries the complete InstanceLink
+    link: InstanceLink
+
+
+class InstanceLinkDeleted(BaseEvent):  # carries only the id of the link
+    link_id: InstanceLinkId
+
+
 Event = (
     TestEvent
     | TaskCreated
@@ -158,17 +167,19 @@ Event = (
     | TracesMerged
     | CustomModelCardAdded
     | CustomModelCardDeleted
+    | InstanceLinkCreated
+    | InstanceLinkDeleted
 )
 
 
-class IndexedEvent(CamelCaseModel):
+class IndexedEvent(FrozenModel):
     """An event indexed by the master, with a globally unique index"""
 
     idx: int = Field(ge=0)
     event: Event
 
 
-class GlobalForwarderEvent(CamelCaseModel):
+class GlobalForwarderEvent(FrozenModel):
     """An event the forwarder will serialize and send over the network"""
 
     origin_idx: int = Field(ge=0)
@@ -177,7 +188,7 @@ class GlobalForwarderEvent(CamelCaseModel):
     event: Event
 
 
-class LocalForwarderEvent(CamelCaseModel):
+class LocalForwarderEvent(FrozenModel):
     """An event the forwarder will serialize and send over the network"""
 
     origin_idx: int = Field(ge=0)
diff --git a/src/exo/shared/types/instance_link.py b/src/exo/shared/types/instance_link.py
new file mode 100644
index 000000000..a3674a922
--- /dev/null
+++ b/src/exo/shared/types/instance_link.py
@@ -0,0 +1,13 @@
+from exo.shared.types.common import Id
+from exo.shared.types.worker.instances import InstanceId
+from exo.utils.pydantic_ext import FrozenModel
+
+
+class InstanceLinkId(Id):  # distinct Id subtype identifying an InstanceLink
+    pass
+
+
+class InstanceLink(FrozenModel):  # NOTE(review): presumably pairs prefill with decode instances - confirm
+    link_id: InstanceLinkId
+    prefill_instances: list[InstanceId]
+    decode_instances: list[InstanceId]
diff --git a/src/exo/shared/types/profiling.py b/src/exo/shared/types/profiling.py
index ad1d48c0f..a3548dc91 100644
--- a/src/exo/shared/types/profiling.py
+++ b/src/exo/shared/types/profiling.py
@@ -7,10 +7,10 @@ import psutil
 
 from exo.shared.types.memory import Memory
 from exo.shared.types.thunderbolt import ThunderboltIdentifier
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 
 
-class MemoryUsage(CamelCaseModel):
+class MemoryUsage(FrozenModel):
     ram_total: Memory
     ram_available: Memory
     swap_total: Memory
@@ -40,7 +40,7 @@ class MemoryUsage(CamelCaseModel):
         )
 
 
-class DiskUsage(CamelCaseModel):
+class DiskUsage(FrozenModel):
     """Disk space usage for the models directory."""
 
     total: Memory
@@ -56,7 +56,7 @@ class DiskUsage(CamelCaseModel):
         )
 
 
-class SystemPerformanceProfile(CamelCaseModel):
+class SystemPerformanceProfile(FrozenModel):
     # TODO: flops_fp16: float
 
     gpu_usage: float = 0.0
@@ -69,13 +69,13 @@ class SystemPerformanceProfile(CamelCaseModel):
 InterfaceType = Literal["wifi", "ethernet", "maybe_ethernet", "thunderbolt", "unknown"]
 
 
-class NetworkInterfaceInfo(CamelCaseModel):
+class NetworkInterfaceInfo(FrozenModel):
     name: str
     ip_address: str
     interface_type: InterfaceType = "unknown"
 
 
-class NodeIdentity(CamelCaseModel):
+class NodeIdentity(FrozenModel):
     """Static and slow-changing node identification data."""
 
     model_id: str = "Unknown"
@@ -85,25 +85,25 @@ class NodeIdentity(CamelCaseModel):
     os_build_version: str = "Unknown"
 
 
-class NodeNetworkInfo(CamelCaseModel):
+class NodeNetworkInfo(FrozenModel):
     """Network interface information for a node."""
 
     interfaces: Sequence[NetworkInterfaceInfo] = []
 
 
-class NodeThunderboltInfo(CamelCaseModel):
+class NodeThunderboltInfo(FrozenModel):
     """Thunderbolt interface identifiers for a node."""
 
     interfaces: Sequence[ThunderboltIdentifier] = []
 
 
-class NodeRdmaCtlStatus(CamelCaseModel):
+class NodeRdmaCtlStatus(FrozenModel):
     """Whether RDMA is enabled on this node (via rdma_ctl)."""
 
     enabled: bool
 
 
-class ThunderboltBridgeStatus(CamelCaseModel):
+class ThunderboltBridgeStatus(FrozenModel):
     """Whether the Thunderbolt Bridge network service is enabled on this node."""
 
     enabled: bool
diff --git a/src/exo/shared/types/state.py b/src/exo/shared/types/state.py
index 7350cfb0f..baef1d72b 100644
--- a/src/exo/shared/types/state.py
+++ b/src/exo/shared/types/state.py
@@ -5,8 +5,11 @@ from typing import Any, cast
 from pydantic import ConfigDict, Field, field_serializer, field_validator
 from pydantic.alias_generators import to_camel
 
+from exo.shared.models.model_cards import ModelCard
 from exo.shared.topology import Topology, TopologySnapshot
-from exo.shared.types.common import NodeId
+from exo.shared.types.backends import Backend
+from exo.shared.types.common import ModelId, NodeId
+from exo.shared.types.instance_link import InstanceLink, InstanceLinkId
 from exo.shared.types.profiling import (
     DiskUsage,
     MemoryUsage,
@@ -21,10 +24,10 @@ from exo.shared.types.tasks import Task, TaskId
 from exo.shared.types.worker.downloads import DownloadProgress
 from exo.shared.types.worker.instances import Instance, InstanceId
 from exo.shared.types.worker.runners import RunnerId, RunnerStatus
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 
 
-class State(CamelCaseModel):
+class State(FrozenModel):
     """Global system state.
 
     The :class:`Topology` instance is encoded/decoded via an immutable
@@ -57,10 +60,17 @@ class State(CamelCaseModel):
     node_thunderbolt: Mapping[NodeId, NodeThunderboltInfo] = {}
     node_thunderbolt_bridge: Mapping[NodeId, ThunderboltBridgeStatus] = {}
     node_rdma_ctl: Mapping[NodeId, NodeRdmaCtlStatus] = {}
+    node_backends: Mapping[NodeId, list[Backend]] = {}
 
     # Detected cycles where all nodes have Thunderbolt bridge enabled (>2 nodes)
     thunderbolt_bridge_cycles: Sequence[Sequence[NodeId]] = []
 
+    instance_links: Mapping[InstanceLinkId, InstanceLink] = {}
+    prefill_server_ports: Mapping[RunnerId, int] = {}
+
+    # User-added model cards. Workers can reconcile their on-disk custom card cache
+    custom_model_cards: Mapping[ModelId, ModelCard] = {}
+
     @field_serializer("topology", mode="plain")
     def _encode_topology(self, value: Topology) -> TopologySnapshot:
         return value.to_snapshot()
diff --git a/src/exo/shared/types/tasks.py b/src/exo/shared/types/tasks.py
index e764ec337..a5fec8cc3 100644
--- a/src/exo/shared/types/tasks.py
+++ b/src/exo/shared/types/tasks.py
@@ -101,3 +101,6 @@ Task = (
     | ImageEdits
     | Shutdown
 )
+TextTask = TextGeneration
+ImageTask = ImageGeneration | ImageEdits
+GenerationTask = TextTask | ImageTask
diff --git a/src/exo/shared/types/text_generation.py b/src/exo/shared/types/text_generation.py
index 4cc0a1ece..29228c363 100644
--- a/src/exo/shared/types/text_generation.py
+++ b/src/exo/shared/types/text_generation.py
@@ -8,10 +8,28 @@ from typing import Annotated, Any, Literal
 
 from pydantic import BaseModel, Field, WrapValidator
 
+from exo.shared.logging import logger
 from exo.shared.types.common import ModelId, TruncatingString
 
 MessageRole = Literal["user", "assistant", "system", "developer", "tool"]
 ReasoningEffort = Literal["none", "minimal", "low", "medium", "high", "xhigh"]
+# How a model wants prior-turn reasoning content handled. Drives both the
+# server-side encoder (drop vs keep) and the integration configs we emit
+# (e.g. opencode's per-model `interleaved` flag).
+#   - "none":            model has no reasoning channel.
+#   - "post_last_user":  reasoning is only meaningful for the latest assistant
+#                        turn; older turns can drop it (drop_thinking=True).
+#   - "suffix":          reasoning is embedded in the assistant content as a
+#                        suffix/prefix; round-tripping content already covers
+#                        it (no separate `reasoning_content` round-trip).
+#   - "channel":         reasoning lives on a dedicated channel (Harmony, etc.)
+#                        and must be sent back verbatim every turn.
+#   - "tool_conditional": always round-trip when the conversation has tools;
+#                        the model relies on prior reasoning to chain tool
+#                        calls (DeepSeek V3.2 / V4).
+ReasoningDialect = Literal[
+    "none", "post_last_user", "suffix", "channel", "tool_conditional"
+]
 
 
 def resolve_reasoning_params(
@@ -97,6 +115,7 @@ class TextGenerationTaskParams(BaseModel, frozen=True):
     stream: bool = False
     tools: list[dict[str, Any]] | None = None
     bench: bool = False
+    use_prefix_cache: bool = False
     top_k: int | None = None
     stop: str | list[str] | None = None
     seed: int | None = None
@@ -108,7 +127,45 @@ class TextGenerationTaskParams(BaseModel, frozen=True):
     min_p: float | None = None
     repetition_penalty: float | None = None
     repetition_context_size: int | None = None
+    presence_penalty: float | None = None
+    frequency_penalty: float | None = None
     images: list[Base64Image] = Field(default_factory=list)
     image_hashes: dict[int, Base64ImageHash] = Field(default_factory=dict)
-    total_input_chunks: int = 0
-    image_count: int = 0
+
+    prefill_endpoint: str | None = None
+
+    def with_card_sampling_defaults(self) -> "TextGenerationTaskParams":
+        from exo.shared.models import model_cards  # imported at call time, not module load
+
+        card = model_cards.card_cache.get(self.model)
+        if card is None:
+            return self  # no cached card for this model: params returned unchanged
+
+        flat = card.sampling_defaults
+        if self.enable_thinking is True and flat.thinking is not None:
+            card_values = flat.thinking
+        elif self.enable_thinking is False and flat.non_thinking is not None:
+            card_values = flat.non_thinking
+        else:
+            card_values = flat  # fallback: the card's top-level defaults
+
+        def resolve[T](request: T | None, card_value: T | None) -> T | None:
+            return request if request is not None else card_value  # request value wins
+
+        updates = {
+            "temperature": resolve(self.temperature, card_values.temperature),
+            "top_p": resolve(self.top_p, card_values.top_p),
+            "top_k": resolve(self.top_k, card_values.top_k),
+            "min_p": resolve(self.min_p, card_values.min_p),
+            "repetition_penalty": resolve(
+                self.repetition_penalty, card_values.repetition_penalty
+            ),
+            "presence_penalty": resolve(
+                self.presence_penalty, card_values.presence_penalty
+            ),
+            "frequency_penalty": resolve(
+                self.frequency_penalty, card_values.frequency_penalty
+            ),
+        }
+        logger.debug(f"Using sampling params for {self.model}:\n{updates}")
+        return self.model_copy(update=updates)  # new object; self is not modified
diff --git a/src/exo/shared/types/thunderbolt.py b/src/exo/shared/types/thunderbolt.py
index 809cf9fa1..34cd1ccad 100644
--- a/src/exo/shared/types/thunderbolt.py
+++ b/src/exo/shared/types/thunderbolt.py
@@ -1,15 +1,15 @@
 import anyio
 from pydantic import BaseModel, Field
 
-from exo.utils.pydantic_ext import CamelCaseModel
+from exo.utils.pydantic_ext import FrozenModel
 
 
-class ThunderboltConnection(CamelCaseModel):
+class ThunderboltConnection(FrozenModel):
     source_uuid: str
     sink_uuid: str
 
 
-class ThunderboltIdentifier(CamelCaseModel):
+class ThunderboltIdentifier(FrozenModel):
     rdma_interface: str
     domain_uuid: str
     link_speed: str = ""
diff --git a/bench/src/exo_bench/__init__.py b/src/exo/shared/types/worker/__init__.py
similarity index 100%
rename from bench/src/exo_bench/__init__.py
rename to src/exo/shared/types/worker/__init__.py
diff --git a/src/exo/shared/types/worker/downloads.py b/src/exo/shared/types/worker/downloads.py
index 29540fe12..938be82ed 100644
--- a/src/exo/shared/types/worker/downloads.py
+++ b/src/exo/shared/types/worker/downloads.py
@@ -6,10 +6,10 @@ from pydantic import BaseModel, ConfigDict, Field, PositiveInt
 from exo.shared.types.common import NodeId
 from exo.shared.types.memory import Memory
 from exo.shared.types.worker.shards import ShardMetadata
-from exo.utils.pydantic_ext import CamelCaseModel, TaggedModel
+from exo.utils.pydantic_ext import FrozenModel, TaggedModel
 
 
-class DownloadProgressData(CamelCaseModel):
+class DownloadProgressData(FrozenModel):
     total: Memory
     downloaded: Memory
     downloaded_this_session: Memory
@@ -53,7 +53,7 @@ DownloadProgress = (
 
 
 class ModelSafetensorsIndexMetadata(BaseModel):
-    total_size: PositiveInt
+    total_size: PositiveInt | None = None
 
 
 class ModelSafetensorsIndex(BaseModel):
diff --git a/src/exo/shared/types/worker/instances.py b/src/exo/shared/types/worker/instances.py
index 76bd6fd4e..16233f3f0 100644
--- a/src/exo/shared/types/worker/instances.py
+++ b/src/exo/shared/types/worker/instances.py
@@ -5,7 +5,7 @@ from pydantic import model_validator
 from exo.shared.models.model_cards import ModelTask
 from exo.shared.types.common import Host, Id, NodeId
 from exo.shared.types.worker.runners import RunnerId, ShardAssignments, ShardMetadata
-from exo.utils.pydantic_ext import CamelCaseModel, TaggedModel
+from exo.utils.pydantic_ext import FrozenModel, TaggedModel
 
 
 class InstanceId(Id):
@@ -39,7 +39,7 @@ class MlxJacclInstance(BaseInstance):
 Instance = MlxRingInstance | MlxJacclInstance
 
 
-class BoundInstance(CamelCaseModel):
+class BoundInstance(FrozenModel):
     instance: Instance
     bound_runner_id: RunnerId
     bound_node_id: NodeId
diff --git a/src/exo/shared/types/worker/runner_response.py b/src/exo/shared/types/worker/runner_response.py
index 27a7a3a0c..9fb330190 100644
--- a/src/exo/shared/types/worker/runner_response.py
+++ b/src/exo/shared/types/worker/runner_response.py
@@ -16,10 +16,6 @@ class BaseRunnerResponse(TaggedModel):
     pass
 
 
-class TokenizedResponse(BaseRunnerResponse):
-    prompt_tokens: int
-
-
 class GenerationResponse(BaseRunnerResponse):
     text: str
     token: int
@@ -70,6 +66,15 @@ class FinishedResponse(BaseRunnerResponse):
     pass
 
 
+class ModelLoadingResponse(BaseRunnerResponse):  # NOTE(review): presumably load progress, layers_loaded of total - confirm
+    layers_loaded: int
+    total: int
+
+
+class CancelledResponse(BaseRunnerResponse):  # marker response: declares no fields of its own
+    pass
+
+
 class PrefillProgressResponse(BaseRunnerResponse):
     processed_tokens: int
     total_tokens: int
diff --git a/src/exo/shared/types/worker/runners.py b/src/exo/shared/types/worker/runners.py
index 1ac68947d..c9ff3b5c1 100644
--- a/src/exo/shared/types/worker/runners.py
+++ b/src/exo/shared/types/worker/runners.py
@@ -5,7 +5,8 @@ from pydantic import model_validator
 from exo.shared.models.model_cards import ModelId
 from exo.shared.types.common import Id, NodeId
 from exo.shared.types.worker.shards import ShardMetadata
-from exo.utils.pydantic_ext import CamelCaseModel, TaggedModel
+from exo.utils.pydantic_ext import FrozenModel, TaggedModel
+from exo.worker.runner.diagnostics import KnownRunnerDiagnostic
 
 
 class RunnerId(Id):
@@ -47,7 +48,7 @@ class RunnerWarmingUp(BaseRunnerStatus):
 
 
 class RunnerReady(BaseRunnerStatus):
-    pass
+    prefill_server_port: int | None = None
 
 
 class RunnerRunning(BaseRunnerStatus):
@@ -64,6 +65,7 @@ class RunnerShutdown(BaseRunnerStatus):
 
 class RunnerFailed(BaseRunnerStatus):
     error_message: str | None = None
+    diagnostics: list[KnownRunnerDiagnostic]
 
 
 RunnerStatus = (
@@ -81,7 +83,7 @@ RunnerStatus = (
 )
 
 
-class ShardAssignments(CamelCaseModel):
+class ShardAssignments(FrozenModel):
     model_id: ModelId
     runner_to_shard: Mapping[RunnerId, ShardMetadata]
     node_to_runner: Mapping[NodeId, RunnerId]
diff --git a/src/exo/utils/async_process.py b/src/exo/utils/async_process.py
new file mode 100644
index 000000000..f7633374a
--- /dev/null
+++ b/src/exo/utils/async_process.py
@@ -0,0 +1,290 @@
+from __future__ import annotations
+
+import contextlib
+import faulthandler
+import multiprocessing as mp
+import os
+import sys
+from collections.abc import Callable, Iterable, Mapping
+from multiprocessing.process import BaseProcess
+from multiprocessing.resource_sharer import DupFd
+from typing import final
+
+from anyio import (
+    TASK_STATUS_IGNORED,
+    BrokenResourceError,
+    CancelScope,
+    ClosedResourceError,
+    Event,
+    create_task_group,
+    move_on_after,
+    sleep,
+    to_thread,
+    wait_readable,
+)
+from anyio.abc import TaskStatus
+from loguru import logger
+
+from exo.utils.channels import Receiver, Sender, channel
+
+_STDOUT_FD = 1
+_STDERR_FD = 2
+_READ_CHUNK_SIZE = 64 * 1024
+_JOIN_GRACE_SECONDS = 3.0
+_TERMINATE_GRACE_SECONDS = 5.0
+_TERMINATE_RETRY_GRACE_SECONDS = 2.0
+_TERMINATE_ATTEMPTS = 10
+_KILL_GRACE_SECONDS = 2.0
+
+
+@final
+class AsyncProcess:  # mp.Process wrapper: stdout/stderr are captured into byte channels
+    def __init__(
+        self,
+        target: Callable[..., object] | None = None,
+        name: str | None = None,
+        args: Iterable[object] = (),
+        kwargs: Mapping[str, object] | None = None,
+        *,
+        daemon: bool | None = None,
+    ) -> None:
+        # setup state: stored as given and handed to mp.Process inside run()
+        self._target = target
+        self._name = name
+        self._args = args
+        self._kwargs = kwargs
+        self._daemon = daemon
+
+        # lifecycle state: filled in by run() as the child starts and exits
+        self._process: BaseProcess | None = None
+        self._pid: int | None = None
+        self._stdout_tx, self._stdout_rx = channel[bytes]()
+        self._stderr_tx, self._stderr_rx = channel[bytes]()
+        self._started = Event()
+        self._done = Event()
+        self._run_cancel_scope: CancelScope | None = None
+        self._start_error: BaseException | None = None
+        self._exitcode: int | None = None
+
+    async def run(self, *, task_status: TaskStatus[None] = TASK_STATUS_IGNORED) -> None:
+        if self._run_cancel_scope is not None or self._done.is_set():
+            raise RuntimeError("process has already been started")  # run() is one-shot
+
+        stdout_read_fd: int | None = None
+        stdout_write_fd: int | None = None
+        stderr_read_fd: int | None = None
+        stderr_write_fd: int | None = None
+
+        def cleanup_stdio_fd() -> None:
+            nonlocal stdout_read_fd, stdout_write_fd, stderr_read_fd, stderr_write_fd
+            stdout_read_fd = _close_fd(stdout_read_fd)  # _close_fd returns None, clearing the slot
+            stdout_write_fd = _close_fd(stdout_write_fd)
+            stderr_read_fd = _close_fd(stderr_read_fd)
+            stderr_write_fd = _close_fd(stderr_write_fd)
+
+        try:
+            with CancelScope() as run_cancel_scope:
+                self._run_cancel_scope = run_cancel_scope  # stop() cancels this scope
+                stdout_read_fd, stdout_write_fd = os.pipe()
+                stderr_read_fd, stderr_write_fd = os.pipe()
+
+                process = mp.Process(
+                    target=_run_with_captured_stdio,
+                    name=self._name,
+                    args=(
+                        DupFd(stdout_write_fd),
+                        DupFd(stderr_write_fd),
+                        self._target,
+                        *self._args,
+                    ),
+                    kwargs={} if self._kwargs is None else self._kwargs,
+                    daemon=self._daemon,
+                )
+                process.start()
+                pid = process.pid
+                if pid is None:
+                    raise RuntimeError("started process has no pid")
+
+                # Important: close the parent's copies of the write ends to prevent hangs.
+                stdout_write_fd = _close_fd(stdout_write_fd)
+                stderr_write_fd = _close_fd(stderr_write_fd)
+
+                self._process = process
+                self._pid = pid
+                self._started.set()
+
+                async with create_task_group() as tg:
+                    tg.start_soon(_drain_fd, stdout_read_fd, self._stdout_tx)
+                    stdout_read_fd = None  # ownership moved to _drain_fd, which closes it
+                    tg.start_soon(_drain_fd, stderr_read_fd, self._stderr_tx)
+                    stderr_read_fd = None  # same hand-off for stderr
+                    task_status.started()
+                    await self.wait()  # returns once the child has an exit code
+        except BaseException as exc:
+            if not self._started.is_set():
+                self._start_error = exc
+                self._started.set()  # wake wait() callers so they re-raise the start error
+            raise
+        finally:
+            try:
+                with CancelScope(shield=True):  # shielded so the child is always reaped
+                    await self._terminate_if_still_alive()
+            finally:
+                cleanup_stdio_fd()  # closes whichever pipe ends are still held here
+                for tx in (self._stdout_tx, self._stderr_tx):
+                    with contextlib.suppress(Exception):
+                        await tx.aclose()
+                if self._process is not None:
+                    with contextlib.suppress(ValueError):
+                        self._process.close()
+                self._run_cancel_scope = None
+                self._done.set()  # releases stop() waiters
+
+    async def stop(self) -> None:
+        if self._run_cancel_scope is None and not self._done.is_set():
+            raise RuntimeError("process has not been started")
+        if self._run_cancel_scope is not None:
+            self._run_cancel_scope.cancel()  # unwinds run(); its finally block reaps the child
+        await self._done.wait()
+
+    async def aclose(self) -> None:
+        await self.stop()  # alias for stop()
+
+    async def wait(self) -> int:
+        if self._exitcode is not None:
+            return self._exitcode  # cached from an earlier poll
+
+        await self._started.wait()
+        if self._start_error is not None:
+            raise self._start_error
+        assert self._process is not None
+
+        while True:
+            exitcode = self.exitcode
+            if exitcode is not None:
+                return exitcode
+            await sleep(0.01)  # poll the exit code every 10 ms
+
+    @property
+    def pid(self) -> int:
+        if self._pid is None:
+            raise RuntimeError("process has not been started")
+        return self._pid
+
+    @property
+    def exitcode(self) -> int | None:
+        if self._exitcode is not None:
+            return self._exitcode
+        if self._process is None:
+            return None
+
+        with contextlib.suppress(ValueError):  # raised by a Process that was already close()d
+            exitcode = self._process.exitcode
+            if exitcode is not None:
+                self._exitcode = exitcode  # cache so it survives Process.close()
+            return exitcode
+        return None
+
+    def is_alive(self) -> bool:
+        if self._process is None:
+            return False
+
+        with contextlib.suppress(ValueError):  # a close()d Process counts as not alive
+            return self._process.is_alive()
+        return False
+
+    # TODO: maybe in the future if needed, create stdin that is also installed,
+    #       and a ByteSendStream handle is provided for it :)
+
+    @property
+    def stdout(self) -> Receiver[bytes]:
+        return self._stdout_rx  # receives the chunks _drain_fd reads from the child's stdout
+
+    @property
+    def stderr(self) -> Receiver[bytes]:
+        return self._stderr_rx  # receives the chunks _drain_fd reads from the child's stderr
+
+    async def _terminate_if_still_alive(self) -> None:
+        process = self._process
+        if process is None or self.exitcode is not None:
+            return  # never started, or already exited
+
+        with contextlib.suppress(ValueError):
+            await to_thread.run_sync(process.join, _JOIN_GRACE_SECONDS)  # stage 1: wait for a voluntary exit
+            if self.exitcode is not None or not process.is_alive():
+                return
+
+            logger.warning("Child process didn't shut down successfully, terminating")
+            process.terminate()  # stage 2: first terminate() attempt
+            with move_on_after(_TERMINATE_GRACE_SECONDS):
+                await self.wait()
+
+            if self.exitcode is not None or not process.is_alive():
+                logger.warning("Terminated nicely in the first attempt!")
+                return
+
+            for attempt in range(2, _TERMINATE_ATTEMPTS + 1):  # attempt 1 already happened above
+                process.terminate()
+                with move_on_after(_TERMINATE_RETRY_GRACE_SECONDS):
+                    await self.wait()
+
+                if self.exitcode is not None or not process.is_alive():
+                    logger.warning(f"That took {attempt} attempts :)")
+                    return
+
+            logger.critical("Child process didn't respond to SIGTERM, killing")
+            j = 0
+            while True:  # stage 3: kill() with no attempt limit, until the child is gone
+                process.kill()
+                with move_on_after(_KILL_GRACE_SECONDS):
+                    await self.wait()
+                j += 1
+                if self.exitcode is not None or not process.is_alive():
+                    break
+            logger.warning(f"That took {j} attempts :(")
+
+
+# Spawn-mode multiprocessing requires a module-level target that can be pickled.
+def _run_with_captured_stdio(
+    stdout: DupFd,
+    stderr: DupFd,
+    target: Callable[..., object] | None,
+    *target_args: object,
+    **target_kwargs: object,
+) -> None:
+    stdout_fd = stdout.detach()  # this process's copy of the fd wrapped by DupFd
+    stderr_fd = stderr.detach()
+
+    try:
+        os.dup2(stdout_fd, _STDOUT_FD)  # fd 1 now refers to the stdout pipe
+        os.dup2(stderr_fd, _STDERR_FD)  # fd 2 now refers to the stderr pipe
+    finally:
+        for fd in (stdout_fd, stderr_fd):
+            if fd not in (_STDOUT_FD, _STDERR_FD):  # never close fd 1/2 themselves
+                _close_fd(fd)
+
+    faulthandler.enable(file=sys.stderr, all_threads=True)  # fault tracebacks go to sys.stderr
+    if target is not None:
+        target(*target_args, **target_kwargs)
+
+
+async def _drain_fd(fd: int, tx: Sender[bytes]) -> None:  # pump fd -> tx until EOF; closes both
+    try:
+        while True:
+            await wait_readable(fd)
+            chunk = os.read(fd, _READ_CHUNK_SIZE)
+            if not chunk:
+                return  # empty read means EOF
+            await tx.send(chunk)
+    except (BrokenPipeError, BrokenResourceError, ClosedResourceError):
+        pass  # pipe or channel already closed: stop draining quietly
+    finally:
+        _close_fd(fd)  # this task owns fd and closes it on every exit path
+        await tx.aclose()
+
+
+def _close_fd(fd: int | None) -> None:
+    """Close ``fd`` unless it is None, ignoring OSError; always returns None."""
+    if fd is not None:
+        with contextlib.suppress(OSError):
+            os.close(fd)
diff --git a/src/exo/utils/daemon.py b/src/exo/utils/daemon.py
new file mode 100644
index 000000000..7636d6808
--- /dev/null
+++ b/src/exo/utils/daemon.py
@@ -0,0 +1,28 @@
+import os
+import sys
+
+_STDIN_FD = 0
+_STDOUT_FD = 1
+_STDERR_FD = 2
+
+
+def detach_stdio_to_devnull() -> None:
+    """Flush Python's stdio streams, then point fds 0, 1 and 2 at os.devnull."""
+
+    for stream in (sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__):
+        if stream is not None:
+            stream.flush()
+
+    standard_fds = (_STDIN_FD, _STDOUT_FD, _STDERR_FD)
+    devnull_fds = (
+        os.open(os.devnull, os.O_RDONLY),
+        os.open(os.devnull, os.O_WRONLY),
+        os.open(os.devnull, os.O_WRONLY),
+    )
+    try:
+        for source_fd, target_fd in zip(devnull_fds, standard_fds, strict=True):
+            os.dup2(source_fd, target_fd)  # target is replaced, source stays open
+    finally:
+        for fd in devnull_fds:
+            if fd not in standard_fds:
+                os.close(fd)
diff --git a/src/exo/utils/info_gatherer/info_gatherer.py b/src/exo/utils/info_gatherer/info_gatherer.py
index 9e75e1435..81194c928 100644
--- a/src/exo/utils/info_gatherer/info_gatherer.py
+++ b/src/exo/utils/info_gatherer/info_gatherer.py
@@ -14,6 +14,7 @@ from loguru import logger
 from pydantic import ValidationError
 
 from exo.shared.constants import EXO_CONFIG_FILE, EXO_DEFAULT_MODELS_DIR
+from exo.shared.types.backends import Backend
 from exo.shared.types.memory import Memory
 from exo.shared.types.profiling import (
     DiskUsage,
@@ -353,6 +354,35 @@ async def _gather_iface_map() -> dict[str, str] | None:
     return ports
 
 
+def _has_nvml_cuda() -> bool:  # True only if NVML reports at least one device
+    try:
+        import pynvml as nvml  # pyright: ignore[reportMissingModuleSource]
+    except ImportError:
+        return False  # pynvml could not be imported
+    try:
+        nvml.nvmlInit()
+        try:
+            return nvml.nvmlDeviceGetCount() > 0
+        finally:
+            nvml.nvmlShutdown()  # reached only after nvmlInit() succeeded
+    except Exception:
+        return False  # init, count or shutdown failure all mean "no CUDA"
+
+
+class NodeBackends(TaggedModel):
+    backends: list[Backend]  # filled by gather() in detection order
+
+    @classmethod
+    async def gather(cls) -> Self:
+        """List usable backends: CPU always, Metal on Darwin, CUDA/vLLM with NVML."""
+        found: list[Backend] = [Backend.MlxCpu]
+        if IS_DARWIN:
+            found += [Backend.MlxMetal]
+        if await to_thread.run_sync(_has_nvml_cuda):
+            found += [Backend.MlxCuda, Backend.Vllm]
+        return cls(backends=found)
+
+
 GatheredInfo = (
     MacmonMetrics
     | MemoryUsage
@@ -365,6 +395,7 @@ GatheredInfo = (
     | MiscData
     | StaticNodeInformation
     | NodeDiskUsage
+    | NodeBackends
 )
 
 
@@ -428,6 +459,8 @@ class InfoGatherer:
             if nc is not None:
                 await self.info_sender.send(nc)
 
+            await self.info_sender.send(await NodeBackends.gather())
+
     def shutdown(self):
         self._tg.cancel_tasks()
 
diff --git a/src/exo/utils/pidfile.py b/src/exo/utils/pidfile.py
new file mode 100644
index 000000000..99d5bcd4c
--- /dev/null
+++ b/src/exo/utils/pidfile.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+import os
+from typing import Final
+
+from exo_pyo3_bindings import Pidfile, PidfileError
+
+from exo.shared.constants import EXO_PID_FILE
+
+_PIDFILE_MODE: Final = 0o600
+
+
+class PidfileLockError(RuntimeError):
+    pass
+
+
+def acquire_exo_pidfile() -> Pidfile:
+    """Acquire the EXO pidfile; any OSError/PidfileError becomes PidfileLockError."""
+    path = EXO_PID_FILE
+    try:  # makedirs is inside so directory errors are wrapped as well
+        os.makedirs(os.path.dirname(path) or os.curdir, exist_ok=True)
+        pidfile = Pidfile(path, _PIDFILE_MODE)
+        pidfile.write()
+    except (OSError, PidfileError) as exception:
+        raise PidfileLockError(
+            f"Failed to acquire EXO pidfile at {path}: {exception}"
+        ) from exception
+    return pidfile
diff --git a/src/exo/utils/ports.py b/src/exo/utils/ports.py
new file mode 100644
index 000000000..f23463df6
--- /dev/null
+++ b/src/exo/utils/ports.py
@@ -0,0 +1,6 @@
+import random
+
+
+def random_ephemeral_port() -> int:
+    drawn = random.randint(49153, 65535)  # draws <= 52415 are shifted down by one
+    return drawn if drawn > 52415 else drawn - 1  # so 52415 itself is never returned
diff --git a/src/exo/utils/power_sampler.py b/src/exo/utils/power_sampler.py
index b8e985a11..c6e61b41a 100644
--- a/src/exo/utils/power_sampler.py
+++ b/src/exo/utils/power_sampler.py
@@ -19,19 +19,21 @@ class PowerSampler:
     ):
         self._get_node_system = get_node_system
         self._interval = interval
-        self._samples: defaultdict[NodeId, list[SystemPerformanceProfile]] = (
-            defaultdict(list)
-        )
+        self._samples: defaultdict[
+            NodeId, list[tuple[float, SystemPerformanceProfile]]
+        ] = defaultdict(list)
         self._start_time: float | None = None
         self._stopped = False
 
-    def _take_sample(self) -> None:
+    def _take_sample(self, t_rel: float | None = None) -> None:
+        assert self._start_time is not None
+        ts = t_rel if t_rel is not None else time.perf_counter() - self._start_time
         for node_id, profile in self._get_node_system().items():
-            self._samples[node_id].append(profile)
+            self._samples[node_id].append((ts, profile))
 
     async def run(self) -> None:
         self._start_time = time.perf_counter()
-        self._take_sample()
+        self._take_sample(t_rel=0.0)
         while not self._stopped:
             await anyio.sleep(self._interval)
             self._take_sample()
@@ -39,26 +41,51 @@ class PowerSampler:
     def result(self) -> PowerUsage:
         self._stopped = True
         assert self._start_time is not None, "result() called before run()"
-        self._take_sample()
         elapsed = time.perf_counter() - self._start_time
+        self._take_sample(t_rel=elapsed)
 
         node_stats: list[NodePowerStats] = []
-        for node_id, profiles in self._samples.items():
-            n = len(profiles)
+        total_energy_j = 0.0
+        for node_id, ts_profiles in self._samples.items():
+            n = len(ts_profiles)
             if n == 0:
                 continue
+            node_energy_j = trapezoidal_energy(ts_profiles, elapsed)
+            avg_power_w = node_energy_j / elapsed if elapsed > 0 else 0.0
+            total_energy_j += node_energy_j
             node_stats.append(
                 NodePowerStats(
                     node_id=node_id,
                     samples=n,
-                    avg_sys_power=sum(p.sys_power for p in profiles) / n,
+                    avg_sys_power=avg_power_w,
                 )
             )
 
-        total_avg_sys = sum(ns.avg_sys_power for ns in node_stats)
+        total_avg_sys_w = total_energy_j / elapsed if elapsed > 0 else 0.0
         return PowerUsage(
             elapsed_seconds=elapsed,
             nodes=node_stats,
-            total_avg_sys_power_watts=total_avg_sys,
-            total_energy_joules=total_avg_sys * elapsed,
+            total_avg_sys_power_watts=total_avg_sys_w,
+            total_energy_joules=total_energy_j,
         )
+
+
+def trapezoidal_energy(
+    ts_profiles: list[tuple[float, SystemPerformanceProfile]],
+    elapsed: float,
+) -> float:
+    """Return the trapezoidal-rule integral of sys_power over the sample times.
+    Each entry is (timestamp, profile). Every adjacent pair adds its mean power
+    times the gap between the two timestamps; zero or negative gaps add nothing.
+    A lone sample is treated as constant power: sys_power * elapsed."""
+    if len(ts_profiles) == 1:
+        _, lone_profile = ts_profiles[0]
+        return lone_profile.sys_power * elapsed
+    total_j = 0.0
+    # Walk (earlier, later) neighbours; an empty list leaves total_j at 0.0.
+    for (t_a, prof_a), (t_b, prof_b) in zip(ts_profiles, ts_profiles[1:]):
+        gap = t_b - t_a
+        if gap <= 0:
+            continue
+        total_j += (prof_a.sys_power + prof_b.sys_power) / 2.0 * gap
+    return total_j
diff --git a/src/exo/utils/pydantic_ext.py b/src/exo/utils/pydantic_ext.py
index 07c8dc5e8..e8c6068e5 100644
--- a/src/exo/utils/pydantic_ext.py
+++ b/src/exo/utils/pydantic_ext.py
@@ -1,5 +1,3 @@
-# pyright: reportAny=false, reportUnknownArgumentType=false, reportUnknownVariableType=false
-
 from typing import Any, Self
 
 from pydantic import BaseModel, ConfigDict, model_serializer, model_validator
@@ -10,19 +8,6 @@ from pydantic_core.core_schema import (
 )
 
 
-class CamelCaseModel(BaseModel):
-    """
-    A model whose fields are aliased to camel-case from snake-case.
-    """
-
-    model_config = ConfigDict(
-        alias_generator=to_camel,
-        validate_by_name=True,
-        extra="forbid",
-        strict=True,
-    )
-
-
 class FrozenModel(BaseModel):
     model_config = ConfigDict(
         alias_generator=to_camel,
@@ -33,19 +18,19 @@ class FrozenModel(BaseModel):
     )
 
 
-class TaggedModel(CamelCaseModel):
+class TaggedModel(FrozenModel):
     @model_serializer(mode="wrap")
     def _serialize(self, handler: SerializerFunctionWrapHandler):
-        inner = handler(self)
+        inner = handler(self)  # pyright: ignore[reportAny]
         return {self.__class__.__name__: inner}
 
     @model_validator(mode="wrap")
     @classmethod
-    def _validate(cls, v: Any, handler: ValidatorFunctionWrapHandler) -> Self:
-        if isinstance(v, dict) and len(v) == 1 and cls.__name__ in v:
-            return handler(v[cls.__name__])
+    def _validate(cls, v: Any, handler: ValidatorFunctionWrapHandler) -> Self:  # pyright: ignore[reportAny]
+        if isinstance(v, dict) and len(v) == 1 and cls.__name__ in v:  # pyright: ignore[reportUnknownArgumentType]
+            return handler(v[cls.__name__])  # pyright: ignore[reportAny]
 
-        return handler(v)
+        return handler(v)  # pyright: ignore[reportAny]
 
     def __str__(self) -> str:
         return f"{self.__class__.__name__}({super().__str__()})"
diff --git a/src/exo/utils/tests/conftest.py b/src/exo/utils/tests/conftest.py
new file mode 100644
index 000000000..a4cae26cc
--- /dev/null
+++ b/src/exo/utils/tests/conftest.py
@@ -0,0 +1,8 @@
+import multiprocessing as mp
+
+import pytest
+
+
+@pytest.fixture(scope="session", autouse=True)
+def mp_force_spawn():  # autouse: applied once per test session without being requested
+    mp.set_start_method("spawn", force=True)  # force=True replaces any earlier choice
diff --git a/src/exo/utils/tests/test_async_process.py b/src/exo/utils/tests/test_async_process.py
new file mode 100644
index 000000000..07e567c10
--- /dev/null
+++ b/src/exo/utils/tests/test_async_process.py
@@ -0,0 +1,417 @@
+import contextlib
+import os
+import signal
+import sys
+from collections.abc import AsyncIterator, Callable
+
+import pytest
+from _pytest.capture import CaptureFixture
+from anyio import EndOfStream, create_task_group, fail_after
+
+from exo.utils.async_process import (
+    AsyncProcess,
+)
+from exo.utils.channels import MpSender, Receiver, mp_channel
+
+
+def _write_to_stdio(prefix: str, *, stderr_suffix: str) -> None:
+    print(f"{prefix}: python stdout")
+    print(f"{prefix}: python stderr {stderr_suffix}", file=sys.stderr)
+    os.write(1, f"{prefix}: fd stdout\n".encode())
+    os.write(2, f"{prefix}: fd stderr {stderr_suffix}\n".encode())
+
+
+def _write_large_output() -> None:  # NOTE: despite the name, only 17 bytes per stream
+    os.write(1, b"stdout-0123456789")
+    os.write(2, b"stderr-0123456789")
+
+
+def _write_all(fd: int, data: bytes) -> None:
+    """Write every byte of ``data`` to ``fd``, continuing after short writes."""
+    view, offset = memoryview(data), 0
+    while offset < len(view):
+        offset += os.write(fd, view[offset:])
+
+
+def _write_large_exact_output(size: int) -> None:
+    _write_all(1, b"stdout:" + (b"x" * size))
+    _write_all(2, b"stderr:" + (b"y" * size))
+
+
+def _raise_after_stderr_write() -> None:
+    os.write(2, b"stderr before exception\n")
+    raise RuntimeError("child boom")
+
+
+def _exit_after_stdio_write(prefix: str, exitcode: int) -> None:
+    os.write(1, f"{prefix}: stdout before _exit\n".encode())
+    os.write(2, f"{prefix}: stderr before _exit\n".encode())
+    os._exit(exitcode)
+
+
+def _abort_after_stdio_write(prefix: str) -> None:
+    os.write(1, f"{prefix}: stdout before abort\n".encode())
+    os.write(2, f"{prefix}: stderr before abort\n".encode())
+    os.abort()
+
+
+def _close_stdio_and_exit() -> None:
+    os.close(1)
+    os.close(2)
+    os._exit(0)
+
+
+def _send_over_mp_channel(send: MpSender[str]) -> None:
+    send.send("hello from child")
+    send.close()
+
+
+def _mlx_force_oom(size: int = 40_000) -> None:
+    """
+    Force an Out-Of-Memory (OOM) error in MLX by performing large tensor operations.
+    """
+    import mlx.core as mx
+
+    print("CHILD: start")
+
+    mx.set_default_device(mx.gpu)
+    a = mx.random.uniform(shape=(size, size), dtype=mx.float32)
+    b = mx.random.uniform(shape=(size, size), dtype=mx.float32)
+    mx.eval(a, b)
+    c = mx.matmul(a, b)
+    d = mx.matmul(a, c)
+    e = mx.matmul(b, c)
+    f = mx.sigmoid(d + e)
+    mx.eval(f)
+
+    print("CHILD: end")
+
+
+async def _collect_stream(
+    stream: Receiver[bytes],
+    output: bytearray,
+) -> None:
+    try:  # keep appending chunks until the receiver raises EndOfStream
+        while True:
+            output.extend(await stream.receive())
+    except EndOfStream:
+        return
+
+
+async def _collect_process_output(
+    process: AsyncProcess,
+) -> tuple[int, bytes, bytes]:
+    stdout = bytearray()
+    stderr = bytearray()
+    exitcodes: list[int] = []
+
+    async with create_task_group() as task_group:
+        task_group.start_soon(_collect_stream, process.stdout, stdout)
+        task_group.start_soon(_collect_stream, process.stderr, stderr)
+        exitcodes.append(await process.wait())
+
+    if not exitcodes:
+        raise RuntimeError("process exited without a return code")
+    return exitcodes[0], bytes(stdout), bytes(stderr)
+
+
+def _fd_identity(fd: int) -> tuple[int, int]:
+    info = os.fstat(fd)  # the (st_dev, st_ino) pair is used to compare fds
+    return (info.st_dev, info.st_ino)
+
+
+def _fd_count() -> int | None:
+    for fd_dir in ("/proc/self/fd", "/dev/fd"):  # first listable directory wins
+        with contextlib.suppress(OSError):
+            return len(os.listdir(fd_dir))  # number of entries in that directory
+    return None  # neither directory could be listed
+
+
+@contextlib.asynccontextmanager
+async def _started_process(process: AsyncProcess) -> AsyncIterator[None]:
+    async with create_task_group() as task_group:
+        await task_group.start(process.run)
+        try:
+            yield
+        finally:
+            await process.stop()
+
+
+async def _run_and_collect(
+    target: Callable[..., object] | None,
+    *,
+    args: tuple[object, ...] = (),
+    kwargs: dict[str, object] | None = None,
+) -> tuple[int, bytes, bytes]:
+    process = AsyncProcess(
+        target,
+        args=args,
+        kwargs=kwargs,
+    )
+    async with _started_process(process):
+        return await _collect_process_output(process)
+
+
+@pytest.mark.anyio
+async def test_spawn_process_captures_stdout_and_stderr_separately(
+    capfd: CaptureFixture[str],
+) -> None:
+    process = AsyncProcess(
+        _write_to_stdio,
+        args=("child",),
+        kwargs={"stderr_suffix": "error"},
+    )
+    async with _started_process(process):
+        exitcode, stdout_bytes, stderr_bytes = await _collect_process_output(process)
+
+    parent_output = capfd.readouterr()
+    stdout = stdout_bytes.decode("utf-8", errors="replace")
+    stderr = stderr_bytes.decode("utf-8", errors="replace")
+
+    assert exitcode == 0
+    assert "child: python stdout" in stdout
+    assert "child: fd stdout" in stdout
+    assert "child: python stderr error" in stderr
+    assert "child: fd stderr error" in stderr
+    assert "child:" not in parent_output.out
+    assert "child:" not in parent_output.err
+
+
+@pytest.mark.anyio
+async def test_process_with_no_target_exits_successfully() -> None:
+    exitcode, stdout, stderr = await _run_and_collect(None)
+
+    assert exitcode == 0
+    assert stdout == b""
+    assert stderr == b""
+
+
+@pytest.mark.anyio
+async def test_output_receivers_and_wait_are_safe_immediately_after_run_starts() -> (
+    None
+):
+    process = AsyncProcess(
+        _write_to_stdio,
+        args=("immediate",),
+        kwargs={"stderr_suffix": "error"},
+    )
+    result: tuple[int, bytes, bytes] | None = None
+
+    async with create_task_group() as task_group:
+        await task_group.start(process.run)
+        try:
+            result = await _collect_process_output(process)
+        finally:
+            await process.stop()
+
+    assert result is not None
+    exitcode, stdout, stderr = result
+    assert exitcode == 0
+    assert b"immediate: fd stdout\n" in stdout
+    assert b"immediate: fd stderr error\n" in stderr
+
+
+@pytest.mark.anyio
+async def test_stop_before_run_raises() -> None:
+    process = AsyncProcess(
+        _write_to_stdio,
+        args=("never",),
+        kwargs={"stderr_suffix": "run"},
+    )
+
+    assert not process.is_alive()
+    with pytest.raises(RuntimeError, match="process has not been started"):
+        await process.stop()
+
+
+@pytest.mark.anyio
+async def test_process_run_is_one_shot() -> None:
+    process = AsyncProcess(None)
+
+    await process.run()
+
+    with pytest.raises(RuntimeError, match="process has already been started"):
+        await process.run()
+
+
+@pytest.mark.anyio
+async def test_stdout_receiver_yields_bytes_chunks() -> None:
+    process = AsyncProcess(_write_large_output)
+
+    async with _started_process(process):
+        first_stdout = await process.stdout.receive()
+        exitcode, remaining_stdout, stderr = await _collect_process_output(process)
+
+    assert exitcode == 0
+    assert first_stdout + remaining_stdout == b"stdout-0123456789"
+    assert stderr == b"stderr-0123456789"
+
+
+@pytest.mark.anyio
+async def test_output_can_be_read_after_process_exits() -> None:
+    process = AsyncProcess(_write_large_output)
+
+    async with create_task_group() as task_group:
+        await task_group.start(process.run)
+        assert await process.wait() == 0
+
+    assert await process.stdout.receive() == b"stdout-0123456789"
+    assert await process.stderr.receive() == b"stderr-0123456789"
+    with pytest.raises(EndOfStream):
+        await process.stdout.receive()
+    with pytest.raises(EndOfStream):
+        await process.stderr.receive()
+
+
+@pytest.mark.anyio
+async def test_large_stdout_and_stderr_are_not_lost() -> None:
+    size = 1024 * 1024
+    exitcode, stdout, stderr = await _run_and_collect(
+        _write_large_exact_output,
+        args=(size,),
+    )
+
+    assert exitcode == 0
+    assert stdout == b"stdout:" + (b"x" * size)
+    assert stderr == b"stderr:" + (b"y" * size)
+
+
+@pytest.mark.anyio
+async def test_child_exception_traceback_is_captured_from_stderr() -> None:
+    process = AsyncProcess(_raise_after_stderr_write)
+
+    async with _started_process(process):
+        exitcode, _, stderr_bytes = await _collect_process_output(process)
+
+    assert exitcode == 1
+    stderr = stderr_bytes.decode("utf-8", errors="replace")
+    assert "stderr before exception" in stderr
+    assert "RuntimeError: child boom" in stderr
+
+
+@pytest.mark.anyio
+async def test_repeated_bad_children_do_not_pollute_or_replace_parent_stdio(
+    capfd: CaptureFixture[str],
+) -> None:
+    stdout_object = sys.stdout
+    stderr_object = sys.stderr
+    stdout_identity = _fd_identity(1)
+    stderr_identity = _fd_identity(2)
+
+    cases: tuple[tuple[Callable[..., object], tuple[object, ...]], ...] = (
+        (_raise_after_stderr_write, ()),
+        (_exit_after_stdio_write, ("exit-child", 17)),
+        (_abort_after_stdio_write, ("abort-child",)),
+    )
+
+    for iteration in range(3):
+        for target, args in cases:
+            exitcode, stdout, stderr = await _run_and_collect(
+                target,
+                args=args,
+            )
+
+            assert exitcode != 0
+            if target is _exit_after_stdio_write:
+                assert stdout == b"exit-child: stdout before _exit\n"
+                assert stderr == b"exit-child: stderr before _exit\n"
+            elif target is _abort_after_stdio_write:
+                assert b"abort-child: stdout before abort\n" in stdout
+                assert b"abort-child: stderr before abort\n" in stderr
+                assert exitcode == -signal.SIGABRT
+            else:
+                assert stdout == b""
+                assert b"stderr before exception\n" in stderr
+                assert b"RuntimeError: child boom" in stderr
+
+        print(f"parent stdout still works {iteration}")
+        print(f"parent stderr still works {iteration}", file=sys.stderr)
+
+    parent_output = capfd.readouterr()
+
+    assert sys.stdout is stdout_object
+    assert sys.stderr is stderr_object
+    assert _fd_identity(1) == stdout_identity
+    assert _fd_identity(2) == stderr_identity
+    assert "parent stdout still works 0" in parent_output.out
+    assert "parent stdout still works 2" in parent_output.out
+    assert "parent stderr still works 0" in parent_output.err
+    assert "parent stderr still works 2" in parent_output.err
+    assert "exit-child:" not in parent_output.out
+    assert "exit-child:" not in parent_output.err
+    assert "abort-child:" not in parent_output.out
+    assert "abort-child:" not in parent_output.err
+    assert "child boom" not in parent_output.err
+
+
+@pytest.mark.anyio
+async def test_child_can_close_stdio_without_corrupting_parent_stdio(
+    capfd: CaptureFixture[str],
+) -> None:
+    stdout_identity = _fd_identity(1)
+    stderr_identity = _fd_identity(2)
+
+    exitcode, stdout, stderr = await _run_and_collect(_close_stdio_and_exit)
+    os.write(1, b"parent stdout after child closed stdio\n")
+    os.write(2, b"parent stderr after child closed stdio\n")
+    parent_output = capfd.readouterr()
+
+    assert exitcode == 0
+    assert stdout == b""
+    assert stderr == b""
+    assert _fd_identity(1) == stdout_identity
+    assert _fd_identity(2) == stderr_identity
+    assert "parent stdout after child closed stdio" in parent_output.out
+    assert "parent stderr after child closed stdio" in parent_output.err
+
+
+@pytest.mark.anyio
+async def test_repeated_crashing_children_do_not_grow_parent_fd_table() -> None:
+    await _run_and_collect(_exit_after_stdio_write, args=("warmup", 23))
+    before = _fd_count()
+    if before is None:
+        pytest.skip("fd table count is not available on this platform")
+
+    for iteration in range(20):
+        exitcode, stdout, stderr = await _run_and_collect(
+            _exit_after_stdio_write,
+            args=(f"fd-child-{iteration}", 31),
+        )
+
+        assert exitcode == 31
+        assert stdout == f"fd-child-{iteration}: stdout before _exit\n".encode()
+        assert stderr == f"fd-child-{iteration}: stderr before _exit\n".encode()
+
+    after = _fd_count()
+    assert after is not None
+    assert after <= before + 2
+
+
+@pytest.mark.anyio
+async def test_process_can_use_mp_channel_with_global_spawn_context() -> None:
+    send, recv = mp_channel[str]()
+    process = AsyncProcess(_send_over_mp_channel, args=(send,))
+
+    async with _started_process(process):
+        with fail_after(2):
+            assert await recv.receive_async() == "hello from child"
+            assert await process.wait() == 0
+
+    with contextlib.suppress(Exception):
+        recv.close()
+
+
+@pytest.mark.anyio
+@pytest.mark.skip(reason="manual MLX OOM isolation check")
+async def test_death(capsys: CaptureFixture[str]) -> None:
+    with capsys.disabled():
+        process = AsyncProcess(_mlx_force_oom)
+        stdout = b""
+        stderr = b""
+        async with _started_process(process):
+            _, stdout, stderr = await _collect_process_output(process)
+
+        print("PARENT: done")
+
+        print("CHILD out:", stdout.decode("utf-8", errors="replace"))
+        print("CHILD err:", stderr.decode("utf-8", errors="replace"), "hello :)")
diff --git a/src/exo/utils/tests/test_daemon.py b/src/exo/utils/tests/test_daemon.py
new file mode 100644
index 000000000..964afebf8
--- /dev/null
+++ b/src/exo/utils/tests/test_daemon.py
@@ -0,0 +1,168 @@
+import contextlib
+import os
+from collections.abc import AsyncIterator
+
+import anyio
+import pytest
+from anyio import EndOfStream, create_task_group, fail_after
+
+from exo.utils.async_process import AsyncProcess
+from exo.utils.channels import MpReceiver, MpSender, Receiver, mp_channel
+from exo.utils.daemon import detach_stdio_to_devnull
+
+
+def _write_before_and_after_detach() -> None:
+    os.write(1, b"before stdout\n")
+    os.write(2, b"before stderr\n")
+    detach_stdio_to_devnull()
+    os.write(1, b"after stdout\n")
+    os.write(2, b"after stderr\n")
+
+
+def _write_grandchild_stdio(label: str) -> None:
+    os.write(1, f"{label} stdout\n".encode())
+    os.write(2, f"{label} stderr\n".encode())
+
+
+async def _spawn_grandchild_and_report(
+    result_sender: MpSender[tuple[int, bytes, bytes]],
+    label: str,
+) -> None:
+    result_sender.send(await _collect_spawned_child(label))
+    result_sender.close()
+
+
+async def _collect_spawned_child(label: str) -> tuple[int, bytes, bytes]:
+    process = AsyncProcess(_write_grandchild_stdio, args=(label,))
+    async with _started_process(process):
+        return await _collect_process_output(process)
+
+
+def _detach_stdio_then_spawn_captured_child(
+    result_sender: MpSender[tuple[int, bytes, bytes]],
+) -> None:
+    detach_stdio_to_devnull()
+    anyio.run(_spawn_grandchild_and_report, result_sender, "grandchild")
+
+
+def _detach_stdio_then_spawn_captured_children_sequentially(
+    result_sender: MpSender[list[tuple[int, bytes, bytes]]],
+) -> None:
+    async def run_children() -> list[tuple[int, bytes, bytes]]:
+        results: list[tuple[int, bytes, bytes]] = []
+        for index in range(5):
+            results.append(await _collect_spawned_child(f"grandchild-{index}"))
+        return results
+
+    detach_stdio_to_devnull()
+    result_sender.send(anyio.run(run_children))
+    result_sender.close()
+
+
+async def _collect_stream(stream: Receiver[bytes], output: bytearray) -> None:
+    while True:
+        try:
+            output.extend(await stream.receive())
+        except EndOfStream:
+            return
+
+
+async def _collect_process_output(
+    process: AsyncProcess,
+) -> tuple[int, bytes, bytes]:
+    stdout = bytearray()
+    stderr = bytearray()
+    exitcodes: list[int] = []
+
+    async with create_task_group() as collect_group:
+        collect_group.start_soon(_collect_stream, process.stdout, stdout)
+        collect_group.start_soon(_collect_stream, process.stderr, stderr)
+        exitcodes.append(await process.wait())
+
+    if not exitcodes:
+        raise RuntimeError("process exited without a return code")
+    return exitcodes[0], bytes(stdout), bytes(stderr)
+
+
+@contextlib.asynccontextmanager
+async def _started_process(process: AsyncProcess) -> AsyncIterator[None]:
+    async with create_task_group() as task_group:
+        await task_group.start(process.run)
+        try:
+            yield
+        finally:
+            await process.stop()
+
+
+async def _run_process_and_receive[T](
+    process: AsyncProcess,
+    recv: MpReceiver[T],
+    *,
+    timeout: float,
+) -> tuple[int, T]:
+    async with _started_process(process):
+        with fail_after(timeout):
+            result = await recv.receive_async()
+            exitcode = await process.wait()
+
+    return exitcode, result
+
+
+@pytest.mark.anyio
+async def test_detach_stdio_to_devnull_redirects_stdio_away_from_capture() -> None:
+    process = AsyncProcess(_write_before_and_after_detach)
+
+    async with _started_process(process):
+        exitcode, stdout, stderr = await _collect_process_output(process)
+
+    assert exitcode == 0
+    assert stdout == b"before stdout\n"
+    assert stderr == b"before stderr\n"
+
+
+@pytest.mark.anyio
+async def test_detached_stdio_process_can_spawn_and_capture_child_stdio() -> None:
+    send, recv = mp_channel[tuple[int, bytes, bytes]]()
+    process = AsyncProcess(_detach_stdio_then_spawn_captured_child, args=(send,))
+
+    try:
+        daemonized_parent_exitcode, result = await _run_process_and_receive(
+            process, recv, timeout=5
+        )
+    finally:
+        recv.close()
+
+    child_exitcode, child_stdout, child_stderr = result
+
+    assert daemonized_parent_exitcode == 0
+    assert child_exitcode == 0
+    assert child_stdout == b"grandchild stdout\n"
+    assert child_stderr == b"grandchild stderr\n"
+
+
+@pytest.mark.anyio
+async def test_detached_stdio_process_can_spawn_captured_children_sequentially() -> (
+    None
+):
+    send, recv = mp_channel[list[tuple[int, bytes, bytes]]]()
+    process = AsyncProcess(
+        _detach_stdio_then_spawn_captured_children_sequentially,
+        args=(send,),
+    )
+
+    try:
+        daemonized_parent_exitcode, results = await _run_process_and_receive(
+            process, recv, timeout=10
+        )
+    finally:
+        recv.close()
+
+    assert daemonized_parent_exitcode == 0
+    assert results == [
+        (
+            0,
+            f"grandchild-{index} stdout\n".encode(),
+            f"grandchild-{index} stderr\n".encode(),
+        )
+        for index in range(5)
+    ]
diff --git a/src/exo/utils/tests/test_pidfile.py b/src/exo/utils/tests/test_pidfile.py
new file mode 100644
index 000000000..c4fa86698
--- /dev/null
+++ b/src/exo/utils/tests/test_pidfile.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import gc
+import os
+import subprocess
+import sys
+import textwrap
+from pathlib import Path
+from typing import Final
+
+import pytest
+
+import exo.utils.pidfile as pidfile
+from exo.utils.pidfile import acquire_exo_pidfile
+
+_CHILD_ACQUIRE_PIDFILE_SCRIPT: Final = textwrap.dedent(
+    """
+    import sys
+    from pathlib import Path
+    from unittest.mock import patch
+
+    import exo.utils.pidfile as pidfile
+    from exo.utils.pidfile import PidfileLockError, acquire_exo_pidfile
+
+    with patch.object(pidfile, "EXO_PID_FILE", Path(sys.argv[1])):
+        try:
+            handle = acquire_exo_pidfile()
+        except PidfileLockError as exception:
+            print(str(exception))
+            raise SystemExit(73) from exception
+
+        del handle
+    """
+)
+
+
+def _use_pidfile_path(monkeypatch: pytest.MonkeyPatch, path: Path) -> None:
+    monkeypatch.setattr(pidfile, "EXO_PID_FILE", path)
+
+
+def _run_child_acquire_pidfile(path: Path) -> subprocess.CompletedProcess[str]:
+    return subprocess.run(
+        [sys.executable, "-c", _CHILD_ACQUIRE_PIDFILE_SCRIPT, str(path)],
+        check=False,
+        capture_output=True,
+        text=True,
+    )
+
+
+def test_acquire_exo_pidfile_writes_current_pid_and_removes_on_drop(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    path = tmp_path / "exo.pid"
+    _use_pidfile_path(monkeypatch, path)
+
+    handle = acquire_exo_pidfile()
+    assert path.read_text() == str(os.getpid())
+
+    del handle
+    gc.collect()
+
+    assert not path.exists()
+
+
+def test_acquire_exo_pidfile_rejects_second_process(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    path = tmp_path / "exo.pid"
+    _use_pidfile_path(monkeypatch, path)
+
+    handle = acquire_exo_pidfile()
+    try:
+        blocked_child = _run_child_acquire_pidfile(path)
+        assert blocked_child.returncode == 73
+        assert "Failed to acquire EXO pidfile" in blocked_child.stdout
+    finally:
+        del handle
+        gc.collect()
+
+    unblocked_child = _run_child_acquire_pidfile(path)
+    assert unblocked_child.returncode == 0
+    assert unblocked_child.stdout == ""
diff --git a/src/exo/utils/tests/test_power_sampler.py b/src/exo/utils/tests/test_power_sampler.py
index 69f4ccee9..7880936cb 100644
--- a/src/exo/utils/tests/test_power_sampler.py
+++ b/src/exo/utils/tests/test_power_sampler.py
@@ -111,6 +111,36 @@ async def test_empty_state() -> None:
     assert result.total_energy_joules == 0.0
 
 
+def test_trapezoidal_unit_dt_weighting() -> None:
+    """Pure unit test on the integration helper. Crafted samples where the
+    arithmetic mean is wildly wrong vs the time-weighted result."""
+    from exo.utils.power_sampler import trapezoidal_energy
+
+    # 5 s window. Power = 10 W for the first 4.9 s, then 100 W for the last 0.1 s.
+    # Three samples: t=0 W=10, t=4.9 W=10, t=5.0 W=100.
+    samples = [
+        (0.0, _make_profile(10.0)),
+        (4.9, _make_profile(10.0)),
+        (5.0, _make_profile(100.0)),
+    ]
+    energy = trapezoidal_energy(samples, elapsed=5.0)
+    # (10+10)/2 * 4.9 + (10+100)/2 * 0.1 = 49 + 5.5 = 54.5 J
+    assert abs(energy - 54.5) < 1e-9
+    avg = energy / 5.0  # 10.9 W
+    # Arithmetic mean of the three samples would be (10+10+100)/3 ≈ 40 W.
+    # Trapezoidal correctly weights each segment by its dt.
+    assert abs(avg - 10.9) < 1e-9
+
+
+def test_trapezoidal_unit_single_sample() -> None:
+    """One sample: no window to integrate over, so fall back to constant power
+    over the elapsed duration."""
+    from exo.utils.power_sampler import trapezoidal_energy
+
+    samples = [(0.0, _make_profile(42.0))]
+    assert trapezoidal_energy(samples, elapsed=3.0) == 42.0 * 3.0
+
+
 async def test_result_stops_sampling() -> None:
     """Calling result() should stop the sampler's run loop."""
     state: dict[NodeId, SystemPerformanceProfile] = {
diff --git a/src/exo/worker/disaggregated/__init__.py b/src/exo/worker/disaggregated/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/exo/worker/disaggregated/protocol.py b/src/exo/worker/disaggregated/protocol.py
new file mode 100644
index 000000000..7e2c07673
--- /dev/null
+++ b/src/exo/worker/disaggregated/protocol.py
@@ -0,0 +1,152 @@
+from typing import BinaryIO, Literal
+
+import msgspec
+
+DType = Literal["bfloat16", "float16", "float32"]
+
+
+class ProtocolError(Exception):
+    pass
+
+
+class Header(msgspec.Struct):
+    request_id: str = ""
+    model_id: str = ""
+    num_layers: int = 0
+    dtype: DType = "bfloat16"
+    start_pos: int = 0
+
+
+class TensorBlob(msgspec.Struct):
+    dtype: DType
+    shape: tuple[int, ...]
+    data: bytes
+
+
+class KVChunk(msgspec.Struct, tag="kv_chunk"):
+    layer_idx: int
+    num_tokens: int
+    n_heads: int
+    head_dim: int
+    dtype: DType
+    keys: bytes
+    values: bytes
+
+    @property
+    def shape(self) -> tuple[int, int, int]:
+        return (self.num_tokens, self.n_heads, self.head_dim)
+
+
+class ArraysState(msgspec.Struct, tag="arrays_state"):
+    layer_idx: int
+    arrays: list[TensorBlob] = []
+
+
+class Done(msgspec.Struct, tag="done"):
+    total_tokens: int
+
+
+class ErrorMessage(msgspec.Struct, tag="error"):
+    code: int
+    message: str
+
+
+Message = KVChunk | ArraysState | Done | ErrorMessage
+
+_msg_encoder = msgspec.msgpack.Encoder()
+_msg_decoder: msgspec.msgpack.Decoder[Message] = msgspec.msgpack.Decoder(Message)
+_header_encoder = msgspec.msgpack.Encoder()
+_header_decoder: msgspec.msgpack.Decoder[Header] = msgspec.msgpack.Decoder(Header)
+
+
+def _read_exactly(stream: BinaryIO, n: int) -> bytes:
+    buf = bytearray()
+    while len(buf) < n:
+        chunk = stream.read(n - len(buf))
+        if not chunk:
+            if len(buf) == 0:
+                return b""
+            raise ConnectionError(f"Connection closed after {len(buf)}/{n} bytes")
+        buf.extend(chunk)
+    return bytes(buf)
+
+
+def write_frame(stream: BinaryIO, payload: bytes) -> None:
+    stream.write(len(payload).to_bytes(4, "big"))
+    stream.write(payload)
+    stream.flush()
+
+
+def read_frame(stream: BinaryIO) -> bytes:
+    length = int.from_bytes(_read_exactly(stream, 4), "big")  # b"" at EOF -> 0
+    payload = _read_exactly(stream, length)
+    if length and not payload:  # EOF right after the prefix is truncation
+        raise ConnectionError(f"Connection closed after 0/{length} bytes")
+    return payload
+
+
+def write_header(stream: BinaryIO, header: Header) -> None:
+    write_frame(stream, _header_encoder.encode(header))
+
+
+def read_header(stream: BinaryIO) -> Header:
+    payload = read_frame(stream)
+    if not payload:
+        raise ConnectionError("No header received")
+    try:
+        return _header_decoder.decode(payload)
+    except msgspec.DecodeError as exc:
+        raise ProtocolError(f"Bad header: {exc}") from exc
+
+
+def write_message(stream: BinaryIO, msg: Message) -> None:
+    write_frame(stream, _msg_encoder.encode(msg))
+
+
+def read_message(stream: BinaryIO) -> Message | None:
+    payload = read_frame(stream)
+    if not payload:
+        return None
+    try:
+        return _msg_decoder.decode(payload)
+    except msgspec.DecodeError as exc:
+        raise ProtocolError(f"Bad message: {exc}") from exc
+
+
+def write_kv_chunk(
+    stream: BinaryIO,
+    *,
+    layer_idx: int,
+    num_tokens: int,
+    n_heads: int,
+    head_dim: int,
+    dtype: DType,
+    keys: bytes,
+    values: bytes,
+) -> None:
+    write_message(
+        stream,
+        KVChunk(
+            layer_idx=layer_idx,
+            num_tokens=num_tokens,
+            n_heads=n_heads,
+            head_dim=head_dim,
+            dtype=dtype,
+            keys=keys,
+            values=values,
+        ),
+    )
+
+
+def write_arrays_state(
+    stream: BinaryIO, layer_idx: int, arrays: list[TensorBlob]
+) -> None:
+    write_message(stream, ArraysState(layer_idx=layer_idx, arrays=arrays))
+
+
+def write_done(stream: BinaryIO, total_tokens: int) -> None:
+    write_message(stream, Done(total_tokens=total_tokens))
+
+
+def write_error(stream: BinaryIO, code: int, message: str) -> None:
+    write_message(stream, ErrorMessage(code=code, message=message))
diff --git a/src/exo/worker/disaggregated/server.py b/src/exo/worker/disaggregated/server.py
new file mode 100644
index 000000000..67ba89d55
--- /dev/null
+++ b/src/exo/worker/disaggregated/server.py
@@ -0,0 +1,105 @@
+import socket
+import socketserver
+import threading
+from collections.abc import Callable
+from typing import BinaryIO, cast
+
+import msgspec
+from loguru import logger
+
+from exo.worker.disaggregated.protocol import (
+    Header,
+    read_frame,
+    write_error,
+    write_frame,
+    write_header,
+)
+
+
+class PrefillRequest(msgspec.Struct):
+    request_id: str = ""
+    model_id: str = ""
+    token_ids: list[int] = msgspec.field(default_factory=list)
+    start_pos: int = 0
+
+
+_request_encoder = msgspec.msgpack.Encoder()
+_request_decoder: msgspec.msgpack.Decoder[PrefillRequest] = msgspec.msgpack.Decoder(
+    PrefillRequest
+)
+
+
+def write_request(stream: BinaryIO, job: PrefillRequest) -> None:
+    write_frame(stream, _request_encoder.encode(job))
+
+
+def read_request(stream: BinaryIO) -> PrefillRequest:
+    payload = read_frame(stream)
+    if not payload:
+        raise ConnectionError("No request received")
+    return _request_decoder.decode(payload)
+
+
+ResolveHandler = Callable[[PrefillRequest, BinaryIO], bool]
+
+
+def _send_error(wfile: BinaryIO, code: int, message: str) -> None:
+    try:
+        write_header(wfile, Header(num_layers=0, dtype="float32"))
+        write_error(wfile, code=code, message=message)
+    except Exception:
+        logger.opt(exception=True).debug(f"Could not send error {code} to peer")
+
+
+class _PrefillHandler(socketserver.StreamRequestHandler):
+    def setup(self) -> None:
+        super().setup()
+        sock = cast(socket.socket, self.request)
+        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 4 * 1024 * 1024)
+
+    def handle(self) -> None:
+        server = cast(PrefillServer, self.server)
+        wfile: BinaryIO = cast(BinaryIO, cast(object, self.wfile))
+        rfile: BinaryIO = cast(BinaryIO, cast(object, self.rfile))
+        try:
+            job = read_request(rfile)
+        except ConnectionError:
+            return
+        except (msgspec.DecodeError, ValueError) as exc:
+            _send_error(wfile, 400, f"Bad request: {exc}")
+            return
+        try:
+            picked_up = server.resolve(job, wfile)
+        except Exception as e:
+            logger.opt(exception=e).warning(
+                f"Prefill resolve error for request_id={job.request_id}"
+            )
+            _send_error(wfile, 500, str(e))
+            return
+        if not picked_up:
+            _send_error(
+                wfile, 503, f"Prefill not picked up for request_id={job.request_id!r}"
+            )
+
+
+class PrefillServer(socketserver.ThreadingTCPServer):
+    allow_reuse_address = True
+    daemon_threads = True
+    resolve: ResolveHandler
+
+    def __init__(self, resolve: ResolveHandler, host: str, port: int) -> None:
+        super().__init__((host, port), _PrefillHandler)
+        self.resolve = resolve
+        self._thread = threading.Thread(
+            target=self.serve_forever, name="prefill-server"
+        )
+        self._thread.start()
+        logger.info(f"Prefill server listening on {host}:{port}")
+
+    def stop(self) -> None:
+        self.shutdown()
+        self.server_close()
+        if self._thread is not None:
+            self._thread.join(timeout=5)
+            self._thread = None
diff --git a/src/exo/worker/engines/base.py b/src/exo/worker/engines/base.py
new file mode 100644
index 000000000..45b3195bd
--- /dev/null
+++ b/src/exo/worker/engines/base.py
@@ -0,0 +1,60 @@
+from abc import ABC, abstractmethod
+from collections.abc import Generator, Iterable
+from typing import BinaryIO
+
+from exo.shared.types.chunks import Chunk
+from exo.shared.types.tasks import CANCEL_ALL_TASKS, GenerationTask, TaskId
+from exo.shared.types.worker.instances import BoundInstance
+from exo.shared.types.worker.runner_response import (
+    CancelledResponse,
+    FinishedResponse,
+    ModelLoadingResponse,
+)
+from exo.worker.disaggregated.server import PrefillRequest
+
+
+class Engine(ABC):
+    _cancelled_tasks: set[TaskId]
+
+    def should_cancel(self, task_id: TaskId) -> bool:
+        return (
+            task_id in self._cancelled_tasks
+            or CANCEL_ALL_TASKS in self._cancelled_tasks
+        )
+
+    @abstractmethod
+    def warmup(self) -> None: ...
+
+    @abstractmethod
+    def submit(
+        self,
+        task: GenerationTask,
+    ) -> None: ...
+
+    @abstractmethod
+    def step(
+        self,
+    ) -> Iterable[tuple[TaskId, Chunk | CancelledResponse | FinishedResponse]]: ...
+
+    @abstractmethod
+    def close(self) -> None: ...
+
+    @abstractmethod
+    def serve_prefill(self, request: PrefillRequest, wfile: BinaryIO) -> None: ...
+
+
+class Builder(ABC):
+    @abstractmethod
+    def connect(self, bound_instance: BoundInstance) -> None: ...
+
+    @abstractmethod
+    def load(
+        self,
+        bound_instance: BoundInstance,
+    ) -> Generator[ModelLoadingResponse]: ...
+
+    @abstractmethod
+    def build(self) -> Engine: ...
+
+    @abstractmethod
+    def close(self) -> None: ...
diff --git a/src/exo/worker/engines/image/__init__.py b/src/exo/worker/engines/image/__init__.py
index c83b4702f..d0ccbac89 100644
--- a/src/exo/worker/engines/image/__init__.py
+++ b/src/exo/worker/engines/image/__init__.py
@@ -1,12 +1,16 @@
+from exo.worker.engines.image.builder import (
+    ImageEngine,
+    MfluxBuilder,
+)
 from exo.worker.engines.image.distributed_model import (
     DistributedImageModel,
-    initialize_image_model,
 )
 from exo.worker.engines.image.generate import generate_image, warmup_image_generator
 
 __all__ = [
+    "MfluxBuilder",
+    "ImageEngine",
     "DistributedImageModel",
     "generate_image",
-    "initialize_image_model",
     "warmup_image_generator",
 ]
diff --git a/src/exo/worker/engines/image/builder.py b/src/exo/worker/engines/image/builder.py
new file mode 100644
index 000000000..4d20fd887
--- /dev/null
+++ b/src/exo/worker/engines/image/builder.py
@@ -0,0 +1,224 @@
+import contextlib
+from collections import deque
+from collections.abc import Generator, Iterable
+from dataclasses import dataclass, field
+from typing import BinaryIO
+
+import mlx.core as mx
+from loguru import logger
+
+from exo.api.types import ImageEditsTaskParams, ImageGenerationTaskParams
+from exo.shared.constants import EXO_TRACING_ENABLED
+from exo.shared.tracing import clear_trace_buffer, get_trace_buffer
+from exo.shared.types.chunks import Chunk, ErrorChunk
+from exo.shared.types.events import (
+    Event,
+    TraceEventData,
+    TracesCollected,
+)
+from exo.shared.types.tasks import (
+    GenerationTask,
+    ImageEdits,
+    ImageGeneration,
+    ImageTask,
+    TaskId,
+)
+from exo.shared.types.worker.instances import BoundInstance
+from exo.shared.types.worker.runner_response import (
+    CancelledResponse,
+    FinishedResponse,
+    ModelLoadingResponse,
+)
+from exo.shared.types.worker.shards import (
+    CfgShardMetadata,
+    PipelineShardMetadata,
+    ShardMetadata,
+)
+from exo.utils.channels import MpReceiver, MpSender
+from exo.worker.disaggregated.server import PrefillRequest
+from exo.worker.engines.base import Builder, Engine
+from exo.worker.engines.image.distributed_model import (
+    DistributedImageModel,
+)
+from exo.worker.engines.image.generate import (
+    generate_image,
+    warmup_image_generator,
+)
+from exo.worker.engines.mlx.utils_mlx import (
+    initialize_mlx,
+)
+
+
+def _is_primary_output_node(shard_metadata: ShardMetadata) -> bool:
+    """Check if this node is the primary output node for image generation.
+
+    For CFG models: the last pipeline stage in CFG group 0 (positive prompt).
+    For non-CFG models: the last pipeline stage.
+    """
+    if isinstance(shard_metadata, CfgShardMetadata):
+        is_pipeline_last = (
+            shard_metadata.pipeline_rank == shard_metadata.pipeline_world_size - 1
+        )
+        return is_pipeline_last and shard_metadata.cfg_rank == 0
+    elif isinstance(shard_metadata, PipelineShardMetadata):
+        return shard_metadata.device_rank == shard_metadata.world_size - 1
+    return False
+
+
+def _send_traces_if_enabled(
+    event_sender: MpSender[Event],
+    task_id: TaskId,
+    rank: int,
+) -> None:
+    if not EXO_TRACING_ENABLED:
+        return
+
+    traces = get_trace_buffer()
+    if traces:
+        trace_data = [
+            TraceEventData(
+                name=t.name,
+                start_us=t.start_us,
+                duration_us=t.duration_us,
+                rank=t.rank,
+                category=t.category,
+            )
+            for t in traces
+        ]
+        event_sender.send(
+            TracesCollected(
+                task_id=task_id,
+                rank=rank,
+                traces=trace_data,
+            )
+        )
+    clear_trace_buffer()
+
+
+@dataclass
+class MfluxBuilder(Builder):
+    event_sender: MpSender[Event]
+    cancel_receiver: MpReceiver[TaskId]
+    shard_metadata: ShardMetadata | None = None
+    image_model: DistributedImageModel | None = None
+    group: mx.distributed.Group | None = None
+
+    def connect(self, bound_instance: BoundInstance) -> None:
+        self.group = initialize_mlx(bound_instance)
+
+    def load(self, bound_instance: BoundInstance) -> Generator[ModelLoadingResponse]:
+        self.shard_metadata = bound_instance.bound_shard
+        self.image_model = DistributedImageModel.from_shard_metadata(
+            bound_instance.bound_shard, self.group
+        )
+        return
+        # Unreachable on purpose: the bare yield is what makes load() a generator.
+        yield
+
+    def close(self) -> None:
+        with contextlib.suppress(NameError, AttributeError):
+            del self.image_model, self.group
+
+    def build(
+        self,
+    ) -> Engine:
+        assert self.image_model
+        assert self.shard_metadata
+
+        return ImageEngine(
+            self.image_model,
+            self.shard_metadata,
+            self.event_sender,
+            self.cancel_receiver,
+        )
+
+
+@dataclass
+class ImageEngine(Engine):
+    image_model: DistributedImageModel
+    shard_metadata: ShardMetadata
+    event_sender: MpSender[Event]
+    cancel_receiver: MpReceiver[TaskId]
+    current_gen: (
+        Generator[tuple[TaskId, Chunk | FinishedResponse | CancelledResponse]] | None
+    ) = field(init=False, default=None)
+    queue: deque[ImageTask] = field(init=False, default_factory=deque)
+    _cancelled_tasks: set[TaskId] = field(init=False, default_factory=set)
+
+    def warmup(self) -> None:
+        image = warmup_image_generator(model=self.image_model)
+        if image is not None:
+            logger.info(f"warmed up by generating {image.size} image")
+        else:
+            logger.info("warmup completed (non-primary node)")
+
+    def submit(
+        self,
+        task: GenerationTask,
+    ) -> None:
+        assert isinstance(task, (ImageGeneration, ImageEdits))
+        self.queue.append(task)
+
+    def step(
+        self,
+    ) -> Iterable[tuple[TaskId, Chunk | CancelledResponse | FinishedResponse]]:
+        resp = None
+        if self.current_gen is not None:
+            resp = next(self.current_gen, None)
+        if resp is None and len(self.queue) > 0:
+            task = self.queue.popleft()
+            self.current_gen = self._run_image_task(task.task_id, task.task_params)
+            resp = next(self.current_gen, None)
+        return (
+            (resp,)
+            if resp is not None and _is_primary_output_node(self.shard_metadata)
+            else ()
+        )
+
+    def close(self) -> None:
+        with contextlib.suppress(NameError, AttributeError):
+            del self.image_model
+
+    def serve_prefill(self, request: PrefillRequest, wfile: BinaryIO) -> None:
+        raise NotImplementedError() from None
+
+    def _run_image_task(
+        self,
+        task_id: TaskId,
+        task_params: ImageGenerationTaskParams | ImageEditsTaskParams,
+    ) -> Generator[tuple[TaskId, Chunk | FinishedResponse | CancelledResponse]]:
+        """Yield one task's chunks, then FinishedResponse, then re-raise any error."""
+        assert self.image_model
+        logger.info(f"received image task: {str(task_params)[:500]}")
+        failure: Exception | None = None
+
+        def cancel_checker() -> bool:
+            for cancel_id in self.cancel_receiver.collect():
+                self._cancelled_tasks.add(cancel_id)
+            return self.should_cancel(task_id)
+
+        try:
+            # todo: yield CancelledResponse properly
+            for response in generate_image(
+                model=self.image_model, task=task_params, cancel_checker=cancel_checker
+            ):
+                if _is_primary_output_node(self.shard_metadata):
+                    yield (task_id, response)
+        except Exception as e:
+            failure = e  # re-raised below, once FinishedResponse has gone out
+            if _is_primary_output_node(self.shard_metadata):
+                model_id = self.shard_metadata.model_card.model_id
+                yield (
+                    task_id,
+                    ErrorChunk(
+                        model=model_id, finish_reason="error", error_message=str(e)
+                    ),
+                )
+        finally:
+            # Cleanup only: a yield here turns close()/GC into a RuntimeError.
+            _send_traces_if_enabled(
+                self.event_sender, task_id, self.shard_metadata.device_rank
+            )
+        yield (task_id, FinishedResponse())
+        if failure is not None:
+            raise failure
diff --git a/src/exo/worker/engines/image/distributed_model.py b/src/exo/worker/engines/image/distributed_model.py
index 7d866a3a5..4fc375cac 100644
--- a/src/exo/worker/engines/image/distributed_model.py
+++ b/src/exo/worker/engines/image/distributed_model.py
@@ -1,6 +1,6 @@
 from collections.abc import Callable, Generator
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal
 
 import mlx.core as mx
 from mflux.models.common.config.config import Config
@@ -8,8 +8,12 @@ from PIL import Image
 
 from exo.api.types import AdvancedImageParams
 from exo.download.download_utils import build_model_path
-from exo.shared.types.worker.instances import BoundInstance
-from exo.shared.types.worker.shards import CfgShardMetadata, PipelineShardMetadata
+from exo.shared.types.common import ModelId
+from exo.shared.types.worker.shards import (
+    CfgShardMetadata,
+    PipelineShardMetadata,
+    ShardMetadata,
+)
 from exo.worker.engines.image.config import ImageModelConfig
 from exo.worker.engines.image.models import (
     create_adapter_for_model,
@@ -17,21 +21,22 @@ from exo.worker.engines.image.models import (
 )
 from exo.worker.engines.image.models.base import ModelAdapter
 from exo.worker.engines.image.pipeline import DiffusionRunner
-from exo.worker.engines.mlx.utils_mlx import mlx_distributed_init, mx_barrier
+from exo.worker.engines.mlx.utils_mlx import mx_barrier
 from exo.worker.runner.bootstrap import logger
 
 
 class DistributedImageModel:
+    model_id: ModelId
     _config: ImageModelConfig
     _adapter: ModelAdapter[Any, Any]
     _runner: DiffusionRunner
 
     def __init__(
         self,
-        model_id: str,
+        model_id: ModelId,
         local_path: Path,
         shard_metadata: PipelineShardMetadata | CfgShardMetadata,
-        group: Optional[mx.distributed.Group] = None,
+        group: mx.distributed.Group | None,
         quantize: int | None = None,
     ):
         config = get_config_for_model(model_id)
@@ -68,37 +73,27 @@ class DistributedImageModel:
         else:
             logger.info("Single-node initialization")
 
+        self.model_id = model_id
         self._config = config
         self._adapter = adapter
         self._runner = runner
 
     @classmethod
-    def from_bound_instance(
-        cls, bound_instance: BoundInstance
+    def from_shard_metadata(
+        cls, shard: ShardMetadata, group: mx.distributed.Group | None
     ) -> "DistributedImageModel":
-        model_id = bound_instance.bound_shard.model_card.model_id
+        model_id = shard.model_card.model_id
         model_path = build_model_path(model_id)
 
-        shard_metadata = bound_instance.bound_shard
-        if not isinstance(shard_metadata, (PipelineShardMetadata, CfgShardMetadata)):
+        if not isinstance(shard, (PipelineShardMetadata, CfgShardMetadata)):
             raise ValueError(
                 "Expected PipelineShardMetadata or CfgShardMetadata for image generation"
             )
 
-        is_distributed = (
-            len(bound_instance.instance.shard_assignments.node_to_runner) > 1
-        )
-
-        if is_distributed:
-            logger.info("Starting distributed init for image model")
-            group = mlx_distributed_init(bound_instance)
-        else:
-            group = None
-
         return cls(
             model_id=model_id,
             local_path=model_path,
-            shard_metadata=shard_metadata,
+            shard_metadata=shard,
             group=group,
         )
 
@@ -173,7 +168,3 @@ class DistributedImageModel:
             else:
                 logger.info("generated image")
                 yield result
-
-
-def initialize_image_model(bound_instance: BoundInstance) -> DistributedImageModel:
-    return DistributedImageModel.from_bound_instance(bound_instance)
diff --git a/src/exo/worker/engines/image/generate.py b/src/exo/worker/engines/image/generate.py
index fbc238156..d393d5e89 100644
--- a/src/exo/worker/engines/image/generate.py
+++ b/src/exo/worker/engines/image/generate.py
@@ -3,7 +3,7 @@ import io
 import random
 import tempfile
 import time
-from collections.abc import Callable
+from collections.abc import Callable, Iterator
 from pathlib import Path
 from typing import Generator, Literal
 
@@ -17,11 +17,10 @@ from exo.api.types import (
     ImageGenerationTaskParams,
     ImageSize,
 )
+from exo.shared.constants import EXO_MAX_CHUNK_SIZE
+from exo.shared.types.chunks import ImageChunk
+from exo.shared.types.common import ModelId
 from exo.shared.types.memory import Memory
-from exo.shared.types.worker.runner_response import (
-    ImageGenerationResponse,
-    PartialImageResponse,
-)
 from exo.worker.engines.image.distributed_model import DistributedImageModel
 
 
@@ -71,16 +70,8 @@ def generate_image(
     model: DistributedImageModel,
     task: ImageGenerationTaskParams | ImageEditsTaskParams,
     cancel_checker: Callable[[], bool] | None = None,
-) -> Generator[ImageGenerationResponse | PartialImageResponse, None, None]:
-    """Generate image(s), optionally yielding partial results.
-
-    When partial_images > 0 or stream=True, yields PartialImageResponse for
-    intermediate images, then ImageGenerationResponse for the final image.
-
-    Yields:
-        PartialImageResponse for intermediate images (if partial_images > 0, first image only)
-        ImageGenerationResponse for final complete images
-    """
+) -> Generator[ImageChunk, None, None]:
+    """Generate image(s), optionally yielding partial results."""
     width, height = parse_size(task.size)
     quality: Literal["low", "medium", "high"] = task.quality or "medium"
 
@@ -142,12 +133,14 @@ def generate_image(
                         image = image.convert("RGB")
                     image.save(buffer, format=image_format)
 
-                    yield PartialImageResponse(
+                    yield from _process_image_response(
                         image_data=buffer.getvalue(),
-                        format=task.output_format,
+                        image_format=task.output_format,
                         partial_index=partial_idx,
                         total_partials=total_partials,
                         image_index=image_num,
+                        model_id=model.model_id,
+                        stats=None,
                     )
                 else:
                     image = result
@@ -189,9 +182,54 @@ def generate_image(
                         image = image.convert("RGB")
                     image.save(buffer, format=image_format)
 
-                    yield ImageGenerationResponse(
+                    yield from _process_image_response(
                         image_data=buffer.getvalue(),
-                        format=task.output_format,
+                        image_format=task.output_format,
                         stats=stats,
                         image_index=image_num,
+                        model_id=model.model_id,
+                        partial_index=None,
+                        total_partials=None,
                     )
+
+
+def _process_image_response(
+    image_data: bytes,
+    image_index: int,
+    image_format: Literal["png", "jpeg", "webp"],
+    partial_index: int | None,
+    total_partials: int | None,
+    stats: ImageGenerationStats | None,
+    model_id: ModelId,
+) -> Iterator[ImageChunk]:
+    """Base64-encode one image and split it into a lazy series of ImageChunks."""
+    is_partial = partial_index is not None
+    encoded_data = base64.b64encode(image_data).decode("utf-8")
+    # Slice the base64 text into pieces of at most EXO_MAX_CHUNK_SIZE characters
+    data_chunks = [
+        encoded_data[i : i + EXO_MAX_CHUNK_SIZE]
+        for i in range(0, len(encoded_data), EXO_MAX_CHUNK_SIZE)
+    ]
+    total_chunks = len(data_chunks)
+
+    def _data_to_chunk(item: tuple[int, str]) -> ImageChunk:
+        chunk_index, chunk_data = item
+        # Only include stats on the last chunk of the final image
+        chunk_stats = (
+            stats if chunk_index == total_chunks - 1 and not is_partial else None
+        )
+
+        return ImageChunk(
+            model=model_id,
+            data=chunk_data,
+            chunk_index=chunk_index,
+            total_chunks=total_chunks,
+            image_index=image_index,
+            is_partial=is_partial,
+            partial_index=partial_index,
+            total_partials=total_partials,
+            stats=chunk_stats,
+            format=image_format,
+        )
+
+    return map(_data_to_chunk, enumerate(data_chunks))
diff --git a/src/exo/worker/engines/mlx/auto_parallel.py b/src/exo/worker/engines/mlx/auto_parallel.py
index 3b6e8421b..733773f26 100644
--- a/src/exo/worker/engines/mlx/auto_parallel.py
+++ b/src/exo/worker/engines/mlx/auto_parallel.py
@@ -1,10 +1,8 @@
-import os
-import threading
 from abc import ABC, abstractmethod
-from collections.abc import Callable
+from collections.abc import Callable, Generator
 from functools import partial
 from inspect import signature
-from typing import TYPE_CHECKING, Any, Literal, Protocol, cast
+from typing import TYPE_CHECKING, Literal, Protocol, cast
 
 import mlx.core as mx
 import mlx.nn as nn
@@ -14,13 +12,16 @@ from mlx.nn.layers.distributed import (
     sum_gradients,
 )
 from mlx_lm.models.base import (
-    scaled_dot_product_attention,  # pyright: ignore[reportUnknownVariableType]
+    scaled_dot_product_attention,
 )
 from mlx_lm.models.cache import ArraysCache, KVCache
 from mlx_lm.models.deepseek_v3 import DeepseekV3MLP
 from mlx_lm.models.deepseek_v3 import Model as DeepseekV3Model
+from mlx_lm.models.deepseek_v4 import DeepseekV4MoE, V4Attention
+from mlx_lm.models.deepseek_v4 import Model as DeepseekV4Model
 from mlx_lm.models.deepseek_v32 import DeepseekV32MLP
 from mlx_lm.models.deepseek_v32 import Model as DeepseekV32Model
+from mlx_lm.models.gemma4 import Model as Gemma4Model
 from mlx_lm.models.glm4_moe import Model as Glm4MoeModel
 from mlx_lm.models.glm4_moe import MoE
 from mlx_lm.models.glm4_moe_lite import Glm4MoeLiteDecoderLayer, Glm4MoeLiteMLP
@@ -39,6 +40,8 @@ from mlx_lm.models.nemotron_h import (
     NemotronHMoE,
 )
 from mlx_lm.models.nemotron_h import NemotronHModel as NemotronHInnerModel
+from mlx_lm.models.qwen3 import Model as Qwen3Model
+from mlx_lm.models.qwen3 import TransformerBlock as Qwen3TransformerBlock
 from mlx_lm.models.qwen3_5 import DecoderLayer as Qwen3_5DecoderLayer
 from mlx_lm.models.qwen3_5 import Model as Qwen3_5TextModel
 from mlx_lm.models.qwen3_5 import Qwen3_5TextModel as Qwen3_5TextModelInner
@@ -53,19 +56,18 @@ from mlx_lm.models.qwen3_next import (
     Qwen3NextSparseMoeBlock,
 )
 from mlx_lm.models.qwen3_next import Qwen3NextModel as Qwen3NextInnerModel
+from mlx_lm.models.qwen3_vl import Model as Qwen3VLModel
 from mlx_lm.models.step3p5 import Model as Step35Model
 from mlx_lm.models.step3p5 import Step3p5MLP as Step35MLP
 from mlx_lm.models.step3p5 import Step3p5Model as Step35InnerModel
 
+from exo.shared.types.worker.runner_response import ModelLoadingResponse
 from exo.shared.types.worker.shards import PipelineShardMetadata
 from exo.worker.runner.bootstrap import logger
 
 if TYPE_CHECKING:
     from mlx_lm.models.cache import Cache
 
-TimeoutCallback = Callable[[], None]
-LayerLoadedCallback = Callable[[int, int], None]  # (layers_loaded, total_layers)
-
 
 _pending_prefill_sends: list[tuple[mx.array, int, mx.distributed.Group]] = []
 
@@ -82,38 +84,6 @@ def clear_prefill_sends() -> None:
     _pending_prefill_sends.clear()
 
 
-def eval_with_timeout(
-    mlx_item: Any,  # pyright: ignore[reportAny]
-    timeout_seconds: float = 60.0,
-    on_timeout: TimeoutCallback | None = None,
-) -> None:
-    """Evaluate MLX item with a hard timeout.
-
-    If on_timeout callback is provided, it will be called before terminating
-    the process. This allows the runner to send a failure event before exit.
-    """
-    completed = threading.Event()
-
-    def watchdog() -> None:
-        if not completed.wait(timeout=timeout_seconds):
-            logger.error(
-                f"mlx_item evaluation timed out after {timeout_seconds:.0f}s. "
-                "This may indicate an issue with FAST_SYNCH and tensor parallel sharding. "
-                "Terminating process."
-            )
-            if on_timeout is not None:
-                on_timeout()
-            os._exit(1)
-
-    watchdog_thread = threading.Thread(target=watchdog, daemon=True)
-    watchdog_thread.start()
-
-    try:
-        mx.eval(mlx_item)  # pyright: ignore[reportAny]
-    finally:
-        completed.set()
-
-
 class _LayerCallable(Protocol):
     """Structural type that any compatible layer must satisfy.
 
@@ -307,8 +277,7 @@ def pipeline_auto_parallel(
     model: nn.Module,
     group: mx.distributed.Group,
     model_shard_meta: PipelineShardMetadata,
-    on_layer_loaded: LayerLoadedCallback | None,
-) -> nn.Module:
+) -> Generator[ModelLoadingResponse, None, nn.Module]:
     """
     Automatically parallelize a model across multiple devices.
     Args:
@@ -328,8 +297,8 @@ def pipeline_auto_parallel(
     total = len(layers)
     for i, layer in enumerate(layers):
         mx.eval(layer)  # type: ignore
-        if on_layer_loaded is not None:
-            on_layer_loaded(i, total)
+        mx.clear_cache()
+        yield ModelLoadingResponse(layers_loaded=i, total=total)
 
     layers[0] = PipelineFirstLayer(layers[0], device_rank, group=group)
     layers[-1] = PipelineLastLayer(
@@ -340,24 +309,20 @@ def pipeline_auto_parallel(
     )
 
     if isinstance(inner_model_instance, GptOssMoeModel):
-        inner_model_instance.layer_types = inner_model_instance.layer_types[  # type: ignore
+        inner_model_instance.layer_types = inner_model_instance.layer_types[
             start_layer:end_layer
         ]
         # We can assume the model has at least one layer thanks to placement.
         # If a layer type doesn't exist, we can set it to 0.
         inner_model_instance.swa_idx = (
             0
-            if "sliding_attention" not in inner_model_instance.layer_types  # type: ignore
-            else inner_model_instance.layer_types.index(  # type: ignore
-                "sliding_attention"
-            )
+            if "sliding_attention" not in inner_model_instance.layer_types
+            else inner_model_instance.layer_types.index("sliding_attention")
         )
         inner_model_instance.ga_idx = (
             0
-            if "full_attention" not in inner_model_instance.layer_types  # type: ignore
-            else inner_model_instance.layer_types.index(  # type: ignore
-                "full_attention"
-            )
+            if "full_attention" not in inner_model_instance.layer_types
+            else inner_model_instance.layer_types.index("full_attention")
         )
 
     if isinstance(inner_model_instance, Step35InnerModel):
@@ -491,10 +456,7 @@ def patch_tensor_model[T](model: T) -> T:
 def tensor_auto_parallel(
     model: nn.Module,
     group: mx.distributed.Group,
-    timeout_seconds: float,
-    on_timeout: TimeoutCallback | None,
-    on_layer_loaded: LayerLoadedCallback | None,
-) -> nn.Module:
+) -> Generator[ModelLoadingResponse, None, nn.Module]:
     all_to_sharded_linear = partial(
         shard_linear,
         sharding="all-to-sharded",
@@ -551,6 +513,14 @@ def tensor_auto_parallel(
             all_to_sharded_linear_in_place,
             sharded_to_all_linear_in_place,
         )
+    elif isinstance(model, DeepseekV4Model):
+        tensor_parallel_sharding_strategy = DeepseekV4ShardingStrategy(
+            group,
+            all_to_sharded_linear,
+            sharded_to_all_linear,
+            all_to_sharded_linear_in_place,
+            sharded_to_all_linear_in_place,
+        )
     elif isinstance(model, MiniMaxModel):
         tensor_parallel_sharding_strategy = MiniMaxShardingStrategy(
             group,
@@ -576,7 +546,15 @@ def tensor_auto_parallel(
             sharded_to_all_linear_in_place,
         )
     elif isinstance(
-        model, (Qwen3MoeModel, Qwen3NextModel, Qwen3_5TextModel, Qwen3_5MoeModel)
+        model,
+        (
+            Qwen3Model,
+            Qwen3MoeModel,
+            Qwen3NextModel,
+            Qwen3_5TextModel,
+            Qwen3_5MoeModel,
+            Qwen3VLModel,
+        ),
     ):
         tensor_parallel_sharding_strategy = QwenShardingStrategy(
             group,
@@ -609,12 +587,18 @@ def tensor_auto_parallel(
             all_to_sharded_linear_in_place,
             sharded_to_all_linear_in_place,
         )
+    elif isinstance(model, Gemma4Model):
+        tensor_parallel_sharding_strategy = Gemma4ShardingStrategy(
+            group,
+            all_to_sharded_linear,
+            sharded_to_all_linear,
+            all_to_sharded_linear_in_place,
+            sharded_to_all_linear_in_place,
+        )
     else:
         raise ValueError(f"Unsupported model type: {type(model)}")
 
-    model = tensor_parallel_sharding_strategy.shard_model(
-        model, timeout_seconds, on_timeout, on_layer_loaded
-    )
+    model = yield from tensor_parallel_sharding_strategy.shard_model(model)
     return patch_tensor_model(model)
 
 
@@ -638,25 +622,19 @@ class TensorParallelShardingStrategy(ABC):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module: ...
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]: ...
 
 
 class LlamaShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(LlamaModel, model)
         total = len(model.layers)
         for i, layer in enumerate(model.layers):
             # Force load weights before sharding to avoid FAST_SYNCH deadlock
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
             layer.self_attn.q_proj = self.all_to_sharded_linear(layer.self_attn.q_proj)
             layer.self_attn.k_proj = self.all_to_sharded_linear(layer.self_attn.k_proj)
             layer.self_attn.v_proj = self.all_to_sharded_linear(layer.self_attn.v_proj)
@@ -669,8 +647,8 @@ class LlamaShardingStrategy(TensorParallelShardingStrategy):
             layer.mlp.down_proj = self.sharded_to_all_linear(layer.mlp.down_proj)
             layer.mlp.up_proj = self.all_to_sharded_linear(layer.mlp.up_proj)
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
         return model
 
 
@@ -681,7 +659,14 @@ def _set_layers(model: nn.Module, layers: list[_LayerCallable]) -> None:
 
         # Update DeepSeek V3 specific parameters when layers are shrunk
         if isinstance(
-            model, (DeepseekV3Model, DeepseekV32Model, Glm4MoeModel, KimiK25Model)
+            model,
+            (
+                DeepseekV3Model,
+                DeepseekV32Model,
+                DeepseekV4Model,
+                Glm4MoeModel,
+                KimiK25Model,
+            ),
         ) and hasattr(inner_model_instance, "num_layers"):
             logger.info(
                 f"Setting num_layers to {len(layers)} for model {model.model.__class__.__name__}"
@@ -704,15 +689,12 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(DeepseekV3Model, model)
         total = len(model.layers)
 
         for i, layer in enumerate(model.layers):
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
 
             # Shard the self attention
             if layer.self_attn.q_lora_rank is None:
@@ -763,8 +745,8 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
                 layer.mlp.sharding_group = self.group
 
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
 
         return model
 
@@ -785,23 +767,188 @@ class ShardedMoE(CustomMlxLayer):
         return y
 
 
+class ShardedMoEV4(CustomMlxLayer):
+    """ShardedMoE variant for DeepseekV4MoE, which is called as (x, input_ids)."""
+
+    def __init__(self, layer: DeepseekV4MoE):
+        super().__init__(cast(_LayerCallable, cast(object, layer)))
+        self._v4_inner = layer
+        self.sharding_group: mx.distributed.Group | None = None
+
+    def __call__(self, x: mx.array, input_ids: mx.array) -> mx.array:
+        group = self.sharding_group
+        if group is None:
+            # No group attached: plain pass-through to the wrapped MoE.
+            return self._v4_inner(x, input_ids)
+        local_out = self._v4_inner(sum_gradients(group)(x), input_ids)
+        return mx.distributed.all_sum(local_out, group=group)
+
+
+def _shard_quantized_rows(
+    q: nn.QuantizedLinear,
+    head_dim: int,
+    slicer: Callable[[mx.array, int], mx.array],
+) -> None:
+    """Replace q's weight/scales (and biases, if any) with slicer(arr, head_dim)."""
+    packed, scale_arr = q["weight"], q["scales"]
+    assert isinstance(packed, mx.array)
+    assert isinstance(scale_arr, mx.array)
+    q.weight = slicer(packed, head_dim)
+    q.scales = slicer(scale_arr, head_dim)
+    bias_arr = q.get("biases")
+    if isinstance(bias_arr, mx.array):
+        q.biases = slicer(bias_arr, head_dim)
+
+
+class _AllSumLinear(nn.Module):
+    """Unsharded wo_b fed by a head-sharded, partial wo_a output.
+
+    What each rank does on a call:
+      1. all_sum the incoming partial wo_a output (summing over the head
+         input shards gives the full wo_a_out on every rank)
+      2. run the unsharded wo_b on it, giving the full hidden on every rank
+
+    That is one collective per layer, on the smaller of
+    (n_groups * o_lora_rank) vs hidden. The wo_b compute is replicated, but
+    at decode B=1 it is only ~30M FLOPs per layer, and 61 extra
+    all_gathers/token cost more than running wo_b on every rank.
+    """
+
+    def __init__(self, inner: nn.Module, group: mx.distributed.Group):
+        super().__init__()
+        self.inner = inner
+        self._group = group
+
+    def __call__(self, x: mx.array) -> mx.array:
+        apply_inner = cast(Callable[[mx.array], mx.array], self.inner)
+        return apply_inner(mx.distributed.all_sum(x, group=self._group))
+
+
+def _shard_v4_attention_heads(
+    attn: V4Attention,
+    world_size: int,
+    rank: int,
+) -> None:
+    """Interleaved-per-group head sharding for V4Attention (mutates `attn`).
+
+    V4 uses a grouped low-rank output projection: `_grouped_output_projection`
+    reshapes the flat `n_heads * head_dim` dim into `(o_groups, heads_per_group,
+    head_dim)`, so group g owns heads `[g * heads_per_group : (g+1) * heads_per_group]`.
+    A naive contiguous `shard_linear("all-to-sharded")` on wq_b would hand each
+    rank whole original groups, so the per-rank "group g" would contain heads
+    that don't belong to original group g and break the wo_a grouped weight
+    mapping. Instead each rank keeps `heads_per_group / N` heads *from every
+    original group*, in group-major order, so SDPA → reshape → wo_a keeps the
+    group mapping.
+
+    Rewrites `wq_b` (weight/bias, or weight/scales/biases when quantized),
+    `attn_sink` and `attn.n_heads`. wo_a is not modified here: it is sharded
+    by a normal input-dim block split (the default axis-(-1) behavior of
+    shard_inplace), which lines up with this layout because after the reshape
+    each group's last dim is `heads_per_group/N * head_dim`.
+    `world_size` is N; `rank` picks this rank's head window within each group.
+    """
+    n_heads: int = attn.n_heads
+    head_dim: int = attn.head_dim
+    o_groups: int = attn.n_groups
+    assert n_heads % o_groups == 0, "n_heads must be divisible by o_groups"
+    heads_per_group = n_heads // o_groups
+    assert heads_per_group % world_size == 0, (
+        f"heads_per_group ({heads_per_group}) must be divisible by world_size "
+        f"({world_size}) for interleaved per-group head sharding"
+    )
+    hpg_per_rank = heads_per_group // world_size  # heads kept per group on this rank
+    start = rank * hpg_per_rank  # [start, end): this rank's heads within each group
+    end = start + hpg_per_rank
+
+    def _slice_head_major_flat(arr: mx.array, stride: int) -> mx.array:
+        """Slice arr on axis 0 where the flat 0-axis is (o_groups *
+        heads_per_group * stride), returning a fresh contiguous allocation
+        so the full unsharded array can be freed. Without the contiguous
+        copy the slice is a view and the original weight stays resident —
+        OOM on large V4. Quantized packed weights don't round-trip through
+        numpy so we use mx.contiguous directly."""
+        rest = arr.shape[1:]
+        reshaped = arr.reshape(o_groups, heads_per_group, stride, *rest)
+        sliced = reshaped[:, start:end].reshape(o_groups * hpg_per_rank * stride, *rest)
+        detached = mx.contiguous(sliced)
+        mx.eval(detached)
+        return detached
+
+    wq_b: nn.Module = attn.wq_b
+    if isinstance(wq_b, nn.QuantizedLinear):
+        # Packed weight: (n_heads*head_dim, q_lora_rank/el_per_int).
+        # scales/biases: (n_heads*head_dim, q_lora_rank/group_size).
+        # Slice axis 0 interleaved-by-group with head_dim stride.
+        _shard_quantized_rows(wq_b, head_dim, _slice_head_major_flat)
+    else:
+        dense = wq_b
+        assert isinstance(dense, nn.Linear)
+        w = dense.weight
+        q_lora_rank = w.shape[-1]
+        w_sharded = _slice_head_major_flat(w, head_dim)
+        has_bias = "bias" in dense
+        new_wq_b = nn.Linear(q_lora_rank, w_sharded.shape[0], bias=has_bias)
+        new_wq_b.weight = w_sharded
+        if has_bias:  # bias is sliced through a temporary trailing axis, then flattened
+            b = dense.bias
+            assert b is not None
+            new_wq_b.bias = _slice_head_major_flat(b[:, None], head_dim).reshape(-1)
+        attn.wq_b = new_wq_b
+    # attn_sink is reshaped to (o_groups, heads_per_group) and gets the same window.
+    sink = attn.attn_sink
+    reshaped = sink.reshape(o_groups, heads_per_group)[:, start:end].reshape(-1)
+    detached_sink = mx.contiguous(reshaped)
+    mx.eval(detached_sink)
+    attn.attn_sink = detached_sink
+    attn.n_heads = o_groups * hpg_per_rank  # local head count after sharding
+
+
+class DeepseekV4ShardingStrategy(TensorParallelShardingStrategy):
+    def shard_model(
+        self,
+        model: nn.Module,
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
+        """Shard each DeepseekV4 layer in turn, yielding progress after every layer."""
+        model = cast(DeepseekV4Model, model)
+        total = len(model.layers)
+        for i, layer in enumerate(model.layers):
+            # Evaluate this layer's parameters before any of them are re-sliced.
+            mx.eval(layer.parameters())
+            # Head-parallel attention with interleaved-per-group sharding.
+            _shard_v4_attention_heads(layer.attn, self.N, self.group.rank())
+            self.sharded_to_all_linear_in_place(layer.attn.wo_a)
+            layer.attn.wo_b = _AllSumLinear(layer.attn.wo_b, self.group)  # type: ignore
+            # MoE: shard the projections in place, then wrap so outputs are all-summed.
+            ffn = layer.ffn
+            if getattr(ffn, "shared_experts", None) is not None:
+                self.all_to_sharded_linear_in_place(ffn.shared_experts.gate_proj)
+                self.sharded_to_all_linear_in_place(ffn.shared_experts.down_proj)
+                self.all_to_sharded_linear_in_place(ffn.shared_experts.up_proj)
+            self.all_to_sharded_linear_in_place(ffn.switch_mlp.gate_proj)
+            self.sharded_to_all_linear_in_place(ffn.switch_mlp.down_proj)
+            self.all_to_sharded_linear_in_place(ffn.switch_mlp.up_proj)
+            wrapped = ShardedMoEV4(ffn)
+            wrapped.sharding_group = self.group
+            layer.ffn = wrapped  # type: ignore
+            # Realize the sharded layer, clear the cache, then report (i is 0-based).
+            mx.eval(layer)
+            mx.clear_cache()
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
+
+        return model
+
+
 class GLM4MoeLiteShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(GLM4MoeLiteModel, model)
         total = len(model.layers)  # type: ignore
         for i, layer in enumerate(model.layers):  # type: ignore
             layer = cast(Glm4MoeLiteDecoderLayer, layer)
-            eval_with_timeout(
-                layer.parameters(),
-                timeout_seconds / total,
-                on_timeout,
-            )
+            mx.eval(layer.parameters())
             if layer.self_attn.q_lora_rank is None:  # type: ignore
                 layer.self_attn.q_proj = self.all_to_sharded_linear(
                     layer.self_attn.q_proj
@@ -847,8 +994,9 @@ class GLM4MoeLiteShardingStrategy(TensorParallelShardingStrategy):
                 layer.mlp = ShardedMoE(layer.mlp)  # type: ignore
                 layer.mlp.sharding_group = self.group  # type: ignore
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+            mx.clear_cache()
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
 
         return model
 
@@ -922,7 +1070,7 @@ class WrappedMiniMaxAttention(CustomMlxLayer):
             keys,
             values,
             cache=cache,
-            scale=self._original_layer.scale,  # type: ignore
+            scale=self._original_layer.scale,
             mask=mask,
         )
 
@@ -935,14 +1083,11 @@ class MiniMaxShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(MiniMaxModel, model)
         total = len(model.layers)
         for i, layer in enumerate(model.layers):
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
             # Shard the self attention
             layer.self_attn.q_proj = self.all_to_sharded_linear(layer.self_attn.q_proj)
             layer.self_attn.k_proj = self.all_to_sharded_linear(layer.self_attn.k_proj)
@@ -964,11 +1109,12 @@ class MiniMaxShardingStrategy(TensorParallelShardingStrategy):
             self.all_to_sharded_linear_in_place(
                 layer.block_sparse_moe.switch_mlp.up_proj
             )
-            layer.block_sparse_moe = ShardedMoE(layer.block_sparse_moe)  # pyright: ignore[reportAttributeAccessIssue, reportArgumentType]
-            layer.block_sparse_moe.sharding_group = self.group  # pyright: ignore[reportAttributeAccessIssue]
+            layer.block_sparse_moe = ShardedMoE(layer.block_sparse_moe)  # type: ignore
+            layer.block_sparse_moe.sharding_group = self.group
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+            mx.clear_cache()
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
         return model
 
 
@@ -976,18 +1122,21 @@ class QwenShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(
-            Qwen3MoeModel | Qwen3NextModel | Qwen3_5TextModel | Qwen3_5MoeModel, model
+            Qwen3Model
+            | Qwen3MoeModel
+            | Qwen3NextModel
+            | Qwen3_5TextModel
+            | Qwen3_5MoeModel
+            | Qwen3VLModel,
+            model,
         )
         total = len(model.layers)
         for i, layer in enumerate(model.layers):
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
             # Shard the self attention
-            if isinstance(layer, Qwen3MoeDecoderLayer):
+            if isinstance(layer, (Qwen3MoeDecoderLayer, Qwen3TransformerBlock)):
                 layer.self_attn.q_proj = self.all_to_sharded_linear(
                     layer.self_attn.q_proj
                 )
@@ -1128,8 +1277,9 @@ class QwenShardingStrategy(TensorParallelShardingStrategy):
                 layer.mlp.up_proj = self.all_to_sharded_linear(layer.mlp.up_proj)
 
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+            mx.clear_cache()
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
         return model
 
 
@@ -1137,14 +1287,11 @@ class Glm4MoeShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(Glm4MoeModel, model)
         total = len(model.layers)
         for i, layer in enumerate(model.layers):
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
 
             layer.self_attn.q_proj = self.all_to_sharded_linear(layer.self_attn.q_proj)
             layer.self_attn.k_proj = self.all_to_sharded_linear(layer.self_attn.k_proj)
@@ -1176,8 +1323,9 @@ class Glm4MoeShardingStrategy(TensorParallelShardingStrategy):
                 layer.mlp.up_proj = self.all_to_sharded_linear(layer.mlp.up_proj)
 
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+            mx.clear_cache()
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
         return model
 
 
@@ -1185,15 +1333,12 @@ class GptOssShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(GptOssMoeModel, model)
         total = len(model.layers)
 
         for i, layer in enumerate(model.layers):
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
             layer.self_attn.q_proj = self.all_to_sharded_linear(layer.self_attn.q_proj)
             layer.self_attn.k_proj = self.all_to_sharded_linear(layer.self_attn.k_proj)
             layer.self_attn.v_proj = self.all_to_sharded_linear(layer.self_attn.v_proj)
@@ -1217,10 +1362,11 @@ class GptOssShardingStrategy(TensorParallelShardingStrategy):
             self.all_to_sharded_linear_in_place(layer.mlp.experts.up_proj)
 
             layer.mlp = ShardedMoE(layer.mlp)  # type: ignore
-            layer.mlp.sharding_group = self.group  # pyright: ignore[reportAttributeAccessIssue]
+            layer.mlp.sharding_group = self.group
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+            mx.clear_cache()
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
         return model
 
 
@@ -1228,15 +1374,12 @@ class Step35ShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(Step35Model, model)
         total = len(model.layers)
 
         for i, layer in enumerate(model.layers):
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
             layer.self_attn.q_proj = self.all_to_sharded_linear(layer.self_attn.q_proj)
             layer.self_attn.k_proj = self.all_to_sharded_linear(layer.self_attn.k_proj)
             layer.self_attn.v_proj = self.all_to_sharded_linear(layer.self_attn.v_proj)
@@ -1264,8 +1407,9 @@ class Step35ShardingStrategy(TensorParallelShardingStrategy):
                 self.sharded_to_all_linear_in_place(layer.mlp.switch_mlp.down_proj)
 
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+            mx.clear_cache()
+
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
         return model
 
 
@@ -1273,15 +1417,12 @@ class NemotronHShardingStrategy(TensorParallelShardingStrategy):
     def shard_model(
         self,
         model: nn.Module,
-        timeout_seconds: float,
-        on_timeout: TimeoutCallback | None,
-        on_layer_loaded: LayerLoadedCallback | None,
-    ) -> nn.Module:
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
         model = cast(NemotronHModel, model)
         rank = self.group.rank()
         total = len(model.layers)
         for i, layer in enumerate(model.layers):
-            eval_with_timeout(layer.parameters(), timeout_seconds / total, on_timeout)
+            mx.eval(layer.parameters())
 
             mixer = layer.mixer
 
@@ -1309,8 +1450,8 @@ class NemotronHShardingStrategy(TensorParallelShardingStrategy):
                 layer.mixer = mixer  # pyright: ignore[reportAttributeAccessIssue]
 
             mx.eval(layer)
-            if on_layer_loaded is not None:
-                on_layer_loaded(i, total)
+            mx.clear_cache()
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
         return model
 
     def _shard_mamba2_mixer(self, mixer: NemotronHMamba2Mixer, rank: int) -> None:
@@ -1395,3 +1536,58 @@ class NemotronHShardingStrategy(TensorParallelShardingStrategy):
         mixer.intermediate_size = is_per_rank
         mixer.conv_dim = new_conv_dim
         mixer.heads_per_group = heads_per_rank // groups_per_rank
+
+
+class WrappedGemma4Experts(CustomMlxLayer):
+    """Experts wrapper that all-sums its output over sharding_group when one is set."""
+
+    def __init__(self, layer: _LayerCallable):
+        super().__init__(layer)
+        self.sharding_group: mx.distributed.Group | None = None
+
+    def __call__(
+        self, x: mx.array, top_k_indices: mx.array, top_k_weights: mx.array
+    ) -> mx.array:
+        group = self.sharding_group
+        inp = x if group is None else sum_gradients(group)(x)
+        out: mx.array = self.original_layer(inp, top_k_indices, top_k_weights)
+        return out if group is None else mx.distributed.all_sum(out, group=group)
+
+
+class Gemma4ShardingStrategy(TensorParallelShardingStrategy):
+    def shard_model(
+        self,
+        model: nn.Module,
+    ) -> Generator[ModelLoadingResponse, None, nn.Module]:
+        """Shard each Gemma4 language-model layer, yielding progress per layer."""
+        model = cast(Gemma4Model, model)
+        layers = model.language_model.model.layers
+        total = len(layers)
+        for i, layer in enumerate(layers):
+            mx.eval(layer.parameters())
+            attn = layer.self_attn
+            attn.q_proj = self.all_to_sharded_linear(attn.q_proj)
+            has_kv: bool = cast(bool, attn.has_kv)
+            if has_kv:  # k_proj/v_proj are left untouched when has_kv is false
+                attn.k_proj = self.all_to_sharded_linear(attn.k_proj)
+                if not attn.use_k_eq_v:
+                    attn.v_proj = self.all_to_sharded_linear(attn.v_proj)
+            attn.o_proj = self.sharded_to_all_linear(attn.o_proj)
+            attn.n_heads //= self.N
+            attn.n_kv_heads //= self.N
+            # Dense MLP is sharded unconditionally; experts only when enable_moe is set.
+            layer.mlp.gate_proj = self.all_to_sharded_linear(layer.mlp.gate_proj)
+            layer.mlp.down_proj = self.sharded_to_all_linear(layer.mlp.down_proj)
+            layer.mlp.up_proj = self.all_to_sharded_linear(layer.mlp.up_proj)
+            # Experts are sharded in place, then wrapped so their output is all-summed.
+            if layer.enable_moe:
+                self.all_to_sharded_linear_in_place(layer.experts.switch_glu.gate_proj)
+                self.sharded_to_all_linear_in_place(layer.experts.switch_glu.down_proj)
+                self.all_to_sharded_linear_in_place(layer.experts.switch_glu.up_proj)
+                layer.experts = WrappedGemma4Experts(layer.experts)  # pyright: ignore[reportAttributeAccessIssue,reportArgumentType]
+                layer.experts.sharding_group = self.group
+            # Realize the sharded layer, clear the cache, then report (i is 0-based).
+            mx.eval(layer)
+            mx.clear_cache()
+            yield ModelLoadingResponse(layers_loaded=i, total=total)
+        return model
diff --git a/src/exo/worker/engines/mlx/builder.py b/src/exo/worker/engines/mlx/builder.py
new file mode 100644
index 000000000..af7c75bb9
--- /dev/null
+++ b/src/exo/worker/engines/mlx/builder.py
@@ -0,0 +1,113 @@
+import contextlib
+import os
+from collections.abc import Generator
+from dataclasses import dataclass
+
+import mlx.core as mx
+from mlx_lm.tokenizer_utils import TokenizerWrapper
+
+from exo.shared.types.common import ModelId
+from exo.shared.types.events import Event
+from exo.shared.types.tasks import TaskId
+from exo.shared.types.worker.instances import BoundInstance
+from exo.shared.types.worker.runner_response import ModelLoadingResponse
+from exo.utils.channels import MpReceiver, MpSender
+from exo.worker.engines.base import Builder, Engine
+from exo.worker.runner.bootstrap import logger
+from exo.worker.runner.llm_inference.batch_generator import (
+    BatchGenerator,
+    SequentialGenerator,
+)
+from exo.worker.runner.llm_inference.tool_parsers import make_mlx_parser
+
+from .cache import KVPrefixCache
+from .types import Model
+from .utils_mlx import (
+    initialize_mlx,
+    load_mlx_items,
+)
+from .vision import VisionProcessor
+
+
+@dataclass
+class MlxBuilder(Builder):
+    """Collects MLX state via connect()/load() and assembles the Engine in build()."""
+
+    model_id: ModelId
+    event_sender: MpSender[Event]
+    cancel_receiver: MpReceiver[TaskId]
+    inference_model: Model | None = None
+    tokenizer: TokenizerWrapper | None = None
+    group: mx.distributed.Group | None = None
+    vision_processor: VisionProcessor | None = None
+
+    def connect(self, bound_instance: BoundInstance) -> None:
+        """Initialize MLX for this instance and remember the resulting group."""
+        self.group = initialize_mlx(bound_instance)
+
+    def load(self, bound_instance: BoundInstance) -> Generator[ModelLoadingResponse]:
+        """Load model, tokenizer and vision processor, re-yielding loading progress."""
+        (
+            self.inference_model,
+            self.tokenizer,
+            self.vision_processor,
+        ) = yield from load_mlx_items(bound_instance, self.group)
+
+    def close(self) -> None:
+        """Release loaded components; attributes already removed are skipped."""
+        # vision_processor is released as well, so nothing loaded outlives close().
+        for attr in ("inference_model", "tokenizer", "vision_processor", "group"):
+            with contextlib.suppress(AttributeError):
+                delattr(self, attr)
+
+    def build(self) -> Engine:
+        """Build the engine; load() must already have set the model and tokenizer."""
+        assert self.inference_model is not None
+        assert self.tokenizer is not None
+
+        tool_parser = None
+        logger.info(
+            f"model has_tool_calling={self.tokenizer.has_tool_calling} using tokens {self.tokenizer.tool_call_start}, {self.tokenizer.tool_call_end}"
+        )
+        if (
+            self.tokenizer.tool_call_start
+            and self.tokenizer.tool_call_end
+            and self.tokenizer.tool_parser  # type: ignore
+        ):
+            tool_parser = make_mlx_parser(
+                self.tokenizer.tool_call_start,
+                self.tokenizer.tool_call_end,
+                self.tokenizer.tool_parser,  # type: ignore
+            )
+
+        kv_prefix_cache = KVPrefixCache(self.group)
+
+        device_rank = 0 if self.group is None else self.group.rank()
+        if os.environ.get("EXO_NO_BATCH"):
+            logger.info("using SequentialGenerator (batching disabled)")
+            return SequentialGenerator(
+                model=self.inference_model,
+                tokenizer=self.tokenizer,
+                group=self.group,
+                tool_parser=tool_parser,
+                kv_prefix_cache=kv_prefix_cache,
+                model_id=self.model_id,
+                device_rank=device_rank,
+                cancel_receiver=self.cancel_receiver,
+                event_sender=self.event_sender,
+                vision_processor=self.vision_processor,
+            )
+        else:
+            logger.info("using BatchGenerator")
+            return BatchGenerator(
+                model=self.inference_model,
+                tokenizer=self.tokenizer,
+                group=self.group,
+                tool_parser=tool_parser,
+                kv_prefix_cache=kv_prefix_cache,
+                model_id=self.model_id,
+                device_rank=device_rank,
+                cancel_receiver=self.cancel_receiver,
+                event_sender=self.event_sender,
+                vision_processor=self.vision_processor,
+            )
diff --git a/src/exo/worker/engines/mlx/cache.py b/src/exo/worker/engines/mlx/cache.py
index ad941f90e..7cdcc77fb 100644
--- a/src/exo/worker/engines/mlx/cache.py
+++ b/src/exo/worker/engines/mlx/cache.py
@@ -1,8 +1,10 @@
+import gc
 import os
 from copy import deepcopy
 from typing import TYPE_CHECKING
 
 import mlx.core as mx
+import numpy as np
 import psutil
 from mlx_lm.models.cache import (
     ArraysCache,
@@ -11,11 +13,17 @@ from mlx_lm.models.cache import (
     QuantizedKVCache,
     RotatingKVCache,
 )
+from mlx_lm.models.deepseek_v4 import (
+    DeepseekV4Cache,
+)
+from mlx_lm.models.deepseek_v4 import (
+    _CompressorBranch as CompressorBranch,  # type: ignore
+)
 from mlx_lm.tokenizer_utils import TokenizerWrapper
 
 from exo.shared.types.memory import Memory
-from exo.shared.types.mlx import KVCacheType, Model
 from exo.worker.engines.mlx.constants import CACHE_GROUP_SIZE, KV_CACHE_BITS
+from exo.worker.engines.mlx.types import KVCacheType, Model
 from exo.worker.runner.bootstrap import logger
 
 if TYPE_CHECKING:
@@ -44,17 +52,148 @@ class CacheSnapshot:
     """Snapshot of states at a known token position."""
 
     def __init__(
-        self, states: list[RotatingKVCache | ArraysCache | None], token_count: int
+        self,
+        states: list[
+            RotatingKVCache | ArraysCache | CacheList | DeepseekV4Cache | None
+        ],
+        token_count: int,
     ):
         self.states = states
         self.token_count = token_count
 
 
+def _detached_copy(a: mx.array) -> mx.array:
+    """Copy `a` through a NumPy array so the result is a freshly built mx.array."""
+    if a.dtype != mx.bfloat16:
+        return mx.array(np.array(a))
+    return mx.array(np.array(a.astype(mx.float32))).astype(mx.bfloat16)  # via f32
+
+
+def copy_rotating_kv_cache(cache: RotatingKVCache) -> RotatingKVCache | None:
+    """
+    Snapshot `cache` from NumPy copies of its last `max_size` positions, or
+    return None when it holds no keys/values yet. deepcopy is avoided because
+    it shares a shared_ptr to the underlying data and the mlx graph inputs of
+    each array, which leaks memory for rotating kv caches; an np array stores
+    no such metadata, so the old cache can be cleaned up nicely.
+    """
+    if cache.keys is None or cache.values is None:
+        return None
+    n = min(cache.max_size, cache.keys.shape[2])
+    k_slice = _detached_copy(cache.keys[..., -n:, :])
+    v_slice = _detached_copy(cache.values[..., -n:, :])
+    mx.eval(k_slice, v_slice)
+    snap = RotatingKVCache.__new__(RotatingKVCache)
+    snap.keys = k_slice
+    snap.values = v_slice
+    snap.offset = cache.offset
+    snap._idx = min(cache._idx, n)  # keep the source write index; n only caps it
+    snap.keep = cache.keep
+    snap.max_size = cache.max_size
+    return snap
+
+
+def _copy_arrays_cache(ac: ArraysCache) -> ArraysCache:
+    """Build a new ArraysCache whose slots are detached copies of `ac`'s slots."""
+    slots: list[mx.array | None] = []
+    for item in ac.cache:  # type: ignore[reportUnknownMemberType]
+        if item is not None:
+            # Every populated slot must be an mx.array before it is copied.
+            assert isinstance(item, mx.array)
+        slots.append(None if item is None else _detached_copy(item))
+    duplicate = ArraysCache(len(slots))
+    duplicate.cache = slots  # type: ignore[reportUnknownMemberType]
+    return duplicate
+
+
+def _copy_cache_list(cl: CacheList) -> CacheList:
+    """Return a new CacheList holding a copy of every inner cache of `cl`."""
+    duplicates: list[object] = []
+    for member in list(cl):  # type: ignore[reportUnknownArgumentType]
+        if isinstance(member, ArraysCache):
+            duplicates.append(_copy_arrays_cache(member))
+            continue
+        # Rotating copy is None when empty; that and other types fall back to deepcopy.
+        is_rotating = isinstance(member, RotatingKVCache)
+        detached = copy_rotating_kv_cache(member) if is_rotating else None
+        duplicates.append(detached if detached is not None else deepcopy(member))
+    return CacheList(*duplicates)
+
+
+def _detached_copy_or_none(a: mx.array | None) -> mx.array | None:
+    """None-tolerant `_detached_copy`; a real copy is evaluated before returning."""
+    duplicate = None if a is None else _detached_copy(a)
+    if duplicate is not None:
+        mx.eval(duplicate)
+    return duplicate
+
+
+def _copy_compressor_branch(b: CompressorBranch) -> CompressorBranch:
+    """Clone `b` field by field; __init__ is bypassed via __new__."""
+    array_fields = ("buffer_kv", "buffer_gate", "prev_kv", "prev_gate", "pool")
+    plain_fields = ("buffer_lengths", "pool_lengths", "buffer_count")
+    clone = CompressorBranch.__new__(CompressorBranch)
+    # Optional mx.array fields get an evaluated, detached copy (None stays None).
+    for name in array_fields:
+        setattr(clone, name, _detached_copy_or_none(getattr(b, name)))
+    # The remaining fields are deep-copied.
+    for name in (*plain_fields, "_new_pool_lengths"):
+        setattr(clone, name, deepcopy(getattr(b, name)))
+    return clone
+
+
+def _copy_v4_cache(c: DeepseekV4Cache) -> DeepseekV4Cache:
+    """Clone a DeepseekV4Cache: local rotating cache, branches, pending lengths."""
+    source_local: RotatingKVCache = c.local
+    local_copy = copy_rotating_kv_cache(source_local)
+    if local_copy is None:
+        # The local cache has no keys/values yet: rebuild an empty shell that
+        # carries over the scalar fields, bypassing RotatingKVCache.__init__.
+        local_copy = RotatingKVCache.__new__(RotatingKVCache)
+        local_copy.keys = local_copy.values = None
+        local_copy.offset, local_copy._idx = source_local.offset, 0
+        local_copy.keep = source_local.keep
+        local_copy.max_size = source_local.max_size
+
+    clone = DeepseekV4Cache.__new__(DeepseekV4Cache)
+    clone.local = local_copy
+    clone._branches = {
+        name: _copy_compressor_branch(branch) for name, branch in c._branches.items()
+    }
+    clone._pending_lengths = deepcopy(c._pending_lengths)
+    return clone
+
+
+def copy_snapshot_entry(
+    entry: ArraysCache | RotatingKVCache | CacheList | DeepseekV4Cache | None,
+) -> ArraysCache | RotatingKVCache | CacheList | DeepseekV4Cache | None:
+    """Copy one snapshot entry with the copier matching its cache type."""
+    if entry is None:
+        return None
+    if isinstance(entry, RotatingKVCache):
+        # An empty rotating cache has no detached copy; deepcopy it instead.
+        detached = copy_rotating_kv_cache(entry)
+        return deepcopy(entry) if detached is None else detached
+    if isinstance(entry, ArraysCache):
+        return _copy_arrays_cache(entry)
+    if isinstance(entry, CacheList):
+        return _copy_cache_list(entry)
+    return _copy_v4_cache(entry) if isinstance(entry, DeepseekV4Cache) else None
+
+
 def snapshot_ssm_states(cache: KVCacheType) -> CacheSnapshot:
-    states: list[ArraysCache | RotatingKVCache | None] = []
+    states: list[
+        RotatingKVCache | ArraysCache | CacheList | DeepseekV4Cache | None
+    ] = []
     for c in cache:
-        if isinstance(c, (ArraysCache, RotatingKVCache)):
-            states.append(deepcopy(c))
+        if isinstance(c, ArraysCache):
+            states.append(_copy_arrays_cache(c))
+        elif isinstance(c, RotatingKVCache):
+            states.append(copy_rotating_kv_cache(c))
+        elif isinstance(c, CacheList) and not bool(c.is_trimmable()):  # type: ignore[reportUnknownMemberType]
+            states.append(_copy_cache_list(c))
+        elif isinstance(c, DeepseekV4Cache):
+            states.append(_copy_v4_cache(c))
         else:
             states.append(None)
     token_count = cache_length(cache)
@@ -74,9 +213,20 @@ def _find_nearest_snapshot(
     return best
 
 
+def is_non_trimmable_cache_entry(c: object) -> bool:
+    """Return True when `trim(n)` can't roll back the entry's full state, so
+    the prefill +2 rollback must snapshot+restore it instead of trimming.
+    """
+    if isinstance(c, CacheList):
+        # A CacheList reports its own trimmability.
+        return not bool(c.is_trimmable())  # type: ignore[reportUnknownMemberType]
+    # ArraysCache, RotatingKVCache and DeepseekV4Cache always count as non-trimmable.
+    return isinstance(c, (ArraysCache, RotatingKVCache, DeepseekV4Cache))
+
+
 def has_non_kv_caches(cache: KVCacheType) -> bool:
     """Check if a cache contains any ArraysCache (SSM) entries."""
-    return any(isinstance(c, (ArraysCache, RotatingKVCache)) for c in cache)
+    return any(is_non_trimmable_cache_entry(c) for c in cache)
 
 
 class KVPrefixCache:
@@ -86,6 +236,7 @@ class KVPrefixCache:
         self._snapshots: list[list[CacheSnapshot] | None] = []
         self._media_regions: list[list["MediaRegion"]] = []
         self._last_used: list[int] = []  # monotonic counter of last access per entry
+        self.prefill_tps: list[float] = []
         self._access_counter: int = 0
         self._group = group
 
@@ -96,6 +247,7 @@ class KVPrefixCache:
         self._snapshots.clear()
         self._media_regions.clear()
         self._last_used.clear()
+        self.prefill_tps.clear()
 
     def add_kv_cache(
         self,
@@ -103,6 +255,7 @@ class KVPrefixCache:
         cache: KVCacheType,
         ssm_snapshots: list[CacheSnapshot] | None = None,
         media_regions: list["MediaRegion"] | None = None,
+        prefill_tps: float = 0.0,
     ):
         """Add a new cache entry. Evicts LRU entries if memory is high."""
         self._evict_if_needed()
@@ -110,6 +263,7 @@ class KVPrefixCache:
         self.caches.append(deepcopy(cache))
         self._snapshots.append(ssm_snapshots)
         self._media_regions.append(media_regions or [])
+        self.prefill_tps.append(prefill_tps)
         self._access_counter += 1
         self._last_used.append(self._access_counter)
         logger.info(f"KV cache added: {len(prompt_tokens)} tokens")
@@ -122,6 +276,7 @@ class KVPrefixCache:
         snapshots: list[CacheSnapshot] | None,
         restore_pos: int,
         media_regions: list["MediaRegion"] | None = None,
+        prefill_tps: float = 0.0,
     ):
         """Update an existing cache entry in-place."""
         old_snapshots = self._snapshots[index]
@@ -135,6 +290,7 @@ class KVPrefixCache:
         self.caches[index] = deepcopy(cache)
         self._snapshots[index] = merged or None
         self._media_regions[index] = media_regions or []
+        self.prefill_tps[index] = prefill_tps
         self._access_counter += 1
         self._last_used[index] = self._access_counter
         logger.info(f"KV cache updated (index {index}): {len(prompt_tokens)} tokens")
@@ -160,14 +316,15 @@ class KVPrefixCache:
         model: Model,
         prompt_tokens: mx.array,
         media_regions: list["MediaRegion"] | None = None,
-    ) -> tuple[KVCacheType, mx.array, int | None]:
+    ) -> tuple[KVCacheType, mx.array, int | None, bool]:
         """Get KV cache for prompt, returning remaining tokens to prefill.
 
         Returns:
-            Tuple of (cache, remaining_tokens, matched_index) where:
+            Tuple of (cache, remaining_tokens, matched_index, is_exact) where:
             - cache: KV cache to use for generation
             - remaining_tokens: tokens that still need prefilling
             - matched_index: index of the matched entry (None if no match)
+            - is_exact: True if the full prompt matched the cached entry
 
         For models with SSM layers (which are ArraysCache in mlx), the cache is trimmed to the
         nearest SSM snapshot position at or before the match point for correctness.
@@ -201,26 +358,34 @@ class KVPrefixCache:
                 best_index, best_length = i, length
 
         if best_index is None:
-            return make_kv_cache(model), prompt_tokens, None
+            return make_kv_cache(model), prompt_tokens, None, False
 
         # For exact match: trim to max_length-1 so remaining has the last token
         # For partial match: trim to best_length, remaining has suffix to prefill
         # This ensures stream_generate always has at least one token to start with
         has_ssm = has_non_kv_caches(self.caches[best_index])
-        target = (max_length - 1) if is_exact and not has_ssm else best_length
+        cached_length = cache_length(self.caches[best_index])
+        if has_ssm:
+            target = best_length
+        else:
+            desired = (max_length - 1) if is_exact else best_length
+            target = min(cached_length, desired)
         restore_pos, restore_snap = self._get_snapshot(best_index, target)
 
         # No usable snapshot — need fresh cache
         if restore_snap is None and has_ssm:
-            return make_kv_cache(model), prompt_tokens, None
+            return make_kv_cache(model), prompt_tokens, None, False
 
         prompt_cache = deepcopy(self.caches[best_index])
-        cached_length = cache_length(self.caches[best_index])
         tokens_to_trim = cached_length - restore_pos
         if tokens_to_trim > 0:
             trim_cache(prompt_cache, tokens_to_trim, restore_snap)
             # Reset cache offset to match trimmed length
             for c in prompt_cache:
+                if isinstance(c, (ArraysCache, RotatingKVCache)):
+                    continue
+                if isinstance(c, DeepseekV4Cache):
+                    continue
                 if hasattr(c, "offset"):
                     c.offset = restore_pos
 
@@ -228,7 +393,7 @@ class KVPrefixCache:
         self._last_used[best_index] = self._access_counter
         remaining = prompt_tokens[restore_pos:]
 
-        return prompt_cache, remaining, best_index
+        return prompt_cache, remaining, best_index, is_exact
 
     @staticmethod
     def _validate_media_match(
@@ -266,6 +431,7 @@ class KVPrefixCache:
         if len(self.caches) == 0:
             return
 
+        evicted_any = False
         # Evict LRU entries until below threshold
         while (
             len(self.caches) > 0
@@ -278,10 +444,17 @@ class KVPrefixCache:
             self._snapshots.pop(lru_index)
             self._media_regions.pop(lru_index)
             self._last_used.pop(lru_index)
+            self.prefill_tps.pop(lru_index)
+
+            evicted_any = True
             logger.info(
                 f"KV cache evicted LRU entry ({evicted_tokens} tokens) due to memory usage"
             )
 
+        if evicted_any:
+            gc.collect()
+            mx.clear_cache()
+
     def get_memory_used_percentage(self) -> float:
         local_pressure: float = get_memory_used_percentage()
 
@@ -303,11 +476,27 @@ def trim_cache(
     snapshot: CacheSnapshot | None = None,
 ) -> None:
     for i, c in enumerate(cache):
-        if isinstance(c, (ArraysCache, RotatingKVCache)):
+        non_trimmable = isinstance(c, (ArraysCache, RotatingKVCache)) or (
+            isinstance(c, CacheList) and not bool(c.is_trimmable())  # type: ignore[reportUnknownMemberType]
+        )
+        if non_trimmable:
             if snapshot is not None and snapshot.states[i] is not None:
-                cache[i] = deepcopy(snapshot.states[i])  # type: ignore
-            else:
+                restored = copy_snapshot_entry(snapshot.states[i])
+                if restored is not None:
+                    cache[i] = restored  # type: ignore
+            elif isinstance(c, (ArraysCache, RotatingKVCache)):
                 c.state = [None] * len(c.state)
+                if isinstance(c, RotatingKVCache):
+                    c.offset = 0
+                    c._idx = 0
+            else:
+                # CacheList without a snapshot — zero each inner cache's state
+                for inner in c:  # type: ignore[reportUnknownVariableType]
+                    if isinstance(inner, (ArraysCache, RotatingKVCache)):
+                        inner.state = [None] * len(inner.state)
+                        if isinstance(inner, RotatingKVCache):
+                            inner.offset = 0
+                            inner._idx = 0
         else:
             c.trim(num_tokens)
 
@@ -325,7 +514,12 @@ def encode_prompt(tokenizer: TokenizerWrapper, prompt: str) -> mx.array:
 
 
 def _entry_length(
-    c: KVCache | RotatingKVCache | QuantizedKVCache | ArraysCache | CacheList,
+    c: KVCache
+    | RotatingKVCache
+    | QuantizedKVCache
+    | ArraysCache
+    | CacheList
+    | DeepseekV4Cache,
 ) -> int:
     # Use .offset attribute which KVCache types have (len() not implemented in older QuantizedKVCache).
     if hasattr(c, "offset"):
diff --git a/src/exo/worker/engines/mlx/disaggregated/__init__.py b/src/exo/worker/engines/mlx/disaggregated/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/exo/worker/engines/mlx/disaggregated/adapter.py b/src/exo/worker/engines/mlx/disaggregated/adapter.py
new file mode 100644
index 000000000..6753495fc
--- /dev/null
+++ b/src/exo/worker/engines/mlx/disaggregated/adapter.py
@@ -0,0 +1,233 @@
+from typing import BinaryIO
+
+import mlx.core as mx
+import numpy as np
+from mlx_lm.models.cache import (
+    ArraysCache,
+    CacheList,
+    KVCache,
+    QuantizedKVCache,
+    RotatingKVCache,
+)
+from mlx_lm.models.deepseek_v4 import DeepseekV4Cache
+
+from exo.worker.disaggregated.protocol import (
+    DType,
+    Header,
+    KVChunk,
+    TensorBlob,
+    write_arrays_state,
+    write_done,
+    write_header,
+    write_kv_chunk,
+)
+from exo.worker.engines.mlx.types import KVCacheType
+from exo.worker.runner.bootstrap import logger
+
+_STR_TO_MX: dict[DType, mx.Dtype] = {
+    "bfloat16": mx.bfloat16,
+    "float16": mx.float16,
+    "float32": mx.float32,
+}
+
+_MX_TO_STR: dict[mx.Dtype, DType] = {v: k for k, v in _STR_TO_MX.items()}
+
+
+def mx_dtype_to_str(dtype: mx.Dtype) -> DType:
+    if (wire_name := _MX_TO_STR.get(dtype)) is None:  # dtype has no wire name
+        raise ValueError(f"Unsupported mlx dtype on wire: {dtype}")
+    return wire_name
+
+
+def wire_dtype_from_cache(caches: KVCacheType) -> DType:
+    """Pick the wire dtype from the first entry that has non-None `keys`,
+    falling back to "bfloat16" when there is none or its dtype is unmapped."""
+    # Entries without a `keys` attribute (or with keys=None) are skipped.
+    with_keys = (k for c in caches if (k := getattr(c, "keys", None)) is not None)
+    first_keys: mx.array | None = next(with_keys, None)
+    if first_keys is None:
+        return "bfloat16"
+    return _MX_TO_STR.get(first_keys.dtype, "bfloat16")
+
+
+def str_to_mx_dtype(dtype: DType) -> mx.Dtype:
+    if (mx_dtype := _STR_TO_MX.get(dtype)) is None:  # not a known wire dtype name
+        raise ValueError(f"Unsupported wire dtype: {dtype!r}")
+    return mx_dtype
+
+
+def array_to_bytes(t: mx.array) -> bytes:
+    """Serialize a bfloat16/float16/float32 array to raw bytes via NumPy."""
+    if t.dtype not in (mx.bfloat16, mx.float16, mx.float32):
+        raise ValueError(f"Unsupported mlx dtype for wire: {t.dtype}")
+    # bf16 has no native numpy dtype; bitcast through uint16.
+    numpy_ready = t.view(mx.uint16) if t.dtype == mx.bfloat16 else t
+    return np.asarray(numpy_ready).tobytes()
+
+
+def bytes_to_array(data: bytes, shape: tuple[int, ...], dtype: DType) -> mx.array:
+    """Rebuild an mx.array of `shape` from raw bytes in the given wire dtype.
+
+    Raises ValueError for an unknown `dtype` instead of silently returning None.
+    """
+    np_dtypes = {"bfloat16": np.uint16, "float16": np.float16, "float32": np.float32}
+    if dtype not in np_dtypes:
+        raise ValueError(f"Unsupported wire dtype: {dtype!r}")
+    # bf16 has no native numpy dtype, so its bytes are read as uint16 and bitcast.
+    arr = mx.array(np.frombuffer(data, dtype=np_dtypes[dtype]).reshape(shape).copy())
+    return arr.view(mx.bfloat16) if dtype == "bfloat16" else arr
+
+
+def bhsd_to_nhd(t: mx.array) -> mx.array:
+    if not (t.ndim == 4 and int(t.shape[0]) == 1):  # only 4D input with B=1
+        raise ValueError(f"Expected BHSD with B=1, got shape={tuple(t.shape)}")
+    return mx.transpose(t[0], (1, 0, 2))  # drop the batch axis, swap axes 0 and 1
+
+
+def nhd_to_bhsd(t: mx.array) -> mx.array:
+    if t.ndim == 3:  # swap axes 0 and 1, then prepend a batch axis of size 1
+        return mx.expand_dims(mx.transpose(t, (1, 0, 2)), 0)
+    raise ValueError(f"Expected NHD (3D), got shape={tuple(t.shape)}")
+
+
+def send_mlx_kv_cache(
+    stream: BinaryIO,
+    caches: KVCacheType,
+    *,
+    dtype: DType,
+    start_pos: int = 0,
+    max_tokens: int | None = None,
+) -> int:
+    """Write every layer of `caches` to `stream`: a KV chunk for positions
+    [start_pos, offset) of each KVCache/RotatingKVCache (offset capped by
+    `max_tokens`) and an arrays-state message per ArraysCache. Returns the
+    token count of the last KV chunk written (0 if none was)."""
+    tokens_sent = 0
+    for layer_idx, c in enumerate(caches):
+        match c:
+            case QuantizedKVCache() | CacheList() | DeepseekV4Cache():
+                raise NotImplementedError(f"{type(c).__name__} has no wire format")
+            case KVCache() | RotatingKVCache():
+                keys = c.keys
+                values = c.values
+                if keys is None or values is None:
+                    continue
+                offset = int(c.offset)
+                if max_tokens is not None:
+                    offset = min(offset, max_tokens)
+                if offset <= start_pos:
+                    continue
+                with mx.stream(mx.Device(mx.cpu)):
+                    k_nhd = bhsd_to_nhd(mx.array(keys[:, :, start_pos:offset, :]))
+                    v_nhd = bhsd_to_nhd(mx.array(values[:, :, start_pos:offset, :]))
+                    mx.eval(k_nhd, v_nhd)
+                num_tokens, n_heads, head_dim = (int(d) for d in k_nhd.shape)
+                write_kv_chunk(
+                    stream,
+                    layer_idx=layer_idx,
+                    num_tokens=num_tokens,
+                    n_heads=n_heads,
+                    head_dim=head_dim,
+                    dtype=dtype,
+                    keys=array_to_bytes(k_nhd),
+                    values=array_to_bytes(v_nhd),
+                )
+                if tokens_sent != 0 and num_tokens != tokens_sent:
+                    logger.critical(
+                        f"Unexpected number of tokens sent {num_tokens} != {tokens_sent}"
+                    )
+                tokens_sent = num_tokens
+            case ArraysCache():
+                blobs: list[TensorBlob] = []
+                for a in c.state:
+                    if a is None:
+                        continue
+                    with mx.stream(mx.Device(mx.cpu)):
+                        a_cpu = mx.array(a)
+                        mx.eval(a_cpu)
+                    blobs.append(
+                        TensorBlob(
+                            dtype=mx_dtype_to_str(a_cpu.dtype),
+                            shape=tuple(int(d) for d in a_cpu.shape),
+                            data=array_to_bytes(a_cpu),
+                        )
+                    )
+                if blobs:
+                    write_arrays_state(stream, layer_idx, blobs)
+    return tokens_sent
+
+
+def chunk_to_mlx_nhd(chunk: KVChunk) -> tuple[mx.array, mx.array]:
+    """Decode a chunk's key and value payloads into arrays of `chunk.shape`."""
+    shape, dtype = chunk.shape, chunk.dtype
+    keys = bytes_to_array(chunk.keys, shape, dtype)
+    values = bytes_to_array(chunk.values, shape, dtype)
+    return keys, values
+
+
+def blob_to_mlx(blob: TensorBlob) -> mx.array:  # decode one TensorBlob payload
+    return bytes_to_array(blob.data, blob.shape, blob.dtype)
+
+
+def inject_kv_chunk(
+    cache: KVCache,
+    keys_nhd: mx.array,
+    values_nhd: mx.array,
+    offset: int,
+    *,
+    start_pos: int = 0,
+    existing_k: mx.array | None = None,
+    existing_v: mx.array | None = None,
+) -> None:
+    """Install received keys/values into `cache` and set its offset; with
+    `start_pos > 0` and both existing arrays given, their first `start_pos`
+    positions along axis 2 are kept in front of the new data."""
+    new_k, new_v = nhd_to_bhsd(keys_nhd), nhd_to_bhsd(values_nhd)
+    if start_pos > 0 and existing_k is not None and existing_v is not None:
+        new_k = mx.concatenate([existing_k[:, :, :start_pos, :], new_k], axis=2)
+        new_v = mx.concatenate([existing_v[:, :, :start_pos, :], new_v], axis=2)
+    cache.keys, cache.values = new_k, new_v
+    cache.offset = offset
+
+
+def inject_rotating_kv_chunk(
+    cache: RotatingKVCache,
+    keys_nhd: mx.array,
+    values_nhd: mx.array,
+    offset: int,
+) -> None:
+    """Replace the contents of `cache` with the received keys/values and set
+    its `offset`; anything previously stored in the cache is discarded."""
+    k_bhsd, v_bhsd = nhd_to_bhsd(keys_nhd), nhd_to_bhsd(values_nhd)
+    cache.keys, cache.values, cache.offset = k_bhsd, v_bhsd, offset
+    # _idx is set to the number of positions now stored along axis 2.
+    cache._idx = int(k_bhsd.shape[2])
+
+
+def inject_arrays_cache(cache: ArraysCache, blobs: list[TensorBlob]) -> None:
+    cache.state = [blob_to_mlx(b) for b in blobs]  # replaces the whole state list
+
+
+def write_cache_to_wire(
+    wfile: BinaryIO,
+    cache: KVCacheType,
+    *,
+    request_id: str = "",
+    model_id: str = "",
+    start_pos: int = 0,
+) -> int:
+    """Write header, cache payload and Done message to `wfile`, then flush."""
+    # The dtype announced in the header is also the one passed to the sender.
+    wire = wire_dtype_from_cache(cache)
+    header = Header(
+        request_id=request_id,
+        model_id=model_id,
+        num_layers=len(cache),
+        dtype=wire,
+        start_pos=start_pos,
+    )
+    write_header(wfile, header)
+    tokens_sent = send_mlx_kv_cache(wfile, cache, dtype=wire, start_pos=start_pos)
+    write_done(wfile, tokens_sent)
+    wfile.flush()
+    return tokens_sent
diff --git a/src/exo/worker/engines/mlx/disaggregated/client.py b/src/exo/worker/engines/mlx/disaggregated/client.py
new file mode 100644
index 000000000..7634ac0f5
--- /dev/null
+++ b/src/exo/worker/engines/mlx/disaggregated/client.py
@@ -0,0 +1,147 @@
+import socket
+from collections import defaultdict
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from typing import BinaryIO, cast
+
+import mlx.core as mx
+from loguru import logger
+from mlx_lm.models.cache import ArraysCache, KVCache, RotatingKVCache
+
+from exo.worker.disaggregated.protocol import (
+    ArraysState,
+    Done,
+    Header,
+    KVChunk,
+    TensorBlob,
+    read_header,
+    read_message,
+)
+from exo.worker.disaggregated.server import PrefillRequest, write_request
+from exo.worker.engines.mlx.disaggregated.adapter import (
+    chunk_to_mlx_nhd,
+    inject_arrays_cache,
+    inject_kv_chunk,
+    inject_rotating_kv_chunk,
+)
+
+_SOCKET_TIMEOUT_SECS = 60
+_RECV_BUFFER_BYTES = 4 * 1024 * 1024
+
+
+@dataclass
+class PrefillResult:  # what remote_prefill_fetch collected from one server reply
+    header: Header  # the stream header, read before any other message
+    kv_chunks: dict[int, list[KVChunk]] = field(  # layer index -> chunks, arrival order
+        default_factory=dict[int, list[KVChunk]]
+    )
+    arrays: dict[int, list[TensorBlob]] = field(  # layer index -> latest ArraysState
+        default_factory=dict[int, list[TensorBlob]]
+    )
+    total_tokens: int = 0  # taken from the Done message; stays 0 if none arrived
+
+
+def _parse_endpoint(endpoint: str) -> tuple[str, int]:
+    host, colon, port_str = endpoint.rpartition(":")  # last colon separates the port
+    if not colon:
+        raise ValueError(f"Invalid endpoint {endpoint}")
+    return host.strip("[]"), int(port_str)  # "[::1]:80" -> ("::1", 80)
+
+
+def remote_prefill_fetch(
+    endpoint: str,
+    request: PrefillRequest,
+    on_header: Callable[[Header], None] | None = None,
+    on_kv_chunk: Callable[[KVChunk, int], None] | None = None,
+    timeout_secs: float = _SOCKET_TIMEOUT_SECS,
+) -> PrefillResult:
+    """Send `request` to the prefill server at `endpoint` and collect the reply
+    (header, KV chunks, array states) until Done or until read_message yields
+    None; any other message from the server is raised as RuntimeError."""
+    host, port = _parse_endpoint(endpoint)
+    logger.info(
+        f"Connecting to prefill server at {host}:{port} "
+        f"({len(request.token_ids)} tokens, start_pos={request.start_pos})"
+    )
+
+    # sock.close() only frees the descriptor once its makefile() objects are closed.
+    with (
+        socket.create_connection((host, port), timeout=timeout_secs) as sock,
+        sock.makefile("wb", buffering=256 * 1024) as wfile,
+        sock.makefile("rb", buffering=256 * 1024) as raw_stream,
+    ):
+        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, _RECV_BUFFER_BYTES)
+        wstream: BinaryIO = cast(BinaryIO, cast(object, wfile))
+        write_request(wstream, request)
+        stream: BinaryIO = cast(BinaryIO, cast(object, raw_stream))
+        header = read_header(stream)
+        if on_header is not None:
+            on_header(header)
+
+        result = PrefillResult(header=header)
+        kv_by_layer: dict[int, list[KVChunk]] = defaultdict(list)
+        chunks_received = 0
+        while True:
+            msg = read_message(stream)
+            if msg is None:
+                break
+            if isinstance(msg, KVChunk):
+                kv_by_layer[msg.layer_idx].append(msg)
+                chunks_received += 1
+                if on_kv_chunk is not None:
+                    on_kv_chunk(msg, chunks_received)
+            elif isinstance(msg, ArraysState):
+                result.arrays[msg.layer_idx] = msg.arrays
+            elif isinstance(msg, Done):
+                result.total_tokens = msg.total_tokens
+                break
+            else:
+                raise RuntimeError(f"Prefill server error [{msg.code}]: {msg.message}")
+
+        result.kv_chunks = dict(kv_by_layer)
+        return result
+
+
+def ingest_into_mlx_cache(
+    result: PrefillResult,
+    caches: list[KVCache | RotatingKVCache | ArraysCache],
+    *,
+    start_pos: int = 0,
+) -> int:
+    """Load a fetched prefill result into `caches` layer by layer; returns
+    `start_pos` plus the largest per-layer token count received."""
+    tokens_per_layer = [
+        sum(chunk.num_tokens for chunk in layer_chunks)
+        for layer_chunks in result.kv_chunks.values()
+    ]
+    final_offset = start_pos + max(tokens_per_layer, default=0)
+
+    for layer_idx, cache in enumerate(caches):
+        if layer_idx in result.kv_chunks:
+            decoded = [chunk_to_mlx_nhd(ch) for ch in result.kv_chunks[layer_idx]]
+            if len(decoded) == 1:
+                k_nhd, v_nhd = decoded[0]
+            else:
+                # Several chunks for one layer are joined along the token axis.
+                k_nhd = mx.concatenate([k for k, _ in decoded], axis=0)
+                v_nhd = mx.concatenate([v for _, v in decoded], axis=0)
+
+            if isinstance(cache, RotatingKVCache):
+                inject_rotating_kv_chunk(cache, k_nhd, v_nhd, final_offset)
+            elif isinstance(cache, KVCache):
+                # The existing prefix is only prepended when start_pos > 0.
+                inject_kv_chunk(
+                    cache,
+                    k_nhd,
+                    v_nhd,
+                    final_offset,
+                    start_pos=start_pos,
+                    existing_k=cache.keys,
+                    existing_v=cache.values,
+                )
+
+        if layer_idx in result.arrays and isinstance(cache, ArraysCache):
+            inject_arrays_cache(cache, result.arrays[layer_idx])
+
+    return final_offset
diff --git a/src/exo/worker/engines/mlx/disaggregated/serve.py b/src/exo/worker/engines/mlx/disaggregated/serve.py
new file mode 100644
index 000000000..5220e9d1f
--- /dev/null
+++ b/src/exo/worker/engines/mlx/disaggregated/serve.py
@@ -0,0 +1,86 @@
+import time
+
+import mlx.core as mx
+from mlx_lm.sample_utils import make_sampler
+from mlx_lm.tokenizer_utils import TokenizerWrapper
+
+from exo.worker.disaggregated.server import PrefillRequest
+from exo.worker.engines.mlx.cache import (
+    KVPrefixCache,
+    cache_length,
+    make_kv_cache,
+    snapshot_ssm_states,
+)
+from exo.worker.engines.mlx.generator.generate import prefill as mlx_prefill
+from exo.worker.engines.mlx.types import KVCacheType, Model
+from exo.worker.engines.mlx.utils_mlx import fix_unmatched_think_end_tokens
+from exo.worker.runner.bootstrap import logger
+
+
+def run_prefill_for_request(  # prefill one PrefillRequest, reusing the prefix cache
+    *,
+    model: Model,
+    tokenizer: TokenizerWrapper,
+    group: mx.distributed.Group | None,
+    kv_prefix_cache: KVPrefixCache | None,
+    request: PrefillRequest,
+) -> KVCacheType:  # the cache after prefill (reused prefix + newly prefilled part)
+    prompt_tokens = mx.array(request.token_ids)
+    prompt_tokens = fix_unmatched_think_end_tokens(prompt_tokens, tokenizer)
+    n_tokens = int(prompt_tokens.shape[0])
+    t0 = time.perf_counter()
+
+    matched_index: int | None = None
+    prefix_hit_length = 0  # prompt tokens already covered by a prefix-cache entry
+    if kv_prefix_cache is not None:
+        cache, remaining, matched_index, _ = kv_prefix_cache.get_kv_cache(
+            model, prompt_tokens
+        )
+        prefix_hit_length = n_tokens - int(remaining.shape[0])
+    else:
+        cache = make_kv_cache(model)
+        remaining = prompt_tokens
+
+    target_offset = max(0, n_tokens - 2)  # stop two tokens short of the prompt end
+    new_tokens = max(0, target_offset - prefix_hit_length)
+    prefill_input = remaining[:new_tokens]
+    if int(prefill_input.shape[0]) > 0:  # skipped when the prefix hit covers the target
+        sampler = make_sampler(temp=1.0)
+        _ = mlx_prefill(
+            model=model,
+            tokenizer=tokenizer,
+            sampler=sampler,
+            prompt_tokens=prefill_input,
+            cache=cache,
+            group=group,
+            on_prefill_progress=None,
+            distributed_prompt_progress_callback=None,
+        )
+
+    if kv_prefix_cache is not None:
+        try:  # best effort: a failed save is only logged, the cache is still returned
+            cache_snapshots = [snapshot_ssm_states(cache)]
+            hit_ratio = prefix_hit_length / n_tokens if n_tokens > 0 else 0.0
+            if matched_index is not None and hit_ratio >= 0.5:  # refresh matched entry
+                kv_prefix_cache.update_kv_cache(
+                    matched_index,
+                    prompt_tokens,
+                    cache,
+                    cache_snapshots,
+                    restore_pos=prefix_hit_length,
+                )
+            else:  # no match, or under half the prompt was reused: store a new entry
+                kv_prefix_cache.add_kv_cache(prompt_tokens, cache, cache_snapshots)
+        except Exception:
+            logger.opt(exception=True).warning(
+                "Failed to save prefix cache on prefill server"
+            )
+
+    elapsed = time.perf_counter() - t0  # also includes the prefix-cache lookup and save
+    final_offset = cache_length(cache)
+    logger.info(
+        f"Prefill: request_id={request.request_id} "
+        f"{n_tokens} tokens (prefix_hit={prefix_hit_length}, "
+        f"final_offset={final_offset}) in {elapsed * 1000:.0f}ms"
+    )
+    return cache
diff --git a/src/exo/worker/engines/mlx/disaggregated/tests/__init__.py b/src/exo/worker/engines/mlx/disaggregated/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/exo/worker/engines/mlx/disaggregated/tests/test_end_to_end.py b/src/exo/worker/engines/mlx/disaggregated/tests/test_end_to_end.py
new file mode 100644
index 000000000..2cfdfc9ba
--- /dev/null
+++ b/src/exo/worker/engines/mlx/disaggregated/tests/test_end_to_end.py
@@ -0,0 +1,167 @@
+from typing import BinaryIO
+
+import mlx.core as mx
+import numpy as np
+import pytest
+from mlx_lm.models.cache import KVCache
+
+from exo.worker.disaggregated.protocol import Header, write_done, write_header
+from exo.worker.disaggregated.server import PrefillRequest, PrefillServer
+from exo.worker.engines.mlx.disaggregated.adapter import (
+    send_mlx_kv_cache,
+    wire_dtype_from_cache,
+)
+from exo.worker.engines.mlx.disaggregated.client import (
+    ingest_into_mlx_cache,
+    remote_prefill_fetch,
+)
+
+
+def _equal(a: mx.array, b: mx.array) -> bool:
+    """Return True when both arrays share dtype, shape and every element."""
+    if a.dtype != b.dtype or tuple(a.shape) != tuple(b.shape):
+        return False
+    if a.dtype == mx.bfloat16:
+        # bfloat16 arrays are compared through their uint16 views.
+        a, b = a.view(mx.uint16), b.view(mx.uint16)
+    return bool(np.array_equal(np.asarray(a), np.asarray(b)))
+
+
+def _make_cache(seq_len: int, n_heads: int, head_dim: int) -> KVCache:
+    """Build a KVCache of seeded random bfloat16 keys/values on the CPU stream."""
+    shape = (1, n_heads, seq_len, head_dim)
+    mx.random.seed(0)
+    gold = KVCache()
+    with mx.stream(mx.Device(mx.cpu)):
+        # Keys are drawn before values, so the two come from successive draws.
+        gold.keys = (mx.random.uniform(shape=shape) * 10).astype(mx.bfloat16)
+        gold.values = (mx.random.uniform(shape=shape) * 10).astype(mx.bfloat16)
+        # Force evaluation while the CPU stream is still the active one.
+        mx.eval(gold.keys, gold.values)
+    gold.offset = seq_len
+    return gold
+
+
+def _stream_cache(
+    wfile: BinaryIO, cache: KVCache, *, request_id: str, start_pos: int = 0
+) -> None:
+    """Write a header, the single-layer cache and a done marker to ``wfile``."""
+    wire_dtype = wire_dtype_from_cache([cache])
+    header = Header(
+        request_id=request_id,
+        model_id="test-model",
+        num_layers=1,
+        dtype=wire_dtype,
+        start_pos=start_pos,
+    )
+    write_header(wfile, header)
+    # The done marker carries the token count returned by the sender.
+    sent = send_mlx_kv_cache(wfile, [cache], dtype=wire_dtype, start_pos=start_pos)
+    write_done(wfile, sent)
+    wfile.flush()
+
+
+@pytest.mark.slow
+def test_server_client_roundtrip() -> None:
+    """A cache streamed by the server is rebuilt bit-for-bit by the client."""
+    from exo.utils.ports import random_ephemeral_port
+
+    seq_len, n_heads, head_dim = 5, 2, 4
+    gold = _make_cache(seq_len, n_heads, head_dim)
+
+    def resolve(job: PrefillRequest, wfile: BinaryIO) -> bool:
+        _stream_cache(wfile, gold, request_id=job.request_id)
+        return True
+
+    port = random_ephemeral_port()  # a fixed port could already be in use
+    server = PrefillServer(resolve=resolve, host="127.0.0.1", port=port)
+    try:
+        result = remote_prefill_fetch(
+            endpoint=f"127.0.0.1:{port}",
+            request=PrefillRequest(
+                model_id="test-model",
+                token_ids=list(range(seq_len)),
+                request_id="req-1",
+            ),
+        )
+        assert result.total_tokens == seq_len
+        assert 0 in result.kv_chunks
+
+        dst = KVCache()
+        final_offset = ingest_into_mlx_cache(result, [dst])
+        assert final_offset == seq_len
+        assert dst.offset == seq_len
+        dst_k, dst_v = dst.keys, dst.values
+        gold_k, gold_v = gold.keys, gold.values
+        assert dst_k is not None and dst_v is not None
+        assert gold_k is not None and gold_v is not None
+        assert _equal(dst_k, gold_k)
+        assert _equal(dst_v, gold_v)
+    finally:
+        server.stop()
+
+
+@pytest.mark.slow
+def test_server_reports_pickup_failure() -> None:
+    """A resolver that returns False makes the client raise RuntimeError."""
+    from exo.utils.ports import random_ephemeral_port
+
+    def resolve(_job: PrefillRequest, _wfile: BinaryIO) -> bool:
+        return False
+
+    request = PrefillRequest(
+        model_id="test-model", token_ids=[1, 2, 3], request_id="never-registered"
+    )
+    port = random_ephemeral_port()  # a fixed port could already be in use
+    server = PrefillServer(resolve=resolve, host="127.0.0.1", port=port)
+    try:
+        with pytest.raises(RuntimeError, match="not picked up"):
+            _ = remote_prefill_fetch(endpoint=f"127.0.0.1:{port}", request=request)
+    finally:
+        server.stop()
+
+
+@pytest.mark.slow
+def test_server_client_roundtrip_with_start_pos() -> None:
+    """Only tokens past ``start_pos`` are streamed and appended to the prefix."""
+    from exo.utils.ports import random_ephemeral_port
+
+    seq_len, start_pos, n_heads, head_dim = 8, 5, 2, 4
+    gold = _make_cache(seq_len, n_heads, head_dim)
+
+    def resolve(job: PrefillRequest, wfile: BinaryIO) -> bool:
+        _stream_cache(wfile, gold, request_id=job.request_id, start_pos=start_pos)
+        return True
+
+    port = random_ephemeral_port()  # a fixed port could already be in use
+    server = PrefillServer(resolve=resolve, host="127.0.0.1", port=port)
+    try:
+        result = remote_prefill_fetch(
+            endpoint=f"127.0.0.1:{port}",
+            request=PrefillRequest(
+                model_id="test-model",
+                token_ids=list(range(seq_len)),
+                request_id="req-1",
+                start_pos=start_pos,
+            ),
+        )
+        assert result.total_tokens == seq_len - start_pos
+        assert result.header.start_pos == start_pos
+
+        # Pre-fill the destination with the first start_pos tokens of the gold cache.
+        dst = KVCache()
+        gold_k, gold_v = gold.keys, gold.values
+        assert gold_k is not None and gold_v is not None
+        dst.keys = mx.array(gold_k[:, :, :start_pos, :])
+        dst.values = mx.array(gold_v[:, :, :start_pos, :])
+        dst.offset = start_pos
+
+        final_offset = ingest_into_mlx_cache(result, [dst], start_pos=start_pos)
+        assert final_offset == seq_len
+        assert dst.offset == seq_len
+        dst_k, dst_v = dst.keys, dst.values
+        assert dst_k is not None and dst_v is not None
+        assert _equal(dst_k, gold_k)
+        assert _equal(dst_v, gold_v)
+    finally:
+        server.stop()
diff --git a/src/exo/worker/engines/mlx/disaggregated/tests/test_mlx_adapter.py b/src/exo/worker/engines/mlx/disaggregated/tests/test_mlx_adapter.py
new file mode 100644
index 000000000..a706a6b4c
--- /dev/null
+++ b/src/exo/worker/engines/mlx/disaggregated/tests/test_mlx_adapter.py
@@ -0,0 +1,270 @@
+import io
+
+import mlx.core as mx
+import numpy as np
+from mlx_lm.models.cache import ArraysCache, KVCache, RotatingKVCache
+
+from exo.worker.disaggregated.protocol import (
+    ArraysState,
+    Done,
+    Header,
+    KVChunk,
+    TensorBlob,
+    read_header,
+    read_message,
+    write_done,
+    write_header,
+)
+from exo.worker.engines.mlx.disaggregated.adapter import (
+    array_to_bytes,
+    bhsd_to_nhd,
+    bytes_to_array,
+    chunk_to_mlx_nhd,
+    inject_arrays_cache,
+    inject_kv_chunk,
+    inject_rotating_kv_chunk,
+    nhd_to_bhsd,
+    send_mlx_kv_cache,
+    wire_dtype_from_cache,
+)
+from exo.worker.engines.mlx.disaggregated.client import (
+    PrefillResult,
+    ingest_into_mlx_cache,
+)
+
+
+def _equal(a: mx.array, b: mx.array) -> bool:
+    """Exact comparison: same dtype, same shape and the same contents."""
+    if a.dtype != b.dtype or tuple(a.shape) != tuple(b.shape):
+        return False
+    if a.dtype == mx.bfloat16:
+        # Both bfloat16 operands are viewed as uint16 before going to NumPy.
+        a, b = a.view(mx.uint16), b.view(mx.uint16)
+    return bool(np.array_equal(np.asarray(a), np.asarray(b)))
+
+
+def _rand(shape: tuple[int, ...], dtype: mx.Dtype) -> mx.array:
+    mx.random.seed(0)  # reseeded per call: the same shape repeats the same values
+    return (10 * mx.random.uniform(shape=shape)).astype(dtype)
+
+
+def _make_kv_cache(seq_len: int, n_heads: int, head_dim: int) -> KVCache:
+    """Build a bfloat16 KVCache; values are negated so they never equal the keys."""
+    cache, shape = KVCache(), (1, n_heads, seq_len, head_dim)
+    cache.keys, cache.values = _rand(shape, mx.bfloat16), -_rand(shape, mx.bfloat16)
+    cache.offset = seq_len
+    return cache
+
+
+def test_bytes_roundtrip_bf16() -> None:
+    """array_to_bytes followed by bytes_to_array reproduces a bfloat16 tensor."""
+    src = _rand((2, 3, 4), mx.bfloat16)
+    assert _equal(src, bytes_to_array(array_to_bytes(src), (2, 3, 4), "bfloat16"))
+
+
+def test_bytes_roundtrip_f16() -> None:
+    """array_to_bytes followed by bytes_to_array reproduces a float16 vector."""
+    src = _rand((5,), mx.float16)
+    assert _equal(src, bytes_to_array(array_to_bytes(src), (5,), "float16"))
+
+
+def test_bytes_roundtrip_f32() -> None:
+    """array_to_bytes followed by bytes_to_array reproduces a float32 matrix."""
+    src = _rand((2, 2), mx.float32)
+    assert _equal(src, bytes_to_array(array_to_bytes(src), (2, 2), "float32"))
+
+
+def test_bhsd_nhd_roundtrip() -> None:
+    """bhsd_to_nhd maps (1, 4, 7, 8) to (7, 4, 8) and nhd_to_bhsd inverts it."""
+    original = _rand((1, 4, 7, 8), mx.float32)
+    converted = bhsd_to_nhd(original)
+    assert tuple(converted.shape) == (7, 4, 8)
+    assert _equal(original, nhd_to_bhsd(converted))
+
+
+def test_kv_cache_inject_roundtrip() -> None:
+    """inject_kv_chunk restores the original BHSD keys and values, unswapped."""
+    n_heads, seq_len, head_dim = 3, 5, 4
+    k_bhsd = _rand((1, n_heads, seq_len, head_dim), mx.float32)
+    # _rand reseeds on every call, so negate to make values differ from keys.
+    v_bhsd = -_rand((1, n_heads, seq_len, head_dim), mx.float32)
+
+    cache = KVCache()
+    inject_kv_chunk(cache, bhsd_to_nhd(k_bhsd), bhsd_to_nhd(v_bhsd), offset=seq_len)
+    assert cache.offset == seq_len
+    assert cache.keys is not None and cache.values is not None
+    assert _equal(cache.keys, k_bhsd)
+    assert _equal(cache.values, v_bhsd)
+
+
+def test_arrays_cache_inject() -> None:
+    """inject_arrays_cache decodes each TensorBlob into the matching state slot."""
+    first = _rand((3,), mx.float32)
+    second = _rand((2, 2), mx.bfloat16)
+    blob_first = TensorBlob(dtype="float32", shape=(3,), data=array_to_bytes(first))
+    blob_second = TensorBlob(
+        dtype="bfloat16", shape=(2, 2), data=array_to_bytes(second)
+    )
+    cache = ArraysCache(size=2)
+    inject_arrays_cache(cache, [blob_first, blob_second])
+    s0, s1 = cache.state[0], cache.state[1]
+    assert s0 is not None and s1 is not None
+    assert _equal(s0, first)
+    assert _equal(s1, second)
+
+
+def test_send_mlx_cache_end_to_end() -> None:
+    """A sent one-layer cache reads back as header, one KVChunk, then Done."""
+    seq_len, n_heads, head_dim = 3, 2, 4
+    src = _make_kv_cache(seq_len, n_heads, head_dim)
+    k_bhsd, v_bhsd = src.keys, src.values
+    assert k_bhsd is not None and v_bhsd is not None
+
+    stream = io.BytesIO()
+    write_header(stream, Header(num_layers=1, dtype="bfloat16"))
+    sent = send_mlx_kv_cache(stream, [src], dtype="bfloat16")
+    write_done(stream, sent)
+    stream.seek(0)
+
+    header = read_header(stream)
+    assert header.num_layers == 1
+
+    chunk = read_message(stream)
+    assert isinstance(chunk, KVChunk)
+    assert chunk.num_tokens == seq_len
+    k_nhd, v_nhd = chunk_to_mlx_nhd(chunk)
+    dst = KVCache()
+    inject_kv_chunk(dst, k_nhd, v_nhd, offset=chunk.num_tokens)
+
+    trailer = read_message(stream)
+    assert isinstance(trailer, Done)
+    assert trailer.total_tokens == seq_len
+
+    assert dst.offset == seq_len
+    assert dst.keys is not None and dst.values is not None
+    assert _equal(dst.keys, k_bhsd)
+    assert _equal(dst.values, v_bhsd)
+    _ = ArraysState  # no-op reference to the imported name
+
+
+def test_send_with_start_pos_only_ships_suffix() -> None:
+    """With start_pos set, the KVChunk carries only seq_len - start_pos tokens."""
+    seq_len, start_pos = 6, 4
+    src = _make_kv_cache(seq_len, n_heads=2, head_dim=4)
+
+    stream = io.BytesIO()
+    write_header(stream, Header(num_layers=1, dtype="bfloat16", start_pos=start_pos))
+    sent = send_mlx_kv_cache(stream, [src], dtype="bfloat16", start_pos=start_pos)
+    write_done(stream, sent)
+    stream.seek(0)
+
+    _ = read_header(stream)
+    chunk = read_message(stream)
+    assert isinstance(chunk, KVChunk)
+    assert chunk.num_tokens == seq_len - start_pos
+
+
+def test_send_skips_layer_when_offset_below_start_pos() -> None:
+    """A cache shorter than start_pos sends no KVChunk, only Done with 0 tokens."""
+    seq_len, start_pos = 3, 5
+    src = _make_kv_cache(seq_len, n_heads=2, head_dim=4)
+
+    stream = io.BytesIO()
+    write_header(stream, Header(num_layers=1, dtype="bfloat16", start_pos=start_pos))
+    sent = send_mlx_kv_cache(stream, [src], dtype="bfloat16", start_pos=start_pos)
+    write_done(stream, sent)
+    stream.seek(0)
+
+    _ = read_header(stream)
+    first = read_message(stream)
+    assert sent == 0
+    assert isinstance(first, Done)
+    assert first.total_tokens == 0
+
+
+def test_wire_dtype_from_cache() -> None:
+    """wire_dtype_from_cache reports the dtype name of the cached tensors."""
+    assert wire_dtype_from_cache([_make_kv_cache(3, 2, 4)]) == "bfloat16"
+
+    shape = (1, 2, 3, 4)
+    f32 = KVCache()
+    f32.keys, f32.values = _rand(shape, mx.float32), _rand(shape, mx.float32)
+    f32.offset = 3
+    assert wire_dtype_from_cache([f32]) == "float32"
+
+
+def _decode_payload(payload: bytes) -> PrefillResult:
+    """Parse serialized bytes into a PrefillResult, stopping at Done or at None."""
+    stream = io.BytesIO(payload)
+    result = PrefillResult(header=read_header(stream))
+    # The loop ends when read_message returns None or a Done message arrives;
+    # message types other than the three handled below are skipped.
+    while (msg := read_message(stream)) is not None:
+        if isinstance(msg, KVChunk):
+            # Chunks are grouped per layer index, in arrival order.
+            result.kv_chunks.setdefault(msg.layer_idx, []).append(msg)
+        elif isinstance(msg, ArraysState):
+            result.arrays[msg.layer_idx] = msg.arrays
+        elif isinstance(msg, Done):
+            result.total_tokens = msg.total_tokens
+            break
+    return result
+
+
+def test_mixed_cache_roundtrip() -> None:
+    """KVCache, RotatingKVCache and ArraysCache layers survive one send/ingest."""
+    n_heads, head_dim, seq_len = 2, 4, 6
+
+    src_kv = _make_kv_cache(seq_len, n_heads, head_dim)
+
+    # _rand reseeds per call; shift so these differ from src_kv and each other.
+    src_rot = RotatingKVCache(max_size=16, keep=0)
+    src_rot.keys = _rand((1, n_heads, seq_len, head_dim), mx.bfloat16) + 1
+    src_rot.values = _rand((1, n_heads, seq_len, head_dim), mx.bfloat16) + 2
+    src_rot.offset = seq_len
+    src_rot._idx = seq_len
+
+    src_arr = ArraysCache(size=2)
+    arr_a = _rand((3,), mx.bfloat16)
+    arr_b = _rand((2, 4), mx.bfloat16)
+    src_arr.state = [arr_a, arr_b]
+
+    buf = io.BytesIO()
+    header = Header(request_id="req", model_id="m", num_layers=3, dtype="bfloat16")
+    write_header(buf, header)
+    tokens_sent = send_mlx_kv_cache(buf, [src_kv, src_rot, src_arr], dtype="bfloat16")
+    write_done(buf, tokens_sent)
+    result = _decode_payload(buf.getvalue())
+
+    assert result.header.num_layers == 3
+    assert result.total_tokens == seq_len
+
+    dst_kv = KVCache()
+    dst_rot = RotatingKVCache(max_size=16, keep=0)
+    dst_arr = ArraysCache(size=2)
+    final_offset = ingest_into_mlx_cache(result, [dst_kv, dst_rot, dst_arr])
+
+    assert final_offset == seq_len
+
+    assert dst_kv.offset == seq_len
+    assert dst_kv.keys is not None and dst_kv.values is not None
+    src_kv_k, src_kv_v = src_kv.keys, src_kv.values
+    assert src_kv_k is not None and src_kv_v is not None
+    assert _equal(dst_kv.keys, src_kv_k)
+    assert _equal(dst_kv.values, src_kv_v)
+
+    assert dst_rot.offset == seq_len
+    assert dst_rot.keys is not None and dst_rot.values is not None
+    src_rot_k, src_rot_v = src_rot.keys, src_rot.values
+    assert src_rot_k is not None and src_rot_v is not None
+    assert _equal(dst_rot.keys, src_rot_k)
+    assert _equal(dst_rot.values, src_rot_v)
+    assert dst_rot._idx == seq_len
+
+    assert len(dst_arr.state) == 2
+    s0, s1 = dst_arr.state[0], dst_arr.state[1]
+    assert s0 is not None and s1 is not None
+    assert _equal(s0, arr_a)
+    assert _equal(s1, arr_b)
+    _ = inject_rotating_kv_chunk
+    _ = nhd_to_bhsd
diff --git a/src/exo/worker/engines/mlx/disaggregated/tests/test_protocol_roundtrip.py b/src/exo/worker/engines/mlx/disaggregated/tests/test_protocol_roundtrip.py
new file mode 100644
index 000000000..7fdfba38e
--- /dev/null
+++ b/src/exo/worker/engines/mlx/disaggregated/tests/test_protocol_roundtrip.py
@@ -0,0 +1,154 @@
+import io
+
+import pytest
+
+from exo.worker.disaggregated.protocol import (
+    ArraysState,
+    Done,
+    ErrorMessage,
+    Header,
+    KVChunk,
+    ProtocolError,
+    TensorBlob,
+    read_header,
+    read_message,
+    write_arrays_state,
+    write_done,
+    write_error,
+    write_header,
+    write_kv_chunk,
+)
+
+
+def _mk_bytes(n: int) -> bytes:
+    return bytes(i % 256 for i in range(n))  # repeating 0..255 pattern, n bytes
+
+
+def test_header_roundtrip() -> None:
+    """A Header written by write_header is read back equal to the original."""
+    sent = Header(
+        request_id="r", model_id="m", num_layers=32, dtype="bfloat16", start_pos=42
+    )
+
+    stream = io.BytesIO()
+    write_header(stream, sent)
+    stream.seek(0)
+    received = read_header(stream)
+
+    assert received == sent
+    # Spot-check individual fields on top of whole-object equality.
+    assert received.dtype == "bfloat16"
+    assert received.num_layers == 32
+    assert received.start_pos == 42
+
+
+def test_kv_chunk_roundtrip() -> None:
+    """write_kv_chunk output is parsed by read_message into a matching KVChunk."""
+    num_tokens, n_heads, head_dim = 7, 4, 8
+    n_bytes = num_tokens * n_heads * head_dim * 2  # 2 bytes per bfloat16 element
+    keys = _mk_bytes(n_bytes)
+    values = keys[::-1]
+
+    stream = io.BytesIO()
+    write_kv_chunk(
+        stream,
+        layer_idx=3,
+        num_tokens=num_tokens,
+        n_heads=n_heads,
+        head_dim=head_dim,
+        dtype="bfloat16",
+        keys=keys,
+        values=values,
+    )
+    stream.seek(0)
+    chunk = read_message(stream)
+    assert isinstance(chunk, KVChunk)
+    assert (chunk.layer_idx, chunk.dtype) == (3, "bfloat16")
+    assert chunk.shape == (num_tokens, n_heads, head_dim)
+    assert chunk.keys == keys
+    assert chunk.values == values
+
+
+def test_arrays_state_roundtrip() -> None:
+    """write_arrays_state blobs come back with the same dtype, shape and bytes."""
+    sent = [
+        TensorBlob(dtype="float32", shape=(2, 3), data=_mk_bytes(2 * 3 * 4)),
+        TensorBlob(dtype="bfloat16", shape=(5,), data=_mk_bytes(5 * 2)),
+    ]
+    stream = io.BytesIO()
+    write_arrays_state(stream, layer_idx=9, arrays=sent)
+    stream.seek(0)
+    msg = read_message(stream)
+    assert isinstance(msg, ArraysState)
+    assert msg.layer_idx == 9
+    assert len(msg.arrays) == 2
+    # Compare field by field against the blobs that were written.
+    for got, want in zip(msg.arrays, sent):
+        assert got.dtype == want.dtype
+        assert got.shape == want.shape
+        assert got.data == want.data
+
+
+def test_done_roundtrip() -> None:
+    """A Done message preserves its total_tokens value across write and read."""
+    stream = io.BytesIO()
+    write_done(stream, 1234)
+    decoded = read_message(io.BytesIO(stream.getvalue()))
+    assert isinstance(decoded, Done)
+    assert decoded.total_tokens == 1234
+
+
+def test_error_roundtrip() -> None:
+    """An ErrorMessage keeps both its code and its text across write and read."""
+    stream = io.BytesIO()
+    write_error(stream, code=42, message="boom")
+    stream.seek(0)
+    decoded = read_message(stream)
+    assert isinstance(decoded, ErrorMessage)
+    assert (decoded.code, decoded.message) == (42, "boom")
+
+
+def test_stream_of_messages() -> None:
+    """Messages are read back in write order; reading past the end gives None."""
+    hdr = Header(num_layers=2, dtype="float32")
+    payload = _mk_bytes(1 * 1 * 2 * 4)  # 1 token x 1 head x 2 dims x 4-byte float32
+    stream = io.BytesIO()
+    write_header(stream, hdr)
+    write_kv_chunk(
+        stream,
+        layer_idx=0,
+        num_tokens=1,
+        n_heads=1,
+        head_dim=2,
+        dtype="float32",
+        keys=payload,
+        values=payload,
+    )
+    write_arrays_state(
+        stream,
+        layer_idx=1,
+        arrays=[TensorBlob(dtype="float32", shape=(1,), data=_mk_bytes(4))],
+    )
+    write_done(stream, total_tokens=1)
+    stream.seek(0)
+
+    got_hdr = read_header(stream)
+    assert got_hdr == hdr
+
+    # Read one message more than was written; the extra read must yield None.
+    msgs = [read_message(stream) for _ in range(4)]
+    assert isinstance(msgs[0], KVChunk)
+    assert isinstance(msgs[1], ArraysState)
+    assert isinstance(msgs[2], Done)
+    assert msgs[3] is None
+
+
+def test_corrupt_message_raises() -> None:
+    """read_message raises ProtocolError when the bytes after the header are junk."""
+    stream = io.BytesIO()
+    write_header(stream, Header(num_layers=1, dtype="float32"))
+    stream.write((5).to_bytes(4, "big") + b"\xff" * 5)
+    stream.seek(0)
+    _ = read_header(stream)
+    with pytest.raises(ProtocolError):
+        _ = read_message(stream)
diff --git a/src/exo/worker/engines/mlx/disaggregated/tests/test_server_drain.py b/src/exo/worker/engines/mlx/disaggregated/tests/test_server_drain.py
new file mode 100644
index 000000000..a9ad65e5a
--- /dev/null
+++ b/src/exo/worker/engines/mlx/disaggregated/tests/test_server_drain.py
@@ -0,0 +1,120 @@
+"""Server thread receives request, runs resolve in another thread (mimicking
+runner main thread + work queue), streams cache bytes."""
+
+import queue
+import threading
+from typing import BinaryIO
+
+import mlx.core as mx
+import numpy as np
+import pytest
+from mlx_lm.models.cache import KVCache
+
+from exo.utils.ports import random_ephemeral_port
+from exo.worker.disaggregated.protocol import Header, write_done, write_header
+from exo.worker.disaggregated.server import PrefillRequest, PrefillServer
+from exo.worker.engines.mlx.disaggregated.adapter import (
+    send_mlx_kv_cache,
+    wire_dtype_from_cache,
+)
+from exo.worker.engines.mlx.disaggregated.client import (
+    PrefillResult,
+    ingest_into_mlx_cache,
+    remote_prefill_fetch,
+)
+
+
+def _equal(a: mx.array, b: mx.array) -> bool:
+    """Tell whether two arrays agree in dtype, in shape and in all elements."""
+    if a.dtype != b.dtype or tuple(a.shape) != tuple(b.shape):
+        return False
+    if a.dtype == mx.bfloat16:
+        # For bfloat16 the uint16 views of both arrays are compared instead.
+        a, b = a.view(mx.uint16), b.view(mx.uint16)
+    return bool(np.array_equal(np.asarray(a), np.asarray(b)))
+
+
+def _make_cache(seq_len: int, n_heads: int, head_dim: int) -> KVCache:
+    """Build a KVCache filled with seeded random bfloat16 keys and values."""
+    shape = (1, n_heads, seq_len, head_dim)
+    mx.random.seed(0)
+
+    gold = KVCache()
+    # Keys are drawn before values, so the two tensors come from successive draws.
+    gold.keys = (mx.random.uniform(shape=shape) * 10).astype(mx.bfloat16)
+    gold.values = (mx.random.uniform(shape=shape) * 10).astype(mx.bfloat16)
+    gold.offset = seq_len
+    return gold
+
+
+@pytest.mark.slow
+def test_server_drains_via_main_thread() -> None:
+    """Requests queued by the server thread are served from the test's main thread."""
+    seq_len, n_heads, head_dim = 4, 2, 4
+    gold = _make_cache(seq_len, n_heads, head_dim)
+
+    # Queue of (job, wfile, completion event) handed from resolve() to this thread.
+    pending: queue.Queue[tuple[PrefillRequest, BinaryIO, threading.Event]] = (
+        queue.Queue()
+    )
+
+    def resolve(job: PrefillRequest, wfile: BinaryIO) -> bool:
+        """Enqueue the job and wait up to 5 s for the main thread to serve it."""
+        served = threading.Event()
+        pending.put((job, wfile, served))
+        return served.wait(timeout=5)
+
+    server = PrefillServer(
+        resolve=resolve, host="127.0.0.1", port=(port := random_ephemeral_port())
+    )
+
+    def serve_one(wfile: BinaryIO) -> None:
+        """Write header, gold cache and done marker to ``wfile``."""
+        dtype = wire_dtype_from_cache([gold])
+        header = Header(request_id="req-1", model_id="m", num_layers=1, dtype=dtype)
+        write_header(wfile, header)
+        tokens = send_mlx_kv_cache(wfile, [gold], dtype=dtype)
+        write_done(wfile, tokens)
+        wfile.flush()
+
+    fetch_result: list[PrefillResult] = []
+
+    def fetcher() -> None:
+        """Run the blocking client fetch and record its result."""
+        request = PrefillRequest(
+            model_id="m", token_ids=list(range(seq_len)), request_id="req-1"
+        )
+        fetch_result.append(
+            remote_prefill_fetch(endpoint=f"127.0.0.1:{port}", request=request)
+        )
+
+    # The client runs in a background thread so this thread can serve the request.
+    fetch = threading.Thread(target=fetcher, daemon=True)
+    fetch.start()
+    try:
+        job, wfile, served = pending.get(timeout=5)
+        try:
+            serve_one(wfile)
+        finally:
+            # Always release the thread blocked in resolve(), even on failure.
+            served.set()
+        fetch.join(timeout=5)
+        assert fetch_result, "fetcher did not return"
+        result = fetch_result[0]
+        assert job.request_id == "req-1"
+        assert result.total_tokens == seq_len
+
+        # Rebuild the cache client-side and compare it with the source.
+        dst = KVCache()
+        ingest_into_mlx_cache(result, [dst])
+        assert dst.offset == seq_len
+        dst_k = dst.keys
+        dst_v = dst.values
+        gold_k = gold.keys
+        gold_v = gold.values
+        assert dst_k is not None and dst_v is not None
+        assert gold_k is not None and gold_v is not None
+        assert _equal(dst_k, gold_k)
+        assert _equal(dst_v, gold_v)
+    finally:
+        server.stop()
diff --git a/src/exo/worker/engines/mlx/generator/batch_generate.py b/src/exo/worker/engines/mlx/generator/batch_generate.py
index 61b927a99..5c7394dde 100644
--- a/src/exo/worker/engines/mlx/generator/batch_generate.py
+++ b/src/exo/worker/engines/mlx/generator/batch_generate.py
@@ -1,7 +1,8 @@
 import contextlib
 import time
+import uuid
 from dataclasses import dataclass, field
-from typing import Callable, cast
+from typing import Callable, Literal, cast
 
 import mlx.core as mx
 from mlx_lm.generate import (
@@ -23,7 +24,6 @@ from exo.api.types import (
     Usage,
 )
 from exo.shared.types.memory import Memory
-from exo.shared.types.mlx import KVCacheType, Model
 from exo.shared.types.text_generation import TextGenerationTaskParams
 from exo.shared.types.worker.runner_response import GenerationResponse
 from exo.worker.engines.mlx.cache import (
@@ -40,10 +40,12 @@ from exo.worker.engines.mlx.generator.generate import (
     patch_embed_tokens,
     prefill,
 )
+from exo.worker.engines.mlx.generator.remote_prefill import remote_prefill
 from exo.worker.engines.mlx.patches.opt_batch_gen import (
     set_needs_topk,
     take_ready_topk,
 )
+from exo.worker.engines.mlx.types import KVCacheType, Model
 from exo.worker.engines.mlx.utils_mlx import (
     fix_unmatched_think_end_tokens,
     system_prompt_token_count,
@@ -57,6 +59,7 @@ from exo.worker.engines.mlx.vision import (
 from exo.worker.runner.bootstrap import logger
 
 _MIN_PREFIX_HIT_RATIO_TO_UPDATE = 0.5
+REMOTE_PREFILL_MIN_TOKENS = 1000
 
 
 def _stop_sequences(task_params: TextGenerationTaskParams) -> list[str]:
@@ -74,7 +77,6 @@ class _EngineTask:
     all_prompt_tokens: mx.array
     prefix_hit_length: int
     matched_index: int | None
-    cache_snapshots: list[CacheSnapshot] | None
     detokenizer: StreamingDetokenizer
     on_generation_token: Callable[[], None] | None = None
     generated_text_parts: list[str] = field(default_factory=list)
@@ -82,6 +84,7 @@ class _EngineTask:
     completion_tokens: int = 0
     generation_start_time: float = 0.0
     prefill_tps: float = 0.0
+    prefix_cache_hit: Literal["none", "partial", "exact"] = "none"
     media_regions: list[MediaRegion] = field(default_factory=list)
     first_gen_token_time: float | None = None
     last_gen_token_time: float | None = None
@@ -155,11 +158,16 @@ class ExoBatchGenerator:
 
         prefix_hit_length = 0
         matched_index: int | None = None
+        is_exact_hit = False
         prompt_tokens = all_prompt_tokens
 
-        if self.kv_prefix_cache is not None and not is_bench:
-            cache, remaining_tokens, matched_index = self.kv_prefix_cache.get_kv_cache(
-                self.model, all_prompt_tokens, media_regions=media_regions
+        if self.kv_prefix_cache is not None and (
+            not is_bench or task_params.use_prefix_cache
+        ):
+            cache, remaining_tokens, matched_index, is_exact_hit = (
+                self.kv_prefix_cache.get_kv_cache(
+                    self.model, all_prompt_tokens, media_regions=media_regions
+                )
             )
             prefix_hit_length = len(all_prompt_tokens) - len(remaining_tokens)
             if prefix_hit_length > 0:
@@ -168,8 +176,6 @@ class ExoBatchGenerator:
                     f"cached ({100 * prefix_hit_length / len(all_prompt_tokens):.1f}%)"
                 )
                 prompt_tokens = remaining_tokens
-            else:
-                cache = make_kv_cache(self.model)
         else:
             cache = make_kv_cache(self.model)
 
@@ -196,17 +202,54 @@ class ExoBatchGenerator:
             if vision is not None
             else contextlib.nullcontext()
         )
+        uncached_count = len(prompt_tokens)
+        use_remote = (
+            uncached_count > REMOTE_PREFILL_MIN_TOKENS
+            and task_params.prefill_endpoint is not None
+        )
+
+        _prefill_tps: float = 0.0
+        _prefill_tokens: int = 0
+        cache_snapshots: list[CacheSnapshot] = []
+        remote_prefilled = False
         with vision_ctx:
-            _prefill_tps, _prefill_tokens, cache_snapshots = prefill(
-                self.model,
-                self.tokenizer,
-                sampler,
-                prompt_tokens[:-1],
-                cache,
-                self.group,
-                on_prefill_progress,
-                distributed_prompt_progress_callback,
-            )
+            if use_remote and task_params.prefill_endpoint is not None:
+                try:
+                    _prefill_tps, _prefill_tokens, cache_snapshots = remote_prefill(
+                        prompt_tokens[:-1],
+                        cache,
+                        on_prefill_progress,
+                        endpoint=task_params.prefill_endpoint,
+                        request_id=str(uuid.uuid4()),
+                        model_id=str(task_params.model),
+                        start_pos=prefix_hit_length,
+                    )
+                    remote_prefilled = True
+                except Exception:
+                    logger.opt(exception=True).warning(
+                        "Remote prefill failed, falling back to local prefill"
+                    )
+
+            if not remote_prefilled:
+                _prefill_tps, _prefill_tokens, cache_snapshots = prefill(
+                    self.model,
+                    self.tokenizer,
+                    sampler,
+                    prompt_tokens[:-1],
+                    cache,
+                    self.group,
+                    on_prefill_progress,
+                    distributed_prompt_progress_callback,
+                )
+
+        prefix_cache_hit: Literal["none", "partial", "exact"] = "none"
+        if matched_index is not None and prefix_hit_length > 0:
+            assert self.kv_prefix_cache is not None
+            if is_exact_hit:
+                prefix_cache_hit = "exact"
+                _prefill_tps = self.kv_prefix_cache.prefill_tps[matched_index]
+            else:
+                prefix_cache_hit = "partial"
 
         # We need to clamp rotating kv caches to max size so that mlx lm's _merge_caches behaves
         for c in cache:
@@ -221,7 +264,7 @@ class ExoBatchGenerator:
                 c.values = c._trim(trim_size, c.values)
                 c._idx = c.max_size
 
-        if not is_bench:
+        if not is_bench or task_params.use_prefix_cache:
             min_prefix_hit_length = max(
                 1000, system_prompt_token_count(task_params, self.tokenizer)
             )
@@ -233,6 +276,7 @@ class ExoBatchGenerator:
                 matched_index,
                 min_prefix_hit_length,
                 media_regions,
+                prefill_tps=_prefill_tps,
             )
 
         last_tokens = prompt_tokens[-2:]
@@ -240,7 +284,11 @@ class ExoBatchGenerator:
         logits_processors: list[Callable[[mx.array, mx.array], mx.array]] = (
             make_logits_processors(
                 repetition_penalty=task_params.repetition_penalty,
-                repetition_context_size=task_params.repetition_context_size,
+                repetition_context_size=task_params.repetition_context_size
+                if task_params.repetition_context_size is not None
+                else 20,
+                presence_penalty=task_params.presence_penalty,
+                frequency_penalty=task_params.frequency_penalty,
             )
         )
         if is_bench:
@@ -268,11 +316,11 @@ class ExoBatchGenerator:
             all_prompt_tokens=all_prompt_tokens,
             prefix_hit_length=prefix_hit_length,
             matched_index=matched_index,
-            cache_snapshots=cache_snapshots or None,
             detokenizer=self.tokenizer.detokenizer,
             on_generation_token=on_generation_token,
             generation_start_time=time.perf_counter(),
             prefill_tps=_prefill_tps,
+            prefix_cache_hit=prefix_cache_hit,
             media_regions=media_regions,
         )
 
@@ -383,6 +431,7 @@ class ExoBatchGenerator:
                     prompt_tokens=len(state.all_prompt_tokens),
                     generation_tokens=state.completion_tokens,
                     peak_memory_usage=Memory.from_gb(mx.get_peak_memory() / 1e9),
+                    prefix_cache_hit=state.prefix_cache_hit,
                 )
                 total_prompt_tokens = len(state.all_prompt_tokens)
                 usage = Usage(
@@ -439,6 +488,7 @@ class ExoBatchGenerator:
 
     def close(self) -> None:
         self._mlx_gen.close()
+        mx.clear_cache()
 
     def _save_prefix_cache(
         self,
@@ -449,6 +499,7 @@ class ExoBatchGenerator:
         matched_index: int | None,
         min_prefix_hit_length: int = 1000,
         media_regions: list[MediaRegion] | None = None,
+        prefill_tps: float = 0.0,
     ) -> None:
         if self.kv_prefix_cache is None:
             return
@@ -470,6 +521,7 @@ class ExoBatchGenerator:
                     cache_snapshots,
                     restore_pos=prefix_hit_length,
                     media_regions=media_regions,
+                    prefill_tps=prefill_tps,
                 )
             else:
                 self.kv_prefix_cache.add_kv_cache(
@@ -477,6 +529,7 @@ class ExoBatchGenerator:
                     cache,
                     cache_snapshots,
                     media_regions=media_regions,
+                    prefill_tps=prefill_tps,
                 )
         except Exception:
             logger.warning("Failed to save prefix cache", exc_info=True)
diff --git a/src/exo/worker/engines/mlx/generator/generate.py b/src/exo/worker/engines/mlx/generator/generate.py
index 3a439f3e3..2e3d05125 100644
--- a/src/exo/worker/engines/mlx/generator/generate.py
+++ b/src/exo/worker/engines/mlx/generator/generate.py
@@ -2,7 +2,7 @@ import contextlib
 import functools
 import math
 import time
-from copy import deepcopy
+import uuid
 from typing import Callable, Generator, cast, get_args
 
 import mlx.core as mx
@@ -10,7 +10,6 @@ from mlx_lm.generate import (
     maybe_quantize_kv_cache,
     stream_generate,
 )
-from mlx_lm.models.cache import ArraysCache, RotatingKVCache
 from mlx_lm.sample_utils import make_logits_processors, make_sampler
 from mlx_lm.tokenizer_utils import TokenizerWrapper
 
@@ -24,7 +23,6 @@ from exo.api.types import (
 )
 from exo.shared.types.common import ModelId
 from exo.shared.types.memory import Memory
-from exo.shared.types.mlx import KVCacheType, Model
 from exo.shared.types.text_generation import (
     InputMessage,
     InputMessageContent,
@@ -44,8 +42,10 @@ from exo.worker.engines.mlx.auto_parallel import (
 from exo.worker.engines.mlx.cache import (
     CacheSnapshot,
     KVPrefixCache,
+    copy_snapshot_entry,
     encode_prompt,
     has_non_kv_caches,
+    is_non_trimmable_cache_entry,
     make_kv_cache,
     snapshot_ssm_states,
 )
@@ -55,6 +55,8 @@ from exo.worker.engines.mlx.constants import (
     KV_GROUP_SIZE,
     MAX_TOKENS,
 )
+from exo.worker.engines.mlx.generator.remote_prefill import remote_prefill
+from exo.worker.engines.mlx.types import KVCacheType, Model
 from exo.worker.engines.mlx.utils_mlx import (
     apply_chat_template,
     fix_unmatched_think_end_tokens,
@@ -70,6 +72,8 @@ from exo.worker.engines.mlx.vision import (
 )
 from exo.worker.runner.bootstrap import logger
 
+REMOTE_PREFILL_MIN_TOKENS = 1000
+
 generation_stream = mx.new_stream(mx.default_device())
 
 _MIN_PREFIX_HIT_RATIO_TO_UPDATE = 0.5
@@ -370,14 +374,16 @@ def prefill(
 
     # stream_generate added 1 extra generated token to the cache, so we should trim it.
     # Because of needing to roll back arrays cache, we will generate on 2 tokens so trim 1 more.
-    pre_gen = deepcopy(snapshots[-2]) if has_ssm else None
+    pre_gen = snapshots[-2] if has_ssm else None
     for i, c in enumerate(cache):
-        if has_ssm and isinstance(c, (ArraysCache, RotatingKVCache)):
+        non_trimmable = is_non_trimmable_cache_entry(c)
+        if has_ssm and non_trimmable:
             assert pre_gen is not None
-            if pre_gen.states[i] is not None:
-                cache[i] = deepcopy(pre_gen.states[i])  # type: ignore
+            restored = copy_snapshot_entry(pre_gen.states[i])
+            if restored is not None:
+                cache[i] = restored  # type: ignore
         else:
-            assert not isinstance(c, (ArraysCache, RotatingKVCache))
+            assert not non_trimmable
             c.trim(2)
 
     elapsed = time.perf_counter() - start_time
@@ -568,18 +574,21 @@ def mlx_generate(
 
     # Do not use the prefix cache if we are trying to do benchmarks.
     is_bench = task.bench
-    if is_bench:
+    if is_bench and not task.use_prefix_cache:
         kv_prefix_cache = None
 
     # Use prefix cache if available, otherwise create fresh cache
     prefix_hit_length = 0
     matched_index: int | None = None
+    is_exact_hit = False
     if kv_prefix_cache is None:
         caches = make_kv_cache(model=model)
         prompt_tokens = all_prompt_tokens
     else:
-        caches, prompt_tokens, matched_index = kv_prefix_cache.get_kv_cache(
-            model, all_prompt_tokens, media_regions=media_regions
+        caches, prompt_tokens, matched_index, is_exact_hit = (
+            kv_prefix_cache.get_kv_cache(
+                model, all_prompt_tokens, media_regions=media_regions
+            )
         )
         prefix_hit_length = len(all_prompt_tokens) - len(prompt_tokens)
         if prefix_hit_length > 0:
@@ -590,7 +599,11 @@ def mlx_generate(
     logits_processors: list[Callable[[mx.array, mx.array], mx.array]] = (
         make_logits_processors(
             repetition_penalty=task.repetition_penalty,
-            repetition_context_size=task.repetition_context_size,
+            repetition_context_size=task.repetition_context_size
+            if task.repetition_context_size is not None
+            else 20,
+            presence_penalty=task.presence_penalty,
+            frequency_penalty=task.frequency_penalty,
         )
     )
     if is_bench:
@@ -624,19 +637,75 @@ def mlx_generate(
         if vision is not None
         else contextlib.nullcontext()
     )
+    use_remote = (
+        len(prompt_tokens) > REMOTE_PREFILL_MIN_TOKENS
+        and task.prefill_endpoint is not None
+    )
+    remote_prefilled = False
+    prefill_tps = 0.0
+    prefill_tokens = 0
+    ssm_snapshots_list: list[CacheSnapshot] = []
     with maybe_vision_ctx:
-        prefill_tps, prefill_tokens, ssm_snapshots_list = prefill(
-            model,
-            tokenizer,
-            sampler,
-            prompt_tokens[:-1],
-            caches,
-            group,
-            on_prefill_progress,
-            distributed_prompt_progress_callback,
-        )
+        if use_remote and task.prefill_endpoint is not None:
+            try:
+                prefill_tps, prefill_tokens, ssm_snapshots_list = remote_prefill(
+                    prompt_tokens[:-1],
+                    caches,
+                    on_prefill_progress,
+                    endpoint=task.prefill_endpoint,
+                    request_id=str(uuid.uuid4()),
+                    model_id=str(task.model),
+                    start_pos=prefix_hit_length,
+                )
+                remote_prefilled = True
+            except Exception:
+                logger.opt(exception=True).warning(
+                    "Remote prefill failed, falling back to local prefill"
+                )
+        if not remote_prefilled:
+            prefill_tps, prefill_tokens, ssm_snapshots_list = prefill(
+                model,
+                tokenizer,
+                sampler,
+                prompt_tokens[:-1],
+                caches,
+                group,
+                on_prefill_progress,
+                distributed_prompt_progress_callback,
+            )
     cache_snapshots: list[CacheSnapshot] | None = ssm_snapshots_list or None
 
+    if kv_prefix_cache is not None and matched_index is not None and is_exact_hit:
+        prefill_tps = kv_prefix_cache.prefill_tps[matched_index]
+
+    if kv_prefix_cache is not None:
+        hit_ratio = (
+            prefix_hit_length / len(all_prompt_tokens)
+            if len(all_prompt_tokens) > 0
+            else 0.0
+        )
+        if matched_index is not None and (
+            prefix_hit_length >= min_prefix_hit_length
+            and hit_ratio >= _MIN_PREFIX_HIT_RATIO_TO_UPDATE
+        ):
+            kv_prefix_cache.update_kv_cache(
+                matched_index,
+                all_prompt_tokens,
+                caches,
+                cache_snapshots,
+                restore_pos=prefix_hit_length,
+                media_regions=media_regions,
+                prefill_tps=prefill_tps,
+            )
+        else:
+            kv_prefix_cache.add_kv_cache(
+                all_prompt_tokens,
+                caches,
+                cache_snapshots,
+                media_regions=media_regions,
+                prefill_tps=prefill_tps,
+            )
+
     # stream_generate starts from the last token
     last_token = prompt_tokens[-2:]
 
@@ -736,40 +805,6 @@ def mlx_generate(
                 f"{prefill_tps:.1f} tok/s, generated {generated_tokens} tokens @ "
                 f"{generation_tps:.1f} tok/s"
             )
-            if kv_prefix_cache is not None:
-                generated_tokens_array = mx.array(
-                    tokenizer.encode(
-                        "".join(generated_text_parts), add_special_tokens=False
-                    )
-                )
-                full_prompt_tokens = mx.concatenate(
-                    [all_prompt_tokens, generated_tokens_array]
-                )
-                hit_ratio = (
-                    prefix_hit_length / len(all_prompt_tokens)
-                    if len(all_prompt_tokens) > 0
-                    else 0.0
-                )
-                if matched_index is not None and (
-                    prefix_hit_length >= min_prefix_hit_length
-                    and hit_ratio >= _MIN_PREFIX_HIT_RATIO_TO_UPDATE
-                ):
-                    kv_prefix_cache.update_kv_cache(
-                        matched_index,
-                        full_prompt_tokens,
-                        caches,
-                        cache_snapshots,
-                        restore_pos=prefix_hit_length,
-                        media_regions=media_regions,
-                    )
-                else:
-                    kv_prefix_cache.add_kv_cache(
-                        full_prompt_tokens,
-                        caches,
-                        cache_snapshots,
-                        media_regions=media_regions,
-                    )
-
         if on_generation_token is not None:
             on_generation_token()
 
diff --git a/src/exo/worker/engines/mlx/generator/remote_prefill.py b/src/exo/worker/engines/mlx/generator/remote_prefill.py
new file mode 100644
index 000000000..b58ec65b8
--- /dev/null
+++ b/src/exo/worker/engines/mlx/generator/remote_prefill.py
@@ -0,0 +1,72 @@
+import time
+from collections.abc import Callable
+from typing import cast
+
+import mlx.core as mx
+from mlx_lm.models.cache import ArraysCache, KVCache, RotatingKVCache
+
+from exo.worker.disaggregated.protocol import Header, KVChunk
+from exo.worker.disaggregated.server import PrefillRequest
+from exo.worker.engines.mlx.cache import CacheSnapshot, snapshot_ssm_states
+from exo.worker.engines.mlx.disaggregated.client import (
+    ingest_into_mlx_cache,
+    remote_prefill_fetch,
+)
+from exo.worker.engines.mlx.types import KVCacheType
+from exo.worker.runner.bootstrap import logger
+
+
+def remote_prefill(
+    prompt_tokens: mx.array,
+    cache: KVCacheType,
+    on_prefill_progress: Callable[[int, int], None] | None,
+    *,
+    endpoint: str,
+    request_id: str,
+    model_id: str,
+    start_pos: int = 0,
+) -> tuple[float, int, list[CacheSnapshot]]:
+    """Fetch prefill results from `endpoint` and ingest them into `cache`.
+
+    Returns (tokens/s over fetch + ingest, final offset - start_pos, [snapshot]).
+    """
+    t0 = time.perf_counter()
+    prompt_len = int(prompt_tokens.shape[0])
+    layer_count = 0  # stays 0 until _on_header runs
+
+    def _on_header(header: Header) -> None:
+        nonlocal layer_count
+        layer_count = header.num_layers
+
+    def _on_chunk(_chunk: KVChunk, chunks_received: int) -> None:
+        # Progress is reported only on whole multiples of the layer count.
+        if on_prefill_progress is None or layer_count <= 0:
+            return
+        if chunks_received % layer_count == 0:
+            done = min(chunks_received // layer_count, prompt_len)
+            on_prefill_progress(done, prompt_len)
+
+    request = PrefillRequest(
+        model_id=model_id,
+        token_ids=cast(list[int], prompt_tokens.tolist()),
+        start_pos=start_pos,
+        request_id=request_id,
+    )
+    result = remote_prefill_fetch(
+        endpoint, request, on_header=_on_header, on_kv_chunk=_on_chunk
+    )
+    t_received = time.perf_counter()
+
+    # NOTE(review): shallow copy of `cache`; assumes ingest mutates entries in place.
+    entries = cast(list[KVCache | RotatingKVCache | ArraysCache], list(cache))
+    final_offset = ingest_into_mlx_cache(result, entries, start_pos=start_pos)
+    t_done = time.perf_counter()
+    num_tokens = final_offset - start_pos
+    tps = num_tokens / max(t_done - t0, 0.001)  # floor guards a near-zero duration
+    logger.info(
+        f"Remote prefill: {num_tokens} tokens (start_pos={start_pos}, "
+        f"final_offset={final_offset}) at {tps:.0f} tok/s, "
+        f"transfer={(t_received - t0) * 1000:.0f}ms, "
+        f"inject={(t_done - t_received) * 1000:.0f}ms"
+    )
+    return tps, num_tokens, [snapshot_ssm_states(cache)]
diff --git a/src/exo/worker/engines/mlx/patches/opt_batch_gen.py b/src/exo/worker/engines/mlx/patches/opt_batch_gen.py
index 7b07412b3..e7bff4196 100644
--- a/src/exo/worker/engines/mlx/patches/opt_batch_gen.py
+++ b/src/exo/worker/engines/mlx/patches/opt_batch_gen.py
@@ -58,6 +58,7 @@ def _patched_step(self: GenerationBatch) -> tuple[list[int], list[mx.array]]:
     self._current_tokens = self._next_tokens
     self._current_logprobs = self._next_logprobs
     inputs = self._current_tokens
+    assert inputs is not None, "_step requires initialized _next_tokens"
 
     buf = _get_buffer(self)
     buf.ready = buf.pending
@@ -87,7 +88,7 @@ def _patched_step(self: GenerationBatch) -> tuple[list[int], list[mx.array]]:
         sampled = self.fallback_sampler(logprobs)
 
     self._next_tokens = sampled
-    self._next_logprobs = list(logprobs)
+    self._next_logprobs = logprobs
 
     if buf.needs_topk:
         batch_size = len(self.uids)
@@ -106,19 +107,29 @@ def _patched_step(self: GenerationBatch) -> tuple[list[int], list[mx.array]]:
         )
         mx.async_eval(
             self._next_tokens,
-            *self._next_logprobs,
+            self._next_logprobs,
             pending_indices,
             pending_values,
             pending_selected,
         )
     else:
-        mx.async_eval(self._next_tokens, *self._next_logprobs)
+        mx.async_eval(self._next_tokens, self._next_logprobs)
+
+    current_lp = self._current_logprobs
+    if isinstance(current_lp, mx.array):
+        mx.eval(inputs, current_lp)
+    elif current_lp:
+        mx.eval(inputs, *current_lp)
+    else:
+        mx.eval(inputs)
 
-    mx.eval(inputs, *self._current_logprobs)
     token_list = cast(list[int], inputs.tolist())
     for sti, ti in zip(self.tokens, token_list, strict=True):
         sti.append(ti)
-    return token_list, self._current_logprobs
+
+    if isinstance(current_lp, mx.array):
+        current_lp = list(current_lp)
+    return token_list, current_lp
 
 
 def apply_batch_gen_patch() -> None:
diff --git a/src/exo/worker/engines/mlx/tests/test_batch_generate.py b/src/exo/worker/engines/mlx/tests/test_batch_generate.py
index 3d48590cf..6904c18ae 100644
--- a/src/exo/worker/engines/mlx/tests/test_batch_generate.py
+++ b/src/exo/worker/engines/mlx/tests/test_batch_generate.py
@@ -24,9 +24,9 @@ from transformers import AutoTokenizer
 
 # Import batch_generate to activate the right-padding BatchKVCache patch
 import exo.worker.engines.mlx.generator.batch_generate  # noqa: F401
-from exo.shared.types.mlx import Model
 from exo.worker.engines.mlx.cache import encode_prompt, make_kv_cache
 from exo.worker.engines.mlx.generator.generate import prefill
+from exo.worker.engines.mlx.types import Model
 
 NUM_STEPS = 20
 
diff --git a/src/exo/shared/types/mlx.py b/src/exo/worker/engines/mlx/types.py
similarity index 82%
rename from src/exo/shared/types/mlx.py
rename to src/exo/worker/engines/mlx/types.py
index f15fed42d..0b70c1aa5 100644
--- a/src/exo/shared/types/mlx.py
+++ b/src/exo/worker/engines/mlx/types.py
@@ -11,10 +11,16 @@ from mlx_lm.models.cache import (
     QuantizedKVCache,
     RotatingKVCache,
 )
+from mlx_lm.models.deepseek_v4 import DeepseekV4Cache
 
 # This list contains one cache entry per transformer layer
 KVCacheType = Sequence[
-    KVCache | RotatingKVCache | QuantizedKVCache | ArraysCache | CacheList
+    KVCache
+    | RotatingKVCache
+    | QuantizedKVCache
+    | ArraysCache
+    | CacheList
+    | DeepseekV4Cache
 ]
 
 
diff --git a/src/exo/worker/engines/mlx/utils_mlx.py b/src/exo/worker/engines/mlx/utils_mlx.py
index 63f1b6ed8..730abf64e 100644
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -4,6 +4,7 @@ import re
 import sys
 import tempfile
 import time
+from collections.abc import Generator
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
 
@@ -43,7 +44,6 @@ from pydantic import RootModel
 from exo.download.download_utils import build_model_path
 from exo.shared.types.common import Host
 from exo.shared.types.memory import Memory
-from exo.shared.types.mlx import Model
 from exo.shared.types.tasks import TaskId, TextGeneration
 from exo.shared.types.text_generation import ChatTemplateValue, TextGenerationTaskParams
 from exo.shared.types.worker.instances import (
@@ -51,6 +51,7 @@ from exo.shared.types.worker.instances import (
     MlxJacclInstance,
     MlxRingInstance,
 )
+from exo.shared.types.worker.runner_response import ModelLoadingResponse
 from exo.shared.types.worker.shards import (
     CfgShardMetadata,
     PipelineShardMetadata,
@@ -58,18 +59,14 @@ from exo.shared.types.worker.shards import (
     TensorShardMetadata,
 )
 from exo.worker.engines.mlx.auto_parallel import (
-    LayerLoadedCallback,
-    TimeoutCallback,
-    eval_with_timeout,
     get_inner_model,
     get_layers,
     pipeline_auto_parallel,
     tensor_auto_parallel,
 )
+from exo.worker.engines.mlx.types import Model
 from exo.worker.runner.bootstrap import logger
 
-Group = mx.distributed.Group
-
 
 def get_weights_size(model_shard_meta: ShardMetadata) -> Memory:
     return Memory.from_float_kb(
@@ -84,10 +81,6 @@ def get_weights_size(model_shard_meta: ShardMetadata) -> Memory:
     )
 
 
-class ModelLoadingTimeoutError(Exception):
-    pass
-
-
 class HostList(RootModel[list[str]]):
     @classmethod
     def from_hosts(cls, hosts: list[Host]) -> "HostList":
@@ -96,7 +89,7 @@ class HostList(RootModel[list[str]]):
 
 def mlx_distributed_init(
     bound_instance: BoundInstance,
-) -> Group:
+) -> mx.distributed.Group:
     """
     Initialize MLX distributed.
     """
@@ -122,7 +115,8 @@ def mlx_distributed_init(
 
                 os.environ["MLX_HOSTFILE"] = coordination_file
                 os.environ["MLX_RANK"] = str(rank)
-                os.environ["MLX_RING_VERBOSE"] = "1"
+                # os.environ["MLX_RING_VERBOSE"] = "1"  # NOTE: we don't use it enough to care (turn on again if need to)
+
                 group = mx.distributed.init(backend="ring", strict=True)
 
             case MlxJacclInstance(
@@ -155,7 +149,7 @@ def mlx_distributed_init(
 
 def initialize_mlx(
     bound_instance: BoundInstance,
-) -> Group:
+) -> mx.distributed.Group:
     # should we unseed it?
     # TODO: pass in seed from params
     mx.random.seed(42)
@@ -168,10 +162,12 @@ def initialize_mlx(
 
 def load_mlx_items(
     bound_instance: BoundInstance,
-    group: Group | None,
-    on_timeout: TimeoutCallback | None,
-    on_layer_loaded: LayerLoadedCallback | None,
-) -> "tuple[Model, TokenizerWrapper, VisionProcessor | None]":
+    group: mx.distributed.Group | None,
+) -> Generator[
+    ModelLoadingResponse, None, tuple[Model, TokenizerWrapper, "VisionProcessor | None"]
+]:
+    set_wired_limit_for_model(get_weights_size(bound_instance.bound_shard))
+
     if group is None:
         logger.info(f"Single device used for {bound_instance.instance}")
         model_path = build_model_path(bound_instance.bound_shard.model_card.model_id)
@@ -184,8 +180,7 @@ def load_mlx_items(
             total = len(layers)
             for i, layer in enumerate(layers):
                 mx.eval(layer)  # type: ignore
-                if on_layer_loaded is not None:
-                    on_layer_loaded(i, total)
+                yield ModelLoadingResponse(layers_loaded=i, total=total)
         except ValueError as e:
             logger.opt(exception=e).debug(
                 "Model architecture doesn't support layer-by-layer progress tracking",
@@ -198,19 +193,15 @@ def load_mlx_items(
     else:
         logger.info("Starting distributed init")
         start_time = time.perf_counter()
-        model, tokenizer = shard_and_load(
+        model, tokenizer = yield from shard_and_load(
             bound_instance.bound_shard,
             group=group,
-            on_timeout=on_timeout,
-            on_layer_loaded=on_layer_loaded,
         )
         end_time = time.perf_counter()
         logger.info(
             f"Time taken to shard and load model: {(end_time - start_time):.2f}s"
         )
 
-    set_wired_limit_for_model(get_weights_size(bound_instance.bound_shard))
-
     mx.clear_cache()
 
     vision_config = bound_instance.bound_shard.model_card.vision
@@ -218,9 +209,20 @@ def load_mlx_items(
     if vision_config is not None:
         from exo.worker.engines.mlx.vision import VisionProcessor
 
-        vision_processor: VisionProcessor | None = VisionProcessor(
-            vision_config, bound_instance.bound_shard.model_card.model_id
-        )
+        vision_start_time = time.perf_counter()
+        try:
+            vision_processor: VisionProcessor | None = VisionProcessor(
+                vision_config, bound_instance.bound_shard.model_card.model_id
+            )
+            vision_processor.load()
+            logger.info(
+                f"Time taken to load vision weights: {(time.perf_counter() - vision_start_time):.2f}s"
+            )
+        except Exception as e:
+            logger.opt(exception=e).error(
+                "Failed to load vision weights — disabling vision for this runner"
+            )
+            vision_processor = None
     else:
         vision_processor = None
 
@@ -229,10 +231,8 @@ def load_mlx_items(
 
 def shard_and_load(
     shard_metadata: ShardMetadata,
-    group: Group,
-    on_timeout: TimeoutCallback | None,
-    on_layer_loaded: LayerLoadedCallback | None,
-) -> tuple[nn.Module, TokenizerWrapper]:
+    group: mx.distributed.Group,
+) -> Generator[ModelLoadingResponse, None, tuple[nn.Module, TokenizerWrapper]]:
     model_path = build_model_path(shard_metadata.model_card.model_id)
 
     model, _ = load_model(model_path, lazy=True, strict=False)
@@ -260,27 +260,14 @@ def shard_and_load(
 
     logger.info(f"Group size: {group.size()}, group rank: {group.rank()}")
 
-    # Estimate timeout based on model size (5x default for large queued workloads)
-    base_timeout = float(os.environ.get("EXO_MODEL_LOAD_TIMEOUT", "300"))
-    model_size = get_weights_size(shard_metadata)
-    timeout_seconds = base_timeout + model_size.in_gb
-    logger.info(
-        f"Evaluating model parameters with timeout of {timeout_seconds:.0f}s "
-        f"(model size: {model_size.in_gb:.1f}GB)"
-    )
-
     match shard_metadata:
         case TensorShardMetadata():
             logger.info(f"loading model from {model_path} with tensor parallelism")
-            model = tensor_auto_parallel(
-                model, group, timeout_seconds, on_timeout, on_layer_loaded
-            )
+            model = yield from tensor_auto_parallel(model, group)
         case PipelineShardMetadata():
             logger.info(f"loading model from {model_path} with pipeline parallelism")
-            model = pipeline_auto_parallel(
-                model, group, shard_metadata, on_layer_loaded=on_layer_loaded
-            )
-            eval_with_timeout(model.parameters(), timeout_seconds, on_timeout)
+            model = yield from pipeline_auto_parallel(model, group, shard_metadata)
+            mx.eval(model.parameters())
         case CfgShardMetadata():
             raise ValueError(
                 "CfgShardMetadata is not supported for text model loading - "
@@ -324,17 +311,21 @@ def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
     model_id_lower = model_id.lower()
     if "kimi-k2" in model_id_lower:
         return [163586]
-    elif "glm-5" in model_id_lower or "glm-4.7" in model_id_lower:
-        # For GLM-5 and GLM-4.7
+    elif "glm-5" in model_id_lower:
         # 154820: <|endoftext|>, 154827: <|user|>, 154829: <|observation|>
         return [154820, 154827, 154829]
     elif "glm" in model_id_lower:
-        # For GLM-4.5 and older
+        # For GLM-4.7 and older
         return [151336, 151329, 151338]
     elif "gpt-oss" in model_id_lower:
         return [200002, 200012]
-    elif "qwen3.5" in model_id_lower or "qwen-3.5" in model_id_lower:
-        # For Qwen3.5: 248046 (<|im_end|>), 248044 (<|endoftext|>)
+    elif (
+        "qwen3.5" in model_id_lower
+        or "qwen-3.5" in model_id_lower
+        or "qwen3.6" in model_id_lower
+        or "qwen-3.6" in model_id_lower
+    ):
+        # For Qwen3.5 / Qwen3.6: 248046 (<|im_end|>), 248044 (<|endoftext|>)
         return [248046, 248044]
     elif "gemma-4" in model_id_lower or "gemma-3" in model_id_lower:
         return [1, 106, 50]
@@ -495,6 +486,32 @@ def _needs_dsml_encoding(task_params: TextGenerationTaskParams) -> bool:
     return "deepseek-v3.2" in task_params.model.lower()
 
 
+def _needs_v4_encoding(task_params: TextGenerationTaskParams) -> bool:
+    return "deepseek-v4" in task_params.model.lower()  # case-insensitive match
+
+
+def _v4_reasoning_effort(task_params: TextGenerationTaskParams) -> str | None:
+    """Map the request's `reasoning_effort` onto the value passed to the V4
+    encoder: "xhigh" -> "max", "high" -> "high", anything else -> None."""
+    level = task_params.reasoning_effort
+    if level == "xhigh":
+        return "max"
+    return "high" if level == "high" else None
+
+
+def _strip_v4_thinking_markers(content: str) -> str:
+    """Strip `<think>…</think>` blocks, then any unpaired `<think>`/`</think>`
+    tag, from prior-turn assistant content; empty content is returned as is.
+
+    The V4 encoder drops `reasoning_content` for older turns when
+    `drop_thinking=True`. NOTE(review): presumably markers left inline in
+    `content` escape that and need this separate pass — confirm."""
+    if not content:
+        return content
+    without_blocks = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
+    return without_blocks.replace("<think>", "").replace("</think>", "")
+
+
 def consolidate_system_messages(
     messages: list[dict[str, Any]],
 ) -> list[dict[str, Any]]:
@@ -544,7 +561,7 @@ def render_chat_template(
         formatted_messages = formatted_messages[:-1]
 
     if _needs_dsml_encoding(task_params):
-        from exo.worker.engines.mlx.dsml_encoding import encode_messages
+        from exo.worker.engines.mlx.vendor.dsml_encoding import encode_messages
 
         prompt = encode_messages(
             messages=formatted_messages,
@@ -556,12 +573,51 @@ def render_chat_template(
         )
         if partial_assistant_content:
             prompt += partial_assistant_content
-        logger.info(prompt)
+        return prompt
+
+    if _needs_v4_encoding(task_params):
+        from exo.worker.engines.mlx.vendor.deepseek_v4_encoding import (
+            encode_messages as encode_messages_v4,
+        )
+
+        v4_messages = [dict(m) for m in formatted_messages]
+        for msg in v4_messages:
+            if msg.get("role") == "assistant":
+                content = msg.get("content")
+                if isinstance(content, str):
+                    msg["content"] = _strip_v4_thinking_markers(content)
+        if task_params.tools:
+            for msg in v4_messages:
+                if msg.get("role") in ("system", "developer"):
+                    msg["tools"] = task_params.tools
+                    break
+            else:
+                v4_messages.insert(
+                    0, {"role": "system", "content": "", "tools": task_params.tools}
+                )
+
+        prompt = encode_messages_v4(
+            messages=v4_messages,
+            thinking_mode="chat"
+            if task_params.enable_thinking is False
+            else "thinking",
+            reasoning_effort=_v4_reasoning_effort(task_params),
+        )
+        if partial_assistant_content:
+            prompt += partial_assistant_content
         return prompt
 
     for msg in formatted_messages:
         _normalize_tool_calls(msg)
 
+    # Put reasoning content in thinking block for GPT OSS
+    if "gpt-oss" in task_params.model.lower():
+        for msg in formatted_messages:
+            if msg.get("role") == "assistant" and "thinking" not in msg:
+                rc = msg.get("reasoning_content")
+                if isinstance(rc, str) and rc:
+                    msg["thinking"] = rc
+
     extra_kwargs: dict[str, Any] = {}
     if task_params.enable_thinking is not None:
         # Qwen3 and GLM use "enable_thinking"; DeepSeek uses "thinking".
@@ -620,7 +676,7 @@ def apply_chat_template(
             messages.append({"role": msg.role, "content": msg.content})
 
     prompt = render_chat_template(tokenizer, messages, task_params)
-    logger.info(prompt)
+    logger.debug(prompt)
 
     return prompt
 
@@ -763,7 +819,9 @@ def set_wired_limit_for_model(model_size: Memory):
 
 
 def mlx_cleanup(
-    model: Model | None, tokenizer: TokenizerWrapper | None, group: Group | None
+    model: Model | None,
+    tokenizer: TokenizerWrapper | None,
+    group: mx.distributed.Group | None,
 ) -> None:
     del model, tokenizer, group
     mx.clear_cache()
@@ -772,7 +830,7 @@ def mlx_cleanup(
     gc.collect()
 
 
-def mx_any(bool_: bool, group: Group | None) -> bool:
+def mx_any(bool_: bool, group: mx.distributed.Group | None) -> bool:
     if group is None:
         return bool_
     num_true = mx.distributed.all_sum(
@@ -782,7 +840,7 @@ def mx_any(bool_: bool, group: Group | None) -> bool:
     return num_true.item() > 0
 
 
-def mx_barrier(group: Group | None):
+def mx_barrier(group: mx.distributed.Group | None):
     if group is None:
         return
     mx.eval(
diff --git a/src/exo/worker/engines/mlx/vendor/__init__.py b/src/exo/worker/engines/mlx/vendor/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/exo/worker/engines/mlx/vendor/deepseek_v4_encoding.py b/src/exo/worker/engines/mlx/vendor/deepseek_v4_encoding.py
new file mode 100644
index 000000000..4eb100c35
--- /dev/null
+++ b/src/exo/worker/engines/mlx/vendor/deepseek_v4_encoding.py
@@ -0,0 +1,836 @@
+# type: ignore
+"""
+DeepSeek-V4 Encoding
+
+From upstream
+"""
+
+import copy
+import json
+import re
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+# ============================================================
+# Special Tokens
+# ============================================================
+
+bos_token: str = "<｜begin▁of▁sentence｜>"
+eos_token: str = "<｜end▁of▁sentence｜>"
+thinking_start_token: str = "<think>"
+thinking_end_token: str = "</think>"
+dsml_token: str = "｜DSML｜"
+
+USER_SP_TOKEN = "<｜User｜>"
+ASSISTANT_SP_TOKEN = "<｜Assistant｜>"
+LATEST_REMINDER_SP_TOKEN = "<｜latest_reminder｜>"
+
+# Task special tokens for internal classification tasks
+DS_TASK_SP_TOKENS = {
+    "action": "<｜action｜>",
+    "query": "<｜query｜>",
+    "authority": "<｜authority｜>",
+    "domain": "<｜domain｜>",
+    "title": "<｜title｜>",
+    "read_url": "<｜read_url｜>",
+}
+VALID_TASKS = set(DS_TASK_SP_TOKENS.keys())
+
+# ============================================================
+# Templates
+# ============================================================
+
+system_msg_template: str = "{content}"
+user_msg_template: str = "{content}"
+latest_reminder_msg_template: str = "{content}"
+assistant_msg_template: str = "{reasoning}{content}{tool_calls}" + eos_token
+assistant_msg_wo_eos_template: str = "{reasoning}{content}{tool_calls}"
+thinking_template: str = "{reasoning_content}"
+
+response_format_template: str = "## Response Format:\n\nYou MUST strictly adhere to the following schema to reply:\n{schema}"
+tool_call_template: str = (
+    '<{dsml_token}invoke name="{name}">\n{arguments}\n</{dsml_token}invoke>'
+)
+tool_calls_template = (
+    "<{dsml_token}{tc_block_name}>\n{tool_calls}\n</{dsml_token}{tc_block_name}>"
+)
+tool_calls_block_name: str = "tool_calls"
+
+tool_output_template: str = "<tool_result>{content}</tool_result>"
+
+REASONING_EFFORT_MAX = (
+    "Reasoning Effort: Absolute maximum with no shortcuts permitted.\n"
+    "You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n"
+    "Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked.\n\n"
+)
+
+TOOLS_TEMPLATE = """## Tools
+
+You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<{dsml_token}tool_calls>" block like the following:
+
+<{dsml_token}tool_calls>
+<{dsml_token}invoke name="$TOOL_NAME">
+<{dsml_token}parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</{dsml_token}parameter>
+...
+</{dsml_token}invoke>
+<{dsml_token}invoke name="$TOOL_NAME2">
+...
+</{dsml_token}invoke>
+</{dsml_token}tool_calls>
+
+String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
+
+If thinking_mode is enabled (triggered by {thinking_start_token}), you MUST output your complete reasoning inside {thinking_start_token}...{thinking_end_token} BEFORE any tool calls or final response.
+
+Otherwise, output directly after {thinking_end_token} with tool calls or final response.
+
+### Available Tool Schemas
+
+{tool_schemas}
+
+You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
+"""
+
+# ============================================================
+# Utility Functions
+# ============================================================
+
+
+def to_json(value: Any) -> str:
+    """Serialize value to JSON, preferring raw (non-ASCII-escaped) output."""
+    try:
+        return json.dumps(value, ensure_ascii=False)
+    except Exception:  # was a bare except; let KeyboardInterrupt/SystemExit through
+        return json.dumps(value, ensure_ascii=True)
+
+
+def tools_from_openai_format(tools):
+    """Return the "function" entry of every OpenAI-style tool dict, in order."""
+    return list(map(lambda spec: spec["function"], tools))
+
+
+def tool_calls_from_openai_format(tool_calls):
+    """Flatten OpenAI tool calls into {"name", "arguments"} dicts."""
+    flattened = []
+    for call in tool_calls:
+        fn = call["function"]
+        # Look up name before arguments so a missing key fails in the same order.
+        entry = {"name": fn["name"], "arguments": fn["arguments"]}
+        flattened.append(entry)
+    return flattened
+
+
+def tool_calls_to_openai_format(tool_calls):
+    """Wrap internal {"name", "arguments"} dicts as OpenAI "function" tool calls."""
+    wrapped = []
+    for call in tool_calls:
+        # Read name first, then arguments, as the nested literal would.
+        fn_payload = {
+            "name": call["name"],
+            "arguments": call["arguments"],
+        }
+        entry = {"type": "function", "function": fn_payload}
+        wrapped.append(entry)
+    return wrapped
+
+
+def encode_arguments_to_dsml(tool_call: Dict[str, str]) -> str:
+    """
+    Encode tool call arguments into DSML parameter format.
+
+    Args:
+        tool_call: Dict with "name" and "arguments" (JSON string) keys.
+            Invalid or non-object JSON becomes one raw "arguments" parameter.
+    Returns:
+        One DSML parameter element per argument, joined by newlines.
+    """
+    p_dsml_template = '<{dsml_token}parameter name="{key}" string="{is_str}">{value}</{dsml_token}parameter>'
+    raw_arguments = tool_call["arguments"]
+    try:
+        arguments = json.loads(raw_arguments)
+    except Exception:
+        arguments = None
+    if not isinstance(arguments, dict):
+        # Unparseable or non-object JSON (list, number, null) has no .items().
+        arguments = {"arguments": raw_arguments}
+    return "\n".join(
+        p_dsml_template.format(
+            dsml_token=dsml_token,
+            key=k,
+            is_str="true" if isinstance(v, str) else "false",
+            value=v if isinstance(v, str) else to_json(v),
+        )
+        for k, v in arguments.items()
+    )
+
+
+def decode_dsml_to_arguments(
+    tool_name: str, tool_args: Dict[str, Tuple[str, str]]
+) -> Dict[str, str]:
+    """
+    Rebuild a tool call dict from parsed DSML parameters.
+
+    Args:
+        tool_name: Name of the invoked tool.
+        tool_args: Maps each parameter name to a (raw_value, string_flag) pair.
+
+    Returns:
+        {"name": tool_name, "arguments": <JSON object text>}.
+    """
+
+    members = []
+    for param_name, (raw_value, is_str) in tool_args.items():
+        # string="true" values are plain text and get JSON-quoted; any other
+        # flag means the text is spliced into the object verbatim.
+        if is_str == "true":
+            rendered = to_json(raw_value)
+        else:
+            rendered = raw_value
+        members.append(f"{to_json(param_name)}: {rendered}")
+
+    arguments_json = "{" + ", ".join(members) + "}"
+    # NOTE(review): string="false" values are not checked for JSON validity here.
+    return {"name": tool_name, "arguments": arguments_json}
+
+
+def render_tools(tools: List[Dict[str, Union[str, Dict[str, Any]]]]) -> str:
+    """
+    Build the "## Tools" prompt section for the given tool schemas.
+
+    Args:
+        tools: Tool schema dicts; each one is serialized with to_json.
+
+    Returns:
+        TOOLS_TEMPLATE filled in, with one JSON schema per line.
+    """
+    schema_lines = "\n".join(map(to_json, tools))
+    template_fields = {
+        "tool_schemas": schema_lines,
+        "dsml_token": dsml_token,
+        "thinking_start_token": thinking_start_token,
+        "thinking_end_token": thinking_end_token,
+    }
+    return TOOLS_TEMPLATE.format(**template_fields)
+
+
+def find_last_user_index(messages: List[Dict[str, Any]]) -> int:
+    """Return the position of the final user/developer message, or -1 if none."""
+    position = len(messages) - 1
+    while position >= 0:
+        if messages[position].get("role") in ("user", "developer"):
+            return position
+        position -= 1
+    return -1
+
+
+# ============================================================
+# Message Rendering
+# ============================================================
+
+
+def render_message(
+    index: int,
+    messages: List[Dict[str, Any]],
+    thinking_mode: str,
+    drop_thinking: bool = True,
+    reasoning_effort: Optional[str] = None,
+) -> str:
+    """
+    Render a single message at the given index into its encoded string form.
+
+    Besides the message body, the result may end with transition tokens for the
+    next turn: the assistant marker plus a think token, and/or a task token.
+
+    Args:
+        index: Index of the message to render.
+        messages: Full list of messages in the conversation.
+        thinking_mode: Either "chat" or "thinking".
+        drop_thinking: Whether to drop reasoning content from earlier turns.
+        reasoning_effort: Optional reasoning effort level ("max", "high", or None).
+
+    Returns:
+        Encoded string for this message.
+    """
+    assert 0 <= index < len(messages)
+    assert thinking_mode in ["chat", "thinking"], (
+        f"Invalid thinking_mode `{thinking_mode}`"
+    )
+
+    prompt = ""
+    msg = messages[index]
+    last_user_idx = find_last_user_index(messages)
+
+    role = msg.get("role")
+    content = msg.get("content")
+    tools = msg.get("tools")
+    response_format = msg.get("response_format")
+    tool_calls = msg.get("tool_calls")
+    reasoning_content = msg.get("reasoning_content")
+    wo_eos = msg.get("wo_eos", False)  # when true, the assistant text gets no EOS token
+
+    if tools:
+        tools = tools_from_openai_format(tools)
+    if tool_calls:
+        tool_calls = tool_calls_from_openai_format(tool_calls)
+
+    # Reasoning effort prefix (only at index 0 in thinking mode with max effort)
+    assert reasoning_effort in ["max", None, "high"], (
+        f"Invalid reasoning effort: {reasoning_effort}"
+    )
+    if index == 0 and thinking_mode == "thinking" and reasoning_effort == "max":
+        prompt += REASONING_EFFORT_MAX
+
+    if role == "system":
+        prompt += system_msg_template.format(content=content or "")
+        if tools:
+            prompt += "\n\n" + render_tools(tools)
+        if response_format:
+            prompt += "\n\n" + response_format_template.format(
+                schema=to_json(response_format)
+            )
+
+    elif role == "developer":
+        assert content, f"Invalid message for role `{role}`: {msg}"
+
+        content_developer = USER_SP_TOKEN
+        content_developer += content
+
+        if tools:
+            content_developer += "\n\n" + render_tools(tools)
+        if response_format:
+            content_developer += "\n\n" + response_format_template.format(
+                schema=to_json(response_format)
+            )
+
+        prompt += user_msg_template.format(content=content_developer)
+
+    elif role == "user":
+        prompt += USER_SP_TOKEN
+
+        # Handle content blocks (tool results mixed with text); they take precedence over "content"
+        content_blocks = msg.get("content_blocks")
+        if content_blocks:
+            parts = []
+            for block in content_blocks:
+                block_type = block.get("type")
+                if block_type == "text":
+                    parts.append(block.get("text", ""))
+                elif block_type == "tool_result":
+                    tool_content = block.get("content", "")
+                    if isinstance(tool_content, list):
+                        text_parts = []
+                        for b in tool_content:
+                            if b.get("type") == "text":
+                                text_parts.append(b.get("text", ""))
+                            else:
+                                text_parts.append(f"[Unsupported {b.get('type')}]")
+                        tool_content = "\n\n".join(text_parts)
+                    parts.append(tool_output_template.format(content=tool_content))
+                else:
+                    parts.append(f"[Unsupported {block_type}]")
+            prompt += "\n\n".join(parts)
+        else:
+            prompt += content or ""
+
+    elif role == "latest_reminder":
+        prompt += LATEST_REMINDER_SP_TOKEN + latest_reminder_msg_template.format(
+            content=content
+        )
+
+    elif role == "tool":
+        raise NotImplementedError(
+            "deepseek_v4 merges tool messages into user; please preprocess with merge_tool_messages()"
+        )
+
+    elif role == "assistant":
+        thinking_part = ""
+        tc_content = ""
+
+        if tool_calls:
+            tc_list = [
+                tool_call_template.format(
+                    dsml_token=dsml_token,
+                    name=tc.get("name"),
+                    arguments=encode_arguments_to_dsml(tc),
+                )
+                for tc in tool_calls
+            ]
+            tc_content += "\n\n" + tool_calls_template.format(
+                dsml_token=dsml_token,
+                tool_calls="\n".join(tc_list),
+                tc_block_name=tool_calls_block_name,
+            )
+
+        summary_content = content or ""
+        rc = reasoning_content or ""
+
+        # Check if previous message has a task - if so, this is a task output (no thinking)
+        prev_has_task = index - 1 >= 0 and messages[index - 1].get("task") is not None
+
+        if thinking_mode == "thinking" and not prev_has_task:
+            if not drop_thinking or index > last_user_idx:  # reasoning survives only after the last user turn when dropping
+                thinking_part = (
+                    thinking_template.format(reasoning_content=rc) + thinking_end_token
+                )
+            else:
+                thinking_part = ""
+
+        if wo_eos:
+            prompt += assistant_msg_wo_eos_template.format(
+                reasoning=thinking_part,
+                content=summary_content,
+                tool_calls=tc_content,
+            )
+        else:
+            prompt += assistant_msg_template.format(
+                reasoning=thinking_part,
+                content=summary_content,
+                tool_calls=tc_content,
+            )
+    else:
+        raise NotImplementedError(f"Unknown role: {role}")
+
+    # Skip transition tokens when a next message exists and is neither assistant nor latest_reminder
+    if index + 1 < len(messages) and messages[index + 1].get("role") not in [
+        "assistant",
+        "latest_reminder",
+    ]:
+        return prompt
+
+    task = messages[index].get("task")
+    if task is not None:
+        # Task special token for internal classification tasks
+        assert task in VALID_TASKS, (
+            f"Invalid task: '{task}'. Valid tasks are: {list(VALID_TASKS)}"
+        )
+        task_sp_token = DS_TASK_SP_TOKENS[task]
+
+        if task != "action":
+            # Non-action tasks: append task sp token directly after the message
+            prompt += task_sp_token
+        else:
+            # Action task: append Assistant + thinking token + action sp token
+            prompt += ASSISTANT_SP_TOKEN
+            prompt += (
+                thinking_end_token
+                if thinking_mode != "thinking"
+                else thinking_start_token
+            )
+            prompt += task_sp_token
+
+    elif messages[index].get("role") in ["user", "developer"]:
+        # Normal generation: append Assistant, then the think start token if reasoning applies, else the think end token
+        prompt += ASSISTANT_SP_TOKEN
+        if (
+            not drop_thinking
+            and thinking_mode == "thinking"
+            or drop_thinking
+            and thinking_mode == "thinking"
+            and index >= last_user_idx
+        ):
+            prompt += thinking_start_token
+        else:
+            prompt += thinking_end_token
+
+    return prompt
+
+
+# ============================================================
+# Preprocessing
+# ============================================================
+
+
+def merge_tool_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Fold "tool" messages into user messages using the content_blocks format.
+
+    DeepSeek-V4 does not have a standalone "tool" role; instead, tool results
+    are encoded as <tool_result> blocks within user messages.
+
+    A tool message joins the previous merged user message when there is one,
+    otherwise it starts a new user message. Missing or None content is stored
+    as "" in the blocks so rendering never has to join or format None.
+
+    Args:
+        messages: List of message dicts in OpenAI format (deep-copied, not mutated).
+
+    Returns:
+        New message list with tool results merged into user messages.
+    """
+    merged: List[Dict[str, Any]] = []
+
+    for msg in messages:
+        msg = copy.deepcopy(msg)
+        role = msg.get("role")
+
+        if role == "tool":
+            # Convert tool message to a user message with tool_result block
+            tool_block = {
+                "type": "tool_result",
+                "tool_use_id": msg.get("tool_call_id", ""),
+                "content": msg.get("content") or "",  # None would render as "None"
+            }
+            # Merge into previous message if it's already a user (merged tool)
+            if (
+                merged
+                and merged[-1].get("role") == "user"
+                and "content_blocks" in merged[-1]
+            ):
+                merged[-1]["content_blocks"].append(tool_block)
+            else:
+                merged.append(
+                    {
+                        "role": "user",
+                        "content_blocks": [tool_block],
+                    }
+                )
+        elif role == "user":
+            text_block = {"type": "text", "text": msg.get("content") or ""}
+            if (
+                merged
+                and merged[-1].get("role") == "user"
+                and "content_blocks" in merged[-1]
+                and merged[-1].get("task") is None
+            ):
+                merged[-1]["content_blocks"].append(text_block)
+            else:
+                new_msg = {
+                    "role": "user",
+                    "content": msg.get("content", ""),
+                    "content_blocks": [text_block],
+                }
+                # Only these extra fields are carried over to the rebuilt user message
+                for key in ("task", "wo_eos", "mask"):
+                    if key in msg:
+                        new_msg[key] = msg[key]
+                merged.append(new_msg)
+        else:
+            merged.append(msg)
+
+    return merged
+
+
+def sort_tool_results_by_call_order(
+    messages: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """
+    Reorder tool_result blocks to follow the latest assistant tool_calls order.
+
+    For each user message holding more than one tool_result block, those blocks
+    are stably sorted by where their "tool_use_id" appears in the most recent
+    assistant message that had tool_calls (unmatched ids rank as 0). Blocks of
+    other types stay in their slots. Messages are modified in place.
+
+    Args:
+        messages: Preprocessed message list (after merge_tool_messages).
+
+    Returns:
+        The same list object that was passed in.
+    """
+    call_rank: Dict[str, int] = {}
+
+    for message in messages:
+        role = message.get("role")
+        if role == "assistant" and message.get("tool_calls"):
+            # A new round of tool calls replaces the previous ranking.
+            call_rank = {}
+            for position, call in enumerate(message["tool_calls"]):
+                call_id = call.get("id")
+                if not call_id:
+                    call_id = call.get("function", {}).get("id", "")
+                if call_id:
+                    call_rank[call_id] = position
+            continue
+        if role != "user" or not message.get("content_blocks"):
+            continue
+
+        blocks = message["content_blocks"]
+        results = [b for b in blocks if b.get("type") == "tool_result"]
+        if len(results) <= 1 or not call_rank:
+            continue
+        ranked = sorted(results, key=lambda b: call_rank.get(b.get("tool_use_id", ""), 0))
+        ordered = iter(ranked)
+        message["content_blocks"] = [
+            next(ordered) if b.get("type") == "tool_result" else b for b in blocks
+        ]
+    return messages
+
+
+# ============================================================
+# Main Encoding Function
+# ============================================================
+
+
+def encode_messages(
+    messages: List[Dict[str, Any]],
+    thinking_mode: str,
+    context: Optional[List[Dict[str, Any]]] = None,
+    drop_thinking: bool = True,
+    add_default_bos_token: bool = True,
+    reasoning_effort: Optional[str] = None,
+) -> str:
+    """
+    Encode a list of messages into the DeepSeek-V4 prompt format.
+
+    This is the main entry point for encoding conversations. It handles:
+    - BOS token insertion
+    - Thinking mode with optional reasoning content dropping
+    - Tool message merging into user messages
+    - Multi-turn conversation context
+
+    Args:
+        messages: List of message dicts to encode.
+        thinking_mode: Either "chat" or "thinking".
+        context: Optional preceding context messages (already encoded prefix).
+        drop_thinking: If True, drop reasoning_content from assistant turns before the
+                      last user message; ignored when any message has non-empty "tools".
+        add_default_bos_token: Whether to prepend BOS token at conversation start.
+        reasoning_effort: Optional reasoning effort level ("max", "high", or None).
+
+    Returns:
+        The encoded prompt string.
+    """
+    context = context if context else []
+
+    # Preprocess: merge tool messages and sort tool results
+    messages = merge_tool_messages(messages)
+    messages = sort_tool_results_by_call_order(context + messages)[len(context) :]
+    if context:
+        context = merge_tool_messages(context)
+        context = sort_tool_results_by_call_order(context)
+
+    full_messages = context + messages
+
+    prompt = bos_token if add_default_bos_token and len(context) == 0 else ""
+
+    effective_drop_thinking = drop_thinking
+    if any(m.get("tools") for m in full_messages):
+        effective_drop_thinking = False
+
+    if thinking_mode == "thinking" and effective_drop_thinking:
+        # Prune context against the whole conversation: a placeholder user stands in for `messages`.
+        tail = [{"role": "user"}] if find_last_user_index(messages) >= 0 else []
+        context_len = len(_drop_thinking_messages(context + tail)) - len(tail)
+        full_messages = _drop_thinking_messages(full_messages)
+        num_to_render = len(full_messages) - context_len
+    else:
+        num_to_render = len(messages)
+        context_len = len(context)
+
+    for idx in range(num_to_render):
+        prompt += render_message(
+            idx + context_len,
+            full_messages,
+            thinking_mode=thinking_mode,
+            drop_thinking=effective_drop_thinking,
+            reasoning_effort=reasoning_effort,
+        )
+
+    return prompt
+
+
+def _drop_thinking_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Prune history that precedes the last user/developer message.
+
+    Per message, in input order:
+    - Roles user, system, tool, latest_reminder, direct_search_results: kept as is.
+    - Anything at or after the last user/developer index: kept as is.
+    - Earlier assistant messages: shallow-copied without "reasoning_content".
+    - Any other earlier message (e.g. developer): left out of the result.
+    """
+    always_kept = {"user", "system", "tool", "latest_reminder", "direct_search_results"}
+    cutoff = find_last_user_index(messages)
+    pruned = []
+
+    for position, entry in enumerate(messages):
+        entry_role = entry.get("role")
+        if entry_role in always_kept or position >= cutoff:
+            pruned.append(entry)
+            continue
+        if entry_role != "assistant":
+            continue  # developer and other roles before the cutoff are discarded
+        stripped = copy.copy(entry)
+        stripped.pop("reasoning_content", None)
+        pruned.append(stripped)
+    return pruned
+
+
+# ============================================================
+# Parsing (Decoding model output)
+# ============================================================
+
+
+def _read_until_stop(
+    index: int, text: str, stop: List[str]
+) -> Tuple[int, str, Optional[str]]:
+    """
+    Scan text from index for whichever stop string occurs earliest.
+
+    When several stop strings match at the same position, the one listed
+    first in stop wins.
+
+    Returns:
+        (index just past the stop, text before it, the stop string), or
+        (len(text), rest of text from index, None) if no stop string occurs.
+    """
+    best_pos, best_stop = len(text), None
+
+    for candidate in stop:
+        found_at = text.find(candidate, index)
+        # find() yields -1 on a miss; only a strictly earlier hit replaces the best.
+        if -1 < found_at < best_pos:
+            best_pos, best_stop = found_at, candidate
+
+    if not best_stop:
+        return len(text), text[index:], None
+    return best_pos + len(best_stop), text[index:best_pos], best_stop
+
+
+def parse_tool_calls(
+    index: int, text: str
+) -> Tuple[int, Optional[str], List[Dict[str, str]]]:
+    """
+    Parse DSML tool calls from text starting at the given index.
+
+    Args:
+        index: Starting position in text, expected to sit on the opening tag's ">".
+        text: The full text to parse.
+
+    Returns:
+        (new_index, last_stop_token, tool_calls); each tool call dict has "name"
+        and "arguments" keys. Raises ValueError on any deviation from the layout.
+    """
+    tool_calls: List[Dict[str, Any]] = []
+    stop_token = None
+    tool_calls_end_token = f"</{dsml_token}{tool_calls_block_name}>"
+
+    while index < len(text):  # one iteration per invoke element, until the block's end tag
+        index, _, stop_token = _read_until_stop(
+            index, text, [f"<{dsml_token}invoke", tool_calls_end_token]
+        )
+        if _ != ">\n":  # "_" is the text skipped before the tag: only the previous tag's ">" and a newline
+            raise ValueError(f"Tool call format error: expected '>\\n' but got '{_}'")
+
+        if stop_token == tool_calls_end_token:
+            break
+
+        if stop_token is None:
+            raise ValueError("Missing special token in tool calls")
+
+        index, tool_name_content, stop_token = _read_until_stop(
+            index, text, [f"<{dsml_token}parameter", f"</{dsml_token}invoke"]
+        )
+
+        p_tool_name = re.findall(
+            r'^\s*name="(.*?)">\n$', tool_name_content, flags=re.DOTALL
+        )
+        if len(p_tool_name) != 1:
+            raise ValueError(f"Tool name format error: '{tool_name_content}'")
+        tool_name = p_tool_name[0]
+
+        tool_args: Dict[str, Tuple[str, str]] = {}
+        while stop_token == f"<{dsml_token}parameter":  # one iteration per parameter element
+            index, param_content, stop_token = _read_until_stop(
+                index, text, [f"/{dsml_token}parameter"]
+            )
+
+            param_kv = re.findall(
+                r'^ name="(.*?)" string="(true|false)">(.*?)<$',
+                param_content,
+                flags=re.DOTALL,
+            )
+            if len(param_kv) != 1:
+                raise ValueError(f"Parameter format error: '{param_content}'")
+            param_name, string, param_value = param_kv[0]
+
+            if param_name in tool_args:
+                raise ValueError(f"Duplicate parameter name: '{param_name}'")
+            tool_args[param_name] = (param_value, string)  # stored as (value, string flag)
+
+            index, content, stop_token = _read_until_stop(
+                index, text, [f"<{dsml_token}parameter", f"</{dsml_token}invoke"]
+            )
+            if content != ">\n":
+                raise ValueError(
+                    f"Parameter format error: expected '>\\n' but got '{content}'"
+                )
+
+        tool_call = decode_dsml_to_arguments(tool_name=tool_name, tool_args=tool_args)
+        tool_calls.append(tool_call)
+
+    return index, stop_token, tool_calls
+
+
+def parse_message_from_completion_text(text: str, thinking_mode: str) -> Dict[str, Any]:
+    """
+    Parse a model completion text into a structured assistant message.
+
+    This function takes the raw text output from the model (a single assistant turn)
+    and extracts:
+    - reasoning_content (thinking block)
+    - content (summary/response)
+    - tool_calls (if any)
+
+    NOTE: Only correctly formatted strings are supported. Malformed output fails an
+    assert (AssertionError) here, or raises ValueError inside parse_tool_calls.
+
+    Args:
+        text: The raw completion text (including EOS token).
+        thinking_mode: Either "chat" or "thinking".
+
+    Returns:
+        Dict with keys: "role", "content", "reasoning_content", "tool_calls".
+        tool_calls are in OpenAI format.
+    """
+    summary_content, reasoning_content, tool_calls = "", "", []
+    index, stop_token = 0, None
+    tool_calls_start_token = f"\n\n<{dsml_token}{tool_calls_block_name}"
+
+    is_thinking = thinking_mode == "thinking"
+    is_tool_calling = False
+
+    if is_thinking:  # everything up to the think end token is taken as reasoning
+        index, content_delta, stop_token = _read_until_stop(
+            index, text, [thinking_end_token, tool_calls_start_token]
+        )
+        reasoning_content = content_delta
+        assert stop_token == thinking_end_token, (
+            "Invalid thinking format: missing </think>"
+        )
+
+    index, content_delta, stop_token = _read_until_stop(
+        index, text, [eos_token, tool_calls_start_token]
+    )
+    summary_content = content_delta
+    if stop_token == tool_calls_start_token:
+        is_tool_calling = True
+    else:
+        assert stop_token == eos_token, "Invalid format: missing EOS token"
+
+    if is_tool_calling:
+        index, stop_token, tool_calls = parse_tool_calls(index, text)
+
+        index, tool_ends_text, stop_token = _read_until_stop(index, text, [eos_token])
+        assert not tool_ends_text, "Unexpected content after tool calls"
+
+    assert len(text) == index and stop_token in [eos_token, None], (
+        "Unexpected content at end"
+    )
+
+    for sp_token in [  # none of these may appear inside the extracted content or reasoning
+        bos_token,
+        eos_token,
+        thinking_start_token,
+        thinking_end_token,
+        dsml_token,
+    ]:
+        assert sp_token not in summary_content and sp_token not in reasoning_content, (
+            f"Unexpected special token '{sp_token}' in content"
+        )
+
+    return {
+        "role": "assistant",
+        "content": summary_content,
+        "reasoning_content": reasoning_content,
+        "tool_calls": tool_calls_to_openai_format(tool_calls),
+    }
diff --git a/src/exo/worker/engines/mlx/dsml_encoding.py b/src/exo/worker/engines/mlx/vendor/dsml_encoding.py
similarity index 76%
rename from src/exo/worker/engines/mlx/dsml_encoding.py
rename to src/exo/worker/engines/mlx/vendor/dsml_encoding.py
index 9d1dfdd27..893df06b3 100644
--- a/src/exo/worker/engines/mlx/dsml_encoding.py
+++ b/src/exo/worker/engines/mlx/vendor/dsml_encoding.py
@@ -17,6 +17,13 @@ TOOL_CALLS_START = f"<{DSML_TOKEN}function_calls>"
 TOOL_CALLS_END = f"</{DSML_TOKEN}function_calls>"
 _ORPHAN_THINK_END = ASSISTANT_TOKEN + THINKING_END
 _FIXED_THINK_BLOCK = ASSISTANT_TOKEN + THINKING_START + "\n" + THINKING_END
+_FUNCTION_RESULTS_CLOSE = "</function_results>"
+_ORPHAN_TOOL_RESULT_SUFFIX = _FUNCTION_RESULTS_CLOSE + "\n\n" + THINKING_END
+_EMPTY_THINK_BLOCKS = (
+    THINKING_START + "\n\n" + THINKING_END,
+    THINKING_START + "\n" + THINKING_END,
+    THINKING_START + THINKING_END,
+)
 
 
 def encode_messages(
@@ -27,15 +34,25 @@ def encode_messages(
     add_default_bos_token: bool = True,
     tools: Any = None,  # pyright: ignore[reportAny]
 ) -> str:
+    # V3.2 (like V4) is `tool_conditional`: when tools are in play, prior-turn
+    # reasoning_content must be retained so multi-step tool chains stay
+    # coherent.
+    effective_drop_thinking = drop_thinking
+    if tools:
+        effective_drop_thinking = False
     prompt: str = deepseek_v32.encode_messages(
         messages,
         thinking_mode=thinking_mode,
         context=context,
-        drop_thinking=drop_thinking,
+        drop_thinking=effective_drop_thinking,
         add_default_bos_token=add_default_bos_token,
         tools=tools,
     )
-    return prompt.replace(_ORPHAN_THINK_END, _FIXED_THINK_BLOCK)
+    prompt = prompt.replace(_ORPHAN_TOOL_RESULT_SUFFIX, _FUNCTION_RESULTS_CLOSE)
+    prompt = prompt.replace(_ORPHAN_THINK_END, _FIXED_THINK_BLOCK)
+    for empty in _EMPTY_THINK_BLOCKS:
+        prompt = prompt.replace(empty, "")
+    return prompt
 
 
 _INVOKE_PATTERN = re.compile(
diff --git a/src/exo/worker/engines/mlx/vision.py b/src/exo/worker/engines/mlx/vision.py
index 10d5312f0..a79925bc7 100644
--- a/src/exo/worker/engines/mlx/vision.py
+++ b/src/exo/worker/engines/mlx/vision.py
@@ -25,9 +25,9 @@ from transformers import AutoImageProcessor
 from exo.download.download_utils import build_model_path
 from exo.shared.models.model_cards import VisionCardConfig
 from exo.shared.types.common import ModelId
-from exo.shared.types.mlx import Model
 from exo.shared.types.text_generation import Base64Image, TextGenerationTaskParams
 from exo.worker.engines.mlx.cache import encode_prompt
+from exo.worker.engines.mlx.types import Model
 from exo.worker.engines.mlx.utils_mlx import (
     fix_unmatched_think_end_tokens,
     render_chat_template,
@@ -36,6 +36,19 @@ from exo.worker.runner.bootstrap import logger
 
 _video_processor_patched = False
 
+_MLX_VLM_MODEL_TYPE_ALIASES: dict[str, str] = {
+    "kimi_k25": "kimi_vl",
+    "kimi_k26": "kimi_vl",
+}
+
+
+def _torch_tensor_to_mx(
+    tensor: Any,  # pyright: ignore[reportAny]
+) -> mx.array:
+    if str(tensor.dtype) != "torch.bfloat16":  # type: ignore
+        return mx.array(tensor.numpy())  # type: ignore
+    return mx.array(tensor.float().numpy(), dtype=mx.bfloat16)  # type: ignore
+
 
 def _filter_config(cls: type, d: dict[str, Any]) -> dict[str, Any]:
     valid = set(inspect.signature(cls.__init__).parameters.keys()) - {"self"}
@@ -85,6 +98,8 @@ def _instantiate_projector(
     params = {n: p for n, p in init_sig.parameters.items() if n != "self"}
     kwargs: dict[str, Any] = {}
 
+    if "config" in params:
+        kwargs["config"] = model_config
     if "embedding_dim" in params:
         kwargs["embedding_dim"] = vision_config.hidden_size  # pyright: ignore[reportAny]
     if "text_hidden_size" in params:
@@ -205,7 +220,9 @@ class VisionEncoder:
         return {}
 
     def _import_mlx_vlm(self, *submodules: str) -> Any:  # type: ignore
-        mt = self._config.model_type
+        mt = _MLX_VLM_MODEL_TYPE_ALIASES.get(
+            self._config.model_type, self._config.model_type
+        )
         results: list[Any] = []
         for sub in submodules:
             name = f"mlx_vlm.models.{mt}.{sub}"
@@ -238,7 +255,7 @@ class VisionEncoder:
     def _load_image_processor_from_module(self, repo: str) -> "ImageProcessor | None":
         # mlx_vlm.utils.load_image_processor only works for models that set
         # `Model.ImageProcessor = <cls>`, but Gemma4 just uses
-        # `Gemma4ImageProcessor` from the package `__init__.py`
+        # `Gemma4ImageProcessor` from the package `__init__.py`.
         try:
             pkg: Any = importlib.import_module(
                 f"mlx_vlm.models.{self._config.model_type}"
@@ -319,10 +336,16 @@ class VisionEncoder:
         else:
             self._load_weights_from_model_repo()
 
-        repo = processor_repo or str(self._model_path)
-        image_proc = load_image_processor(
-            repo
-        ) or self._load_image_processor_from_module(repo)
+        if processor_repo:
+            repo = str(build_model_path(ModelId(processor_repo)))
+        else:
+            repo = str(self._model_path)
+        try:
+            image_proc = load_image_processor(repo)
+        except ValueError:
+            image_proc = None
+        if image_proc is None:
+            image_proc = self._load_image_processor_from_module(repo)
         if image_proc is not None:
             self._processor = image_proc
         else:
@@ -339,39 +362,42 @@ class VisionEncoder:
         if not safetensors_files:
             raise FileNotFoundError(f"No safetensors files found in {self._model_path}")
 
-        weights: dict[str, mx.array] = {}
-        for sf_path in safetensors_files:
-            with safe_open(str(sf_path), framework="pt") as f:
-                keys = f.keys()
-                for key in keys:
-                    tensor = f.get_tensor(key)  # type: ignore
-                    np_tensor = tensor.float().numpy()  # type: ignore
-                    weights[key] = mx.array(np_tensor, dtype=mx.bfloat16)  # type: ignore
-
         vision_weights: dict[str, mx.array] = {}
         projector_weights: dict[str, mx.array] = {}
-        for key, val in weights.items():
-            if key.startswith("vision_tower."):
-                short_key = key[len("vision_tower.") :]
-                if short_key.startswith("encoder."):
-                    short_key = short_key[len("encoder.") :]
-                m = re.match(r"^(blocks\.\d+)\.(wqkv|wo)\.(weight|bias)$", short_key)
-                if m:
-                    short_key = f"{m.group(1)}.attn.{m.group(2)}.{m.group(3)}"
-                if short_key == "patch_embed.proj.weight" and val.ndim == 4:
-                    val = val.transpose(0, 2, 3, 1)
-                vision_weights[short_key] = val
-            elif key.startswith(("mm_projector.", "multi_modal_projector.")):
-                if key.startswith("multi_modal_projector."):
-                    short_key = key[len("multi_modal_projector.") :]
-                    if short_key.startswith("mm_projector."):
-                        short_key = short_key[len("mm_projector.") :]
-                else:
-                    short_key = key[len("mm_projector.") :]
-                short_key = short_key.replace("proj.0.", "linear_1.").replace(
-                    "proj.2.", "linear_2."
-                )
-                projector_weights[short_key] = val
+
+        for sf_path in safetensors_files:
+            with safe_open(str(sf_path), framework="pt") as f:
+                keys = cast(list[str], list(f.keys()))  # type: ignore
+                for key in keys:
+                    if key.startswith("vision_tower."):
+                        short_key = key[len("vision_tower.") :]
+                        if short_key.startswith("encoder."):
+                            short_key = short_key[len("encoder.") :]
+                        m = re.match(
+                            r"^(blocks\.\d+)\.(wqkv|wo)\.(weight|bias)$", short_key
+                        )
+                        if m:
+                            short_key = f"{m.group(1)}.attn.{m.group(2)}.{m.group(3)}"
+                        tensor = f.get_tensor(key)  # type: ignore
+                        val = mx.array(tensor.float().numpy(), dtype=mx.bfloat16)  # type: ignore
+                        if short_key == "patch_embed.proj.weight" and val.ndim == 4:
+                            val = val.transpose(0, 2, 3, 1)
+                        vision_weights[short_key] = val
+                    elif key.startswith(("mm_projector.", "multi_modal_projector.")):
+                        if key.startswith("multi_modal_projector."):
+                            short_key = key[len("multi_modal_projector.") :]
+                            if short_key.startswith("mm_projector."):
+                                short_key = short_key[len("mm_projector.") :]
+                        else:
+                            short_key = key[len("mm_projector.") :]
+                        short_key = short_key.replace("proj.0.", "linear_1.").replace(
+                            "proj.2.", "linear_2."
+                        )
+                        tensor = f.get_tensor(key)  # type: ignore
+                        projector_weights[short_key] = mx.array(
+                            tensor.float().numpy(),  # type: ignore
+                            dtype=mx.bfloat16,
+                        )
 
         assert self._vision_tower is not None
         self._vision_tower.load_weights(list(vision_weights.items()))
@@ -407,18 +433,26 @@ class VisionEncoder:
         needs_sanitize = False
 
         for sf_path in safetensors_files:
-            file_weights: dict[str, mx.array] = mx.load(str(sf_path))  # type: ignore
-            for key, val in file_weights.items():
-                for prefix in vision_prefixes:
-                    if key.startswith(prefix):
-                        vision_weights[key[len(prefix) :]] = val
-                        if prefix == "model.visual.":
-                            needs_sanitize = True
-                        break
-                else:
+            with safe_open(str(sf_path), framework="pt") as f:
+                keys = cast(list[str], list(f.keys()))  # type: ignore
+                for key in keys:
+                    matched = False
+                    for prefix in vision_prefixes:
+                        if key.startswith(prefix):
+                            vision_weights[key[len(prefix) :]] = _torch_tensor_to_mx(
+                                f.get_tensor(key)
+                            )
+                            if prefix == "model.visual.":
+                                needs_sanitize = True
+                            matched = True
+                            break
+                    if matched:
+                        continue
                     for prefix in projector_prefixes:
                         if key.startswith(prefix):
-                            projector_weights[key[len(prefix) :]] = val
+                            projector_weights[key[len(prefix) :]] = _torch_tensor_to_mx(
+                                f.get_tensor(key)
+                            )
                             break
 
         if not vision_weights:
@@ -463,7 +497,12 @@ class VisionEncoder:
         grid_thw: mx.array | None
         n_tokens_per_image: list[int]
 
-        if self._config.processor_repo:
+        is_kimi_vl_processor = any(
+            "mlx_vlm.models.kimi_vl" in cls.__module__
+            for cls in type(self._processor).__mro__
+        )
+
+        if self._config.processor_repo and not is_kimi_vl_processor:
             processed = self._processor.preprocess(
                 [{"type": "image", "image": img} for img in pil_images],
                 return_tensors="np",
@@ -481,6 +520,24 @@ class VisionEncoder:
                 int(mx.prod(grid_thw[i]).item()) // merge_length
                 for i in range(grid_thw.shape[0])
             ]
+        elif is_kimi_vl_processor:
+            proc: Any = self._processor
+            raw_processed = proc.preprocess(pil_images, return_tensors="np")  # type: ignore
+            stacked_pixels = mx.array(raw_processed["pixel_values"])  # type: ignore
+            if stacked_pixels.ndim == 3:
+                stacked_pixels = stacked_pixels[None]
+            per_image_pixels = [
+                stacked_pixels[i : i + 1] for i in range(stacked_pixels.shape[0])
+            ]
+            grid_raw = raw_processed.get("image_grid_hws")  # type: ignore
+            if grid_raw is None:
+                grid_raw = raw_processed["grid_thws"]  # type: ignore
+            grid_thw = mx.array(grid_raw)  # type: ignore
+            merge_length = int(np.prod(self._merge_kernel_size or [2, 2]))
+            n_tokens_per_image = [
+                int(mx.prod(grid_thw[i]).item()) // merge_length
+                for i in range(grid_thw.shape[0])
+            ]
         else:
             batch, tokens_override = _run_processor(self._processor, pil_images)
             # `Gemma4ImageProcessor` returns pixel_values as a plain ndarray
diff --git a/src/exo/worker/main.py b/src/exo/worker/main.py
index 81c8c26c8..5c34e7b4b 100644
--- a/src/exo/worker/main.py
+++ b/src/exo/worker/main.py
@@ -10,7 +10,7 @@ from exo.api.types import ImageEditsTaskParams
 from exo.download.download_utils import is_read_only_model_dir, resolve_existing_model
 from exo.shared.apply import apply
 from exo.shared.constants import EXO_MAX_INSTANCE_RETRIES
-from exo.shared.models.model_cards import ModelId, add_to_card_cache, delete_custom_card
+from exo.shared.models.model_cards import ModelId, card_cache
 from exo.shared.types.chunks import InputImageChunk
 from exo.shared.types.commands import (
     DeleteInstance,
@@ -20,8 +20,6 @@ from exo.shared.types.commands import (
 )
 from exo.shared.types.common import CommandId, NodeId, SystemId
 from exo.shared.types.events import (
-    CustomModelCardAdded,
-    CustomModelCardDeleted,
     Event,
     IndexedEvent,
     InputChunkReceived,
@@ -57,7 +55,7 @@ from exo.utils.info_gatherer.net_profile import check_reachable
 from exo.utils.keyed_backoff import KeyedBackoff
 from exo.utils.task_group import TaskGroup
 from exo.worker.plan import plan
-from exo.worker.runner.runner_supervisor import RunnerSupervisor
+from exo.worker.runner.supervisor import RunnerSupervisor
 
 
 class Worker:
@@ -110,6 +108,8 @@ class Worker:
                 tg.start_soon(self.plan_step)
                 tg.start_soon(self._event_applier)
                 tg.start_soon(self._poll_connection_updates)
+                tg.start_soon(self._reconcile_custom_cards)
+
         finally:
             # Actual shutdown code - waits for all tasks to complete before executing.
             logger.info("Stopping Worker")
@@ -151,13 +151,38 @@ class Worker:
                     self.input_chunk_buffer[cmd_id][event.chunk.chunk_index] = (
                         event.chunk
                     )
+                    if (
+                        len(self.input_chunk_buffer[cmd_id])
+                        == self.input_chunk_counts[cmd_id]
+                    ):
+                        per_image: defaultdict[int, list[InputImageChunk]] = (
+                            defaultdict(list)
+                        )
+                        for chunk in self.input_chunk_buffer[cmd_id].values():
+                            per_image[chunk.image_index].append(chunk)
+                        for chunks_for_image in per_image.values():
+                            sorted_chunks = sorted(
+                                chunks_for_image, key=lambda c: c.chunk_index
+                            )
+                            img = Base64Image("".join(c.data for c in sorted_chunks))
+                            self.image_cache[
+                                Base64ImageHash(
+                                    hashlib.sha256(img.encode("ascii")).hexdigest()
+                                )
+                            ] = img
 
-                if isinstance(event, CustomModelCardAdded):
-                    await event.model_card.save_to_custom_dir()
-                    add_to_card_cache(event.model_card)
+    async def _reconcile_custom_cards(self) -> None:
+        while True:
+            await anyio.sleep(1)
+            target = dict(self.state.custom_model_cards)
+            for model_id, card in target.items():
+                if card_cache.get(model_id) == card:
+                    continue
+                await card_cache.save(card)
 
-                if isinstance(event, CustomModelCardDeleted):
-                    await delete_custom_card(event.model_id)
+            for card in await card_cache.list_all():
+                if card.model_id not in target:
+                    await card_cache.pop(card.model_id)
 
     async def plan_step(self):
         while True:
@@ -170,6 +195,7 @@ class Worker:
                 self.state.runners,
                 self.state.tasks,
                 self.input_chunk_buffer,
+                self.image_cache,
                 self._instance_backoff,
                 self._download_backoff,
             )
@@ -197,7 +223,7 @@ class Worker:
             # lets not kill the worker if a runner is unresponsive
             match task:
                 case CreateRunner():
-                    self._create_supervisor(task)
+                    await self._create_supervisor(task)
                     self._instance_backoff.record_attempt(task.instance_id)
                     await self.event_sender.send(
                         TaskStatusUpdated(
@@ -209,7 +235,7 @@ class Worker:
                     self._download_backoff.record_attempt(model_id)
 
                     found_path = await to_thread.run_sync(
-                        resolve_existing_model, model_id
+                        resolve_existing_model, model_id, shard.model_card
                     )
                     if found_path is not None:
                         logger.info(f"Model {model_id} found at {found_path}")
@@ -307,42 +333,11 @@ class Worker:
                         del self.input_chunk_counts[cmd_id]
                     await self._start_runner_task(modified_task)
 
-                case TextGeneration() if (
-                    task.task_params.image_hashes
-                    or task.task_params.total_input_chunks > 0
-                ):
+                case TextGeneration() if task.task_params.image_hashes:
                     cmd_id = task.command_id
-                    by_index: dict[int, Base64Image] = {}
-
-                    for idx, h in task.task_params.image_hashes.items():
-                        assert h in self.image_cache
-                        by_index[idx] = self.image_cache[h]
-
-                    if task.task_params.total_input_chunks > 0:
-                        chunk_buffer = self.input_chunk_buffer.get(cmd_id, {})
-                        per_image: defaultdict[int, list[InputImageChunk]] = (
-                            defaultdict(list)
-                        )
-                        for chunk in chunk_buffer.values():
-                            per_image[chunk.image_index].append(chunk)
-                        for img_idx in sorted(per_image):
-                            sorted_chunks = sorted(
-                                per_image[img_idx], key=lambda c: c.chunk_index
-                            )
-                            img = Base64Image("".join(c.data for c in sorted_chunks))
-                            self.image_cache[
-                                Base64ImageHash(
-                                    hashlib.sha256(img.encode("ascii")).hexdigest()
-                                )
-                            ] = img
-                            by_index[img_idx] = img
-                        logger.info(
-                            f"Assembled {len(per_image)} VLM image(s) "
-                            f"from {len(chunk_buffer)} chunks"
-                        )
-
                     resolved_images = [
-                        Base64Image(by_index[i]) for i in sorted(by_index)
+                        self.image_cache[h]
+                        for _, h in sorted(task.task_params.image_hashes.items())
                     ]
                     modified_task = task.model_copy(
                         update={
@@ -375,9 +370,9 @@ class Worker:
                 instance.shard_assignments.node_to_runner[self.node_id]
             ].start_task(task)
 
-    def _create_supervisor(self, task: CreateRunner) -> RunnerSupervisor:
+    async def _create_supervisor(self, task: CreateRunner) -> RunnerSupervisor:
         """Creates and stores a new AssignedRunner with initial downloading status."""
-        runner = RunnerSupervisor.create(
+        runner = await RunnerSupervisor.create(
             bound_instance=task.bound_instance,
             event_sender=self.event_sender.clone(),
         )
diff --git a/src/exo/worker/plan.py b/src/exo/worker/plan.py
index 07aeeab8d..3824e4bb7 100644
--- a/src/exo/worker/plan.py
+++ b/src/exo/worker/plan.py
@@ -19,6 +19,7 @@ from exo.shared.types.tasks import (
     TaskStatus,
     TextGeneration,
 )
+from exo.shared.types.text_generation import Base64Image, Base64ImageHash
 from exo.shared.types.worker.downloads import (
     DownloadCompleted,
     DownloadFailed,
@@ -40,7 +41,7 @@ from exo.shared.types.worker.runners import (
     RunnerWarmingUp,
 )
 from exo.utils.keyed_backoff import KeyedBackoff
-from exo.worker.runner.runner_supervisor import RunnerSupervisor
+from exo.worker.runner.supervisor import RunnerSupervisor
 
 
 def plan(
@@ -52,6 +53,7 @@ def plan(
     all_runners: Mapping[RunnerId, RunnerStatus],  # all global
     tasks: Mapping[TaskId, Task],
     input_chunk_buffer: Mapping[CommandId, Mapping[int, InputImageChunk]],
+    image_cache: Mapping[Base64ImageHash, Base64Image],
     instance_backoff: KeyedBackoff[InstanceId],
     download_backoff: KeyedBackoff[ModelId],
 ) -> Task | None:
@@ -66,7 +68,7 @@ def plan(
         or _init_distributed_backend(runners, all_runners)
         or _load_model(runners, all_runners, global_download_status)
         or _ready_to_warmup(runners, all_runners)
-        or _pending_tasks(runners, tasks, all_runners, input_chunk_buffer)
+        or _pending_tasks(runners, tasks, all_runners, input_chunk_buffer, image_cache)
     )
 
 
@@ -300,6 +302,7 @@ def _pending_tasks(
     tasks: Mapping[TaskId, Task],
     all_runners: Mapping[RunnerId, RunnerStatus],
     input_chunk_buffer: Mapping[CommandId, Mapping[int, InputImageChunk]],
+    image_cache: Mapping[Base64ImageHash, Base64Image],
 ) -> Task | None:
     for task in tasks.values():
         # for now, just forward chat completions
@@ -309,16 +312,20 @@ def _pending_tasks(
         if task.task_status not in (TaskStatus.Pending, TaskStatus.Running):
             continue
 
-        # For tasks with images, verify all input chunks have been received
-        expected_image_chunks = 0
-        if isinstance(task, (ImageEdits, TextGeneration)):
-            expected_image_chunks = task.task_params.total_input_chunks
-        if expected_image_chunks > 0:
-            cmd_id = task.command_id
-            received = len(input_chunk_buffer.get(cmd_id, {}))
-            if received < expected_image_chunks:
+        if isinstance(task, ImageEdits) and task.task_params.total_input_chunks > 0:
+            received = len(input_chunk_buffer.get(task.command_id, {}))
+            if received < task.task_params.total_input_chunks:
                 continue  # Wait for all chunks to arrive
 
+        if (
+            isinstance(task, TextGeneration)
+            and task.task_params.image_hashes
+            and not all(
+                h in image_cache for h in task.task_params.image_hashes.values()
+            )
+        ):
+            continue  # Wait for all images to be assembled into the cache
+
         for runner in runners.values():
             if task.instance_id != runner.bound_instance.instance.instance_id:
                 continue
diff --git a/src/exo/worker/runner/bootstrap.py b/src/exo/worker/runner/bootstrap.py
index 675dd3021..f981667c0 100644
--- a/src/exo/worker/runner/bootstrap.py
+++ b/src/exo/worker/runner/bootstrap.py
@@ -1,21 +1,45 @@
 import os
 import resource
+import traceback
+from dataclasses import dataclass
+from typing import Self, cast
 
 import loguru
 
-from exo.shared.types.events import Event, RunnerStatusUpdated
+from exo.shared.types.events import Event
 from exo.shared.types.tasks import Task, TaskId
 from exo.shared.types.worker.instances import BoundInstance
-from exo.shared.types.worker.runners import RunnerFailed
 from exo.utils.channels import ClosedResourceError, MpReceiver, MpSender
-from exo.worker.engines.mlx.patches import apply_mlx_patches
+from exo.worker.engines.base import Builder
 
 logger: "loguru.Logger" = loguru.logger
 
 
+@dataclass(frozen=True)
+class RunnerTerminationError:
+    """String-only snapshot of the exception that made a runner exit."""
+
+    exception_type: str
+    exception_message: str
+    exception_repr: str
+    traceback: str
+
+    @classmethod
+    def from_exception(cls, e: Exception) -> Self:
+        """Capture the type name, message, repr and chained traceback of `e`."""
+        rendered = "".join(
+            traceback.TracebackException.from_exception(e).format(chain=True)
+        )
+        return cls(type(e).__qualname__, str(e), repr(e), rendered)
+
+    def __str__(self) -> str:
+        headline = f"{self.exception_type}: {self.exception_message}"
+        return f"{headline}\n{self.traceback}"
+
+
 def entrypoint(
     bound_instance: BoundInstance,
-    event_sender: MpSender[Event],
+    event_sender: MpSender[Event | RunnerTerminationError],
     task_receiver: MpReceiver[Task],
     cancel_receiver: MpReceiver[TaskId],
     _logger: "loguru.Logger",
@@ -36,35 +60,41 @@ def entrypoint(
 
     # Import main after setting global logger - this lets us just import logger from this module
     try:
-        if bound_instance.is_image_model:
-            from exo.worker.runner.image_models.runner import Runner as ImageRunner
+        event_sender_downcast: MpSender[Event] = cast(MpSender[Event], event_sender)
 
-            runner = ImageRunner(
-                bound_instance, event_sender, task_receiver, cancel_receiver
+        from exo.worker.runner.runner import Runner
+
+        builder: Builder
+        if bound_instance.is_image_model:
+            from exo.worker.engines.image.builder import MfluxBuilder
+
+            builder = MfluxBuilder(
+                event_sender_downcast, cancel_receiver, bound_instance.bound_shard
             )
-            runner.main()
         else:
-            from exo.worker.runner.llm_inference.runner import Runner
+            from exo.worker.engines.mlx.patches import apply_mlx_patches
 
             apply_mlx_patches()
 
-            runner = Runner(
-                bound_instance, event_sender, task_receiver, cancel_receiver
-            )
-            runner.main()
+            from exo.worker.engines.mlx.builder import MlxBuilder
 
+            # NOTE(evil): builder and the Runner below share a single event sender
+            builder = MlxBuilder(
+                model_id=bound_instance.bound_shard.model_card.model_id,
+                event_sender=event_sender_downcast,
+                cancel_receiver=cancel_receiver,
+            )
+
+        runner = Runner(bound_instance, builder, event_sender_downcast, task_receiver)
+        runner.main()
     except ClosedResourceError:
         logger.warning("Runner communication closed unexpectedly")
     except Exception as e:
         logger.opt(exception=e).warning(
             f"Runner {bound_instance.bound_runner_id} crashed with critical exception {e}"
         )
-        event_sender.send(
-            RunnerStatusUpdated(
-                runner_id=bound_instance.bound_runner_id,
-                runner_status=RunnerFailed(error_message=str(e)),
-            )
-        )
+        event_sender.send(RunnerTerminationError.from_exception(e))
+        raise SystemExit(1) from e
     finally:
         try:
             event_sender.close()
diff --git a/src/exo/worker/runner/diagnostics.py b/src/exo/worker/runner/diagnostics.py
new file mode 100644
index 000000000..e106c66a3
--- /dev/null
+++ b/src/exo/worker/runner/diagnostics.py
@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+import errno
+import os
+import re
+from collections import deque
+from typing import final
+
+from exo.utils.pydantic_ext import TaggedModel
+
+_EVIDENCE_LINES = 4
+
+_METAL_GPU_TIMEOUT_RE = re.compile(
+    r"^\s*(?:libc\+\+abi:.*std::runtime_error:\s*)?"
+    r"(?P<message>\[METAL\].*GPU\s+Timeout.*)\s*$",
+    re.IGNORECASE,
+)
+_RING_SOCKET_ERRNO_RE = re.compile(
+    r"^\s*\[ring\]\s+Receiving\s+from\s+socket\s+\d+\s+failed\s+with\s+errno\s+"
+    r"(?P<error_number>\d+)\s*$",
+    re.IGNORECASE,
+)
+_RING_TRANSPORT_ABORT_RE = re.compile(
+    r"^\s*\[ring\]\s+Too\s+many\s+send/recv\s+errors\.\s+Aborting\.\.\.\s*$",
+    re.IGNORECASE,
+)
+
+
+class BaseRunnerDiagnostic(TaggedModel):
+    message: str
+    evidence: tuple[str, ...] = ()
+
+
+class RunnerMetalGpuTimeout(BaseRunnerDiagnostic):
+    pass
+
+
+class RunnerRingTransportError(BaseRunnerDiagnostic):
+    pass
+
+
+class RunnerRingSocketReceivingError(BaseRunnerDiagnostic):
+    error_number: int
+    error_name: str
+    error_description: str
+
+
+class RunnerUnknown(BaseRunnerDiagnostic):
+    pass
+
+
+KnownRunnerDiagnostic = (
+    RunnerMetalGpuTimeout | RunnerRingTransportError | RunnerRingSocketReceivingError
+)
+
+RunnerDiagnostic = KnownRunnerDiagnostic | RunnerUnknown
+
+
+@final
+class RunnerDiagnosticCollector:
+    def __init__(self) -> None:
+        self._stderr_tail: deque[str] = deque(maxlen=_EVIDENCE_LINES)
+        self._diagnostics: list[RunnerDiagnostic] = []
+
+    def record_line(self, line: str) -> None:
+        if not line or line.isspace():
+            return
+
+        self._stderr_tail.append(line)
+        evidence = tuple(self._stderr_tail)
+        diagnostic = self._classify_line(line, evidence) or RunnerUnknown(
+            message="Unclassified runner stderr line",
+            evidence=evidence,
+        )
+
+        # TODO: Eventually this should become a proper stateful parser. For now the
+        #       only state kept is the bespoke handling of specific errors below.
+        #
+        # `RunnerRingSocketReceivingError` usually repeats a few times before a
+        # `RunnerRingTransportError`, so consecutive ones collapse into the latest.
+        if len(self._diagnostics) > 0 and (
+            isinstance(self._diagnostics[-1], RunnerRingSocketReceivingError)
+            and isinstance(diagnostic, RunnerRingSocketReceivingError)
+        ):
+            self._diagnostics[-1] = diagnostic
+            return
+
+        self._diagnostics.append(diagnostic)
+
+    def diagnostics(self) -> tuple[RunnerDiagnostic, ...]:
+        return tuple(self._diagnostics)
+
+    def _classify_line(
+        self, line: str, evidence: tuple[str, ...]
+    ) -> KnownRunnerDiagnostic | None:
+        if metal_error := _parse_metal_gpu_timeout(line, evidence):
+            return metal_error
+
+        if socket_error := _parse_ring_socket_error(line, evidence):
+            return socket_error
+
+        if _RING_TRANSPORT_ABORT_RE.match(line):
+            return RunnerRingTransportError(
+                message="Ring transport aborted after too many send/recv errors",
+                evidence=evidence,
+            )
+
+        return None
+
+
+def _parse_metal_gpu_timeout(
+    line: str, evidence: tuple[str, ...]
+) -> RunnerMetalGpuTimeout | None:
+    """Return a GPU-timeout diagnostic when `line` matches the Metal pattern."""
+    if (found := _METAL_GPU_TIMEOUT_RE.match(line)) is None:
+        return None
+    detail = found.group("message")
+    return RunnerMetalGpuTimeout(
+        message=f"Metal GPU timeout: {detail}",
+        evidence=evidence,
+    )
+
+
+def _parse_ring_socket_error(
+    line: str, evidence: tuple[str, ...]
+) -> RunnerRingSocketReceivingError | None:
+    """Classify a ring socket-receive errno line; None if `line` does not match."""
+    match = _RING_SOCKET_ERRNO_RE.match(line)
+    if match is None:
+        return None
+    error_number = int(match.group("error_number"))
+    error_name = errno.errorcode.get(error_number, "UNKNOWN_ERRNO")
+    try:
+        error_description = os.strerror(error_number)
+    except (OverflowError, ValueError):  # errno digits may not fit a C int
+        error_description = f"Unknown error {error_number}"
+    return RunnerRingSocketReceivingError(
+        error_number=error_number,
+        error_name=error_name,
+        error_description=error_description,
+        evidence=evidence,
+        message=f"Ring socket receive failed: errno {error_number} "
+        f"{error_name} ({error_description})",
+    )
diff --git a/src/exo/worker/runner/image_models/runner.py b/src/exo/worker/runner/image_models/runner.py
deleted file mode 100644
index 2eb90baec..000000000
--- a/src/exo/worker/runner/image_models/runner.py
+++ /dev/null
@@ -1,403 +0,0 @@
-import base64
-import time
-from typing import TYPE_CHECKING, Literal
-
-import mlx.core as mx
-
-from exo.api.types import (
-    ImageEditsTaskParams,
-    ImageGenerationStats,
-    ImageGenerationTaskParams,
-)
-from exo.shared.constants import EXO_MAX_CHUNK_SIZE, EXO_TRACING_ENABLED
-from exo.shared.models.model_cards import ModelTask
-from exo.shared.tracing import clear_trace_buffer, get_trace_buffer
-from exo.shared.types.chunks import ErrorChunk, ImageChunk
-from exo.shared.types.common import CommandId, ModelId
-from exo.shared.types.events import (
-    ChunkGenerated,
-    Event,
-    RunnerStatusUpdated,
-    TaskAcknowledged,
-    TaskStatusUpdated,
-    TraceEventData,
-    TracesCollected,
-)
-from exo.shared.types.tasks import (
-    CANCEL_ALL_TASKS,
-    ConnectToGroup,
-    ImageEdits,
-    ImageGeneration,
-    LoadModel,
-    Shutdown,
-    StartWarmup,
-    Task,
-    TaskId,
-    TaskStatus,
-)
-from exo.shared.types.worker.instances import BoundInstance
-from exo.shared.types.worker.runner_response import (
-    ImageGenerationResponse,
-    PartialImageResponse,
-)
-from exo.shared.types.worker.runners import (
-    RunnerConnected,
-    RunnerConnecting,
-    RunnerIdle,
-    RunnerLoaded,
-    RunnerLoading,
-    RunnerReady,
-    RunnerRunning,
-    RunnerShutdown,
-    RunnerShuttingDown,
-    RunnerStatus,
-    RunnerWarmingUp,
-)
-from exo.shared.types.worker.shards import (
-    CfgShardMetadata,
-    PipelineShardMetadata,
-    ShardMetadata,
-)
-from exo.utils.channels import MpReceiver, MpSender
-from exo.worker.engines.image import (
-    DistributedImageModel,
-    generate_image,
-    initialize_image_model,
-    warmup_image_generator,
-)
-from exo.worker.engines.mlx.utils_mlx import (
-    initialize_mlx,
-)
-from exo.worker.runner.bootstrap import logger
-
-
-def _is_primary_output_node(shard_metadata: ShardMetadata) -> bool:
-    """Check if this node is the primary output node for image generation.
-
-    For CFG models: the last pipeline stage in CFG group 0 (positive prompt).
-    For non-CFG models: the last pipeline stage.
-    """
-    if isinstance(shard_metadata, CfgShardMetadata):
-        is_pipeline_last = (
-            shard_metadata.pipeline_rank == shard_metadata.pipeline_world_size - 1
-        )
-        return is_pipeline_last and shard_metadata.cfg_rank == 0
-    elif isinstance(shard_metadata, PipelineShardMetadata):
-        return shard_metadata.device_rank == shard_metadata.world_size - 1
-    return False
-
-
-def _process_image_response(
-    response: ImageGenerationResponse | PartialImageResponse,
-    command_id: CommandId,
-    shard_metadata: ShardMetadata,
-    event_sender: MpSender[Event],
-    image_index: int,
-) -> None:
-    """Process a single image response and send chunks."""
-    encoded_data = base64.b64encode(response.image_data).decode("utf-8")
-    is_partial = isinstance(response, PartialImageResponse)
-    # Extract stats from final ImageGenerationResponse if available
-    stats = response.stats if isinstance(response, ImageGenerationResponse) else None
-    _send_image_chunk(
-        encoded_data=encoded_data,
-        command_id=command_id,
-        model_id=shard_metadata.model_card.model_id,
-        event_sender=event_sender,
-        image_index=response.image_index,
-        is_partial=is_partial,
-        partial_index=response.partial_index if is_partial else None,
-        total_partials=response.total_partials if is_partial else None,
-        stats=stats,
-        image_format=response.format,
-    )
-
-
-def _send_traces_if_enabled(
-    event_sender: MpSender[Event],
-    task_id: TaskId,
-    rank: int,
-) -> None:
-    if not EXO_TRACING_ENABLED:
-        return
-
-    traces = get_trace_buffer()
-    if traces:
-        trace_data = [
-            TraceEventData(
-                name=t.name,
-                start_us=t.start_us,
-                duration_us=t.duration_us,
-                rank=t.rank,
-                category=t.category,
-            )
-            for t in traces
-        ]
-        event_sender.send(
-            TracesCollected(
-                task_id=task_id,
-                rank=rank,
-                traces=trace_data,
-            )
-        )
-    clear_trace_buffer()
-
-
-def _send_image_chunk(
-    encoded_data: str,
-    command_id: CommandId,
-    model_id: ModelId,
-    event_sender: MpSender[Event],
-    image_index: int,
-    is_partial: bool,
-    partial_index: int | None = None,
-    total_partials: int | None = None,
-    stats: ImageGenerationStats | None = None,
-    image_format: Literal["png", "jpeg", "webp"] | None = None,
-) -> None:
-    """Send base64-encoded image data as chunks via events."""
-    data_chunks = [
-        encoded_data[i : i + EXO_MAX_CHUNK_SIZE]
-        for i in range(0, len(encoded_data), EXO_MAX_CHUNK_SIZE)
-    ]
-    total_chunks = len(data_chunks)
-    for chunk_index, chunk_data in enumerate(data_chunks):
-        # Only include stats on the last chunk of the final image
-        chunk_stats = (
-            stats if chunk_index == total_chunks - 1 and not is_partial else None
-        )
-        event_sender.send(
-            ChunkGenerated(
-                command_id=command_id,
-                chunk=ImageChunk(
-                    model=model_id,
-                    data=chunk_data,
-                    chunk_index=chunk_index,
-                    total_chunks=total_chunks,
-                    image_index=image_index,
-                    is_partial=is_partial,
-                    partial_index=partial_index,
-                    total_partials=total_partials,
-                    stats=chunk_stats,
-                    format=image_format,
-                ),
-            )
-        )
-
-
-class Runner:
-    def __init__(
-        self,
-        bound_instance: BoundInstance,
-        event_sender: MpSender[Event],
-        task_receiver: MpReceiver[Task],
-        cancel_receiver: MpReceiver[TaskId],
-    ):
-        self.event_sender = event_sender
-        self.task_receiver = task_receiver
-        self.cancel_receiver = cancel_receiver
-        self.bound_instance = bound_instance
-
-        self.instance, self.runner_id, self.shard_metadata = (
-            bound_instance.instance,
-            bound_instance.bound_runner_id,
-            bound_instance.bound_shard,
-        )
-        self.device_rank = self.shard_metadata.device_rank
-
-        logger.info("hello from the runner")
-        if getattr(self.shard_metadata, "immediate_exception", False):
-            raise Exception("Fake exception - runner failed to spin up.")
-        if timeout := getattr(self.shard_metadata, "should_timeout", 0):
-            time.sleep(timeout)
-
-        self.setup_start_time = time.time()
-        self.cancelled_tasks = set[TaskId]()
-
-        self.image_model: DistributedImageModel | None = None
-        self.group = None
-
-        self.current_status: RunnerStatus = RunnerIdle()
-        logger.info("runner created")
-        self.update_status(RunnerIdle())
-        self.seen = set[TaskId]()
-
-    def update_status(self, status: RunnerStatus):
-        self.current_status = status
-        self.event_sender.send(
-            RunnerStatusUpdated(
-                runner_id=self.runner_id, runner_status=self.current_status
-            )
-        )
-
-    def send_task_status(self, task: Task, status: TaskStatus):
-        self.event_sender.send(
-            TaskStatusUpdated(task_id=task.task_id, task_status=status)
-        )
-
-    def acknowledge_task(self, task: Task):
-        self.event_sender.send(TaskAcknowledged(task_id=task.task_id))
-
-    def _check_cancelled(self, task_id: TaskId) -> bool:
-        for cancel_id in self.cancel_receiver.collect():
-            self.cancelled_tasks.add(cancel_id)
-        return (
-            task_id in self.cancelled_tasks or CANCEL_ALL_TASKS in self.cancelled_tasks
-        )
-
-    def _run_image_task(
-        self,
-        task: Task,
-        task_params: ImageGenerationTaskParams | ImageEditsTaskParams,
-        command_id: CommandId,
-    ) -> None:
-        assert self.image_model
-        logger.info(f"received image task: {str(task)[:500]}")
-        logger.info("runner running")
-        self.update_status(RunnerRunning())
-        self.acknowledge_task(task)
-
-        def cancel_checker() -> bool:
-            return self._check_cancelled(task.task_id)
-
-        try:
-            image_index = 0
-            for response in generate_image(
-                model=self.image_model,
-                task=task_params,
-                cancel_checker=cancel_checker,
-            ):
-                if _is_primary_output_node(self.shard_metadata):
-                    match response:
-                        case PartialImageResponse():
-                            logger.info(
-                                f"sending partial ImageChunk {response.partial_index}/{response.total_partials}"
-                            )
-                            _process_image_response(
-                                response,
-                                command_id,
-                                self.shard_metadata,
-                                self.event_sender,
-                                image_index,
-                            )
-                        case ImageGenerationResponse():
-                            logger.info("sending final ImageChunk")
-                            _process_image_response(
-                                response,
-                                command_id,
-                                self.shard_metadata,
-                                self.event_sender,
-                                image_index,
-                            )
-                            image_index += 1
-        except Exception as e:
-            if _is_primary_output_node(self.shard_metadata):
-                self.event_sender.send(
-                    ChunkGenerated(
-                        command_id=command_id,
-                        chunk=ErrorChunk(
-                            model=self.shard_metadata.model_card.model_id,
-                            finish_reason="error",
-                            error_message=str(e),
-                        ),
-                    )
-                )
-            raise
-        finally:
-            _send_traces_if_enabled(self.event_sender, task.task_id, self.device_rank)
-
-        self.current_status = RunnerReady()
-        logger.info("runner ready")
-
-    def main(self):
-        with self.task_receiver as tasks:
-            for task in tasks:
-                if task.task_id in self.seen:
-                    logger.warning("repeat task - potential error")
-                self.seen.add(task.task_id)
-                self.cancelled_tasks.discard(CANCEL_ALL_TASKS)
-                self.send_task_status(task, TaskStatus.Running)
-                self.handle_task(task)
-                was_cancelled = (task.task_id in self.cancelled_tasks) or (
-                    CANCEL_ALL_TASKS in self.cancelled_tasks
-                )
-                if not was_cancelled:
-                    self.send_task_status(task, TaskStatus.Complete)
-                self.update_status(self.current_status)
-
-                if isinstance(self.current_status, RunnerShutdown):
-                    break
-
-    def handle_task(self, task: Task):
-        match task:
-            case ConnectToGroup() if isinstance(self.current_status, RunnerIdle):
-                logger.info("runner connecting")
-                self.update_status(RunnerConnecting())
-                self.acknowledge_task(task)
-                self.group = initialize_mlx(self.bound_instance)
-
-                logger.info("runner connected")
-                self.current_status = RunnerConnected()
-
-            # we load the model if it's connected with a group, or idle without a group. we should never tell a model to connect if it doesn't need to
-            case LoadModel() if (
-                isinstance(self.current_status, RunnerConnected)
-                and self.group is not None
-            ) or (isinstance(self.current_status, RunnerIdle) and self.group is None):
-                logger.info("runner loading")
-                self.update_status(RunnerLoading())
-                self.acknowledge_task(task)
-
-                assert (
-                    ModelTask.TextToImage in self.shard_metadata.model_card.tasks
-                    or ModelTask.ImageToImage in self.shard_metadata.model_card.tasks
-                ), f"Incorrect model task(s): {self.shard_metadata.model_card.tasks}"
-
-                self.image_model = initialize_image_model(self.bound_instance)
-                self.current_status = RunnerLoaded()
-                logger.info("runner loaded")
-
-            case StartWarmup() if isinstance(self.current_status, RunnerLoaded):
-                logger.info("runner warming up")
-                self.update_status(RunnerWarmingUp())
-                self.acknowledge_task(task)
-
-                logger.info(f"warming up inference for instance: {self.instance}")
-
-                assert self.image_model
-                image = warmup_image_generator(model=self.image_model)
-                if image is not None:
-                    logger.info(f"warmed up by generating {image.size} image")
-                else:
-                    logger.info("warmup completed (non-primary node)")
-
-                logger.info(
-                    f"runner initialized in {time.time() - self.setup_start_time} seconds"
-                )
-
-                self.current_status = RunnerReady()
-                logger.info("runner ready")
-
-            case (
-                ImageGeneration(task_params=task_params, command_id=command_id)
-                | ImageEdits(task_params=task_params, command_id=command_id)
-            ) if isinstance(self.current_status, RunnerReady):
-                self._run_image_task(task, task_params, command_id)
-
-            case Shutdown():
-                logger.info("runner shutting down")
-                if not TYPE_CHECKING:
-                    del self.image_model, self.group
-                    mx.clear_cache()
-                    import gc
-
-                    gc.collect()
-
-                self.update_status(RunnerShuttingDown())
-                self.acknowledge_task(task)
-
-                self.current_status = RunnerShutdown()
-            case _:
-                raise ValueError(
-                    f"Received {task.__class__.__name__} outside of state machine in {self.current_status=}"
-                )
diff --git a/src/exo/worker/runner/llm_inference/batch_generator.py b/src/exo/worker/runner/llm_inference/batch_generator.py
index 5284d53ae..098c829e9 100644
--- a/src/exo/worker/runner/llm_inference/batch_generator.py
+++ b/src/exo/worker/runner/llm_inference/batch_generator.py
@@ -1,29 +1,42 @@
 import itertools
 import time
-from abc import ABC, abstractmethod
 from collections import deque
-from collections.abc import Generator, Iterable
+from collections.abc import Generator, Iterator
 from dataclasses import dataclass, field
+from typing import BinaryIO
 
 import mlx.core as mx
 from mlx_lm.tokenizer_utils import TokenizerWrapper
 
 from exo.shared.constants import EXO_MAX_CONCURRENT_REQUESTS
-from exo.shared.types.chunks import ErrorChunk, PrefillProgressChunk
+from exo.shared.types.chunks import ErrorChunk, GenerationChunk, PrefillProgressChunk
 from exo.shared.types.common import ModelId
 from exo.shared.types.events import ChunkGenerated, Event
-from exo.shared.types.mlx import Model
-from exo.shared.types.tasks import CANCEL_ALL_TASKS, TaskId, TextGeneration
+from exo.shared.types.tasks import (
+    CANCEL_ALL_TASKS,
+    GenerationTask,
+    TaskId,
+    TextGeneration,
+)
 from exo.shared.types.text_generation import TextGenerationTaskParams
-from exo.shared.types.worker.runner_response import GenerationResponse, ToolCallResponse
+from exo.shared.types.worker.runner_response import (
+    CancelledResponse,
+    FinishedResponse,
+    GenerationResponse,
+)
 from exo.utils.channels import MpReceiver, MpSender
+from exo.worker.disaggregated.server import PrefillRequest
+from exo.worker.engines.base import Engine
 from exo.worker.engines.mlx.cache import KVPrefixCache
+from exo.worker.engines.mlx.disaggregated.adapter import write_cache_to_wire
+from exo.worker.engines.mlx.disaggregated.serve import run_prefill_for_request
 from exo.worker.engines.mlx.generator.batch_generate import ExoBatchGenerator
 from exo.worker.engines.mlx.generator.generate import (
     PrefillCancelled,
     mlx_generate,
     warmup_inference,
 )
+from exo.worker.engines.mlx.types import Model
 from exo.worker.engines.mlx.utils_mlx import (
     apply_chat_template,
     mx_all_gather_tasks,
@@ -32,18 +45,10 @@ from exo.worker.engines.mlx.utils_mlx import (
 from exo.worker.engines.mlx.vision import VisionProcessor
 from exo.worker.runner.bootstrap import logger
 
-from .model_output_parsers import apply_all_parsers
+from .model_output_parsers import apply_all_parsers, map_responses_to_chunks
 from .tool_parsers import ToolParser
 
 
-class Cancelled:
-    pass
-
-
-class Finished:
-    pass
-
-
 class GeneratorQueue[T]:
     def __init__(self):
         self._q = deque[T]()
@@ -59,35 +64,6 @@ class GeneratorQueue[T]:
                 yield self._q.popleft()
 
 
-class InferenceGenerator(ABC):
-    _cancelled_tasks: set[TaskId]
-
-    def should_cancel(self, task_id: TaskId) -> bool:
-        return (
-            task_id in self._cancelled_tasks
-            or CANCEL_ALL_TASKS in self._cancelled_tasks
-        )
-
-    @abstractmethod
-    def warmup(self) -> None: ...
-
-    @abstractmethod
-    def submit(
-        self,
-        task: TextGeneration,
-    ) -> None: ...
-
-    @abstractmethod
-    def step(
-        self,
-    ) -> Iterable[
-        tuple[TaskId, ToolCallResponse | GenerationResponse | Cancelled | Finished]
-    ]: ...
-
-    @abstractmethod
-    def close(self) -> None: ...
-
-
 EXO_RUNNER_MUST_FAIL = "EXO RUNNER MUST FAIL"
 EXO_RUNNER_MUST_OOM = "EXO RUNNER MUST OOM"
 EXO_RUNNER_MUST_TIMEOUT = "EXO RUNNER MUST TIMEOUT"
@@ -111,7 +87,7 @@ def _check_for_debug_prompts(task_params: TextGenerationTaskParams) -> None:
 
 
 @dataclass(eq=False)
-class SequentialGenerator(InferenceGenerator):
+class SequentialGenerator(Engine):
     model: Model
     tokenizer: TokenizerWrapper
     group: mx.distributed.Group | None
@@ -137,7 +113,7 @@ class SequentialGenerator(InferenceGenerator):
             # queue that the 1st generator should push to and 3rd generator should pull from
             GeneratorQueue[GenerationResponse],
             # generator to get parsed outputs
-            Generator[GenerationResponse | ToolCallResponse | None],
+            Iterator[GenerationChunk | None],
         ]
         | None
     ) = field(default=None, init=False)
@@ -152,8 +128,9 @@ class SequentialGenerator(InferenceGenerator):
 
     def submit(
         self,
-        task: TextGeneration,
+        task: GenerationTask,
     ) -> None:
+        assert isinstance(task, TextGeneration)
         self._cancelled_tasks.discard(CANCEL_ALL_TASKS)
         self._all_tasks[task.task_id] = task
         self._maybe_queue.append(task)
@@ -161,8 +138,10 @@ class SequentialGenerator(InferenceGenerator):
     def agree_on_tasks(self) -> None:
         """Agree between all ranks about the task ordering (some may have received in different order or not at all)."""
         agreed, different = mx_all_gather_tasks(self._maybe_queue, self.group)
-        self._queue.extend(task for task in self._maybe_queue if task in agreed)
-        self._maybe_queue = [task for task in self._maybe_queue if task in different]
+        # Extend from `agreed` (sorted by task_id on all ranks) to guarantee every
+        # rank enqueues tasks in the same order, preventing TP collective deadlocks.
+        self._queue.extend(agreed)
+        self._maybe_queue = list(different)
 
     def agree_on_cancellations(self) -> None:
         """Agree between all ranks about which tasks to cancel."""
@@ -183,8 +162,8 @@ class SequentialGenerator(InferenceGenerator):
 
     def step(
         self,
-    ) -> Iterable[
-        tuple[TaskId, GenerationResponse | ToolCallResponse | Cancelled | Finished]
+    ) -> Iterator[
+        tuple[TaskId, GenerationChunk | FinishedResponse | CancelledResponse]
     ]:
         if self._active is None:
             self.agree_on_tasks()
@@ -192,23 +171,25 @@ class SequentialGenerator(InferenceGenerator):
             if self._queue:
                 self._start_next()
             else:
-                return map(lambda task: (task, Cancelled()), self._cancelled_tasks)
+                return map(
+                    lambda task: (task, CancelledResponse()), self._cancelled_tasks
+                )
 
         assert self._active is not None
 
-        task, mlx_gen, queue, output_generator = self._active
+        task, gen, queue, output_generator = self._active
         output: list[
-            tuple[TaskId, GenerationResponse | ToolCallResponse | Cancelled | Finished]
+            tuple[TaskId, GenerationChunk | CancelledResponse | FinishedResponse]
         ] = []
         try:
-            response = next(mlx_gen)
+            response = next(gen)
             queue.push(response)
             # drain potentially many responses every time
             while (parsed := next(output_generator, None)) is not None:
                 output.append((task.task_id, parsed))
 
         except (StopIteration, PrefillCancelled):
-            output.append((task.task_id, Finished()))
+            output.append((task.task_id, FinishedResponse()))
             self._active = None
             if self._queue:
                 self._start_next()
@@ -218,22 +199,29 @@ class SequentialGenerator(InferenceGenerator):
             self._active = None
             raise
 
-        return itertools.chain(
-            output,
-            map(lambda task: (task, Cancelled()), self._cancelled_tasks),
+        return filter(
+            lambda chunk: (
+                not isinstance(chunk[1], GenerationChunk) or self.device_rank == 0
+            ),
+            itertools.chain(
+                output,
+                map(lambda task: (task, CancelledResponse()), self._cancelled_tasks),
+            ),
         )
 
     def _start_next(self) -> None:
         task = self._queue.popleft()
         try:
-            mlx_gen = self._build_generator(task)
+            gen = self._build_generator(task)
         except Exception as e:
             self._send_error(task, e)
             raise
         queue = GeneratorQueue[GenerationResponse]()
 
         if task.task_params.bench:
-            output_generator = queue.gen()
+            output_generator: Iterator[GenerationChunk | None] = map(
+                lambda r: map_responses_to_chunks(r, self.model_id), queue.gen()
+            )
         else:
             output_generator = apply_all_parsers(
                 queue.gen(),
@@ -244,7 +232,7 @@ class SequentialGenerator(InferenceGenerator):
                 self.model_id,
                 task.task_params.tools,
             )
-        self._active = (task, mlx_gen, queue, output_generator)
+        self._active = (task, gen, queue, output_generator)
 
     def _send_error(self, task: TextGeneration, e: Exception) -> None:
         if self.device_rank == 0:
@@ -312,9 +300,25 @@ class SequentialGenerator(InferenceGenerator):
     def close(self) -> None:
         del self.model, self.tokenizer, self.group
 
+    def serve_prefill(self, request: PrefillRequest, wfile: BinaryIO) -> None:
+        """Run prefill for `request` and write the resulting cache to `wfile`."""
+        write_cache_to_wire(
+            wfile,
+            run_prefill_for_request(
+                model=self.model,
+                tokenizer=self.tokenizer,
+                group=self.group,
+                kv_prefix_cache=self.kv_prefix_cache,
+                request=request,
+            ),
+            request_id=request.request_id,
+            model_id=request.model_id,
+            start_pos=request.start_pos,
+        )
+
 
 @dataclass(eq=False)
-class BatchGenerator(InferenceGenerator):
+class BatchGenerator(Engine):
     model: Model
     tokenizer: TokenizerWrapper
     group: mx.distributed.Group | None
@@ -332,18 +336,18 @@ class BatchGenerator(InferenceGenerator):
     _maybe_cancel: list[TextGeneration] = field(default_factory=list, init=False)
     _all_tasks: dict[TaskId, TextGeneration] = field(default_factory=dict, init=False)
     _queue: deque[TextGeneration] = field(default_factory=deque, init=False)
-    _mlx_gen: ExoBatchGenerator = field(init=False)
+    _gen: ExoBatchGenerator = field(init=False)
     _active_tasks: dict[
         int,
         tuple[
             TextGeneration,
             GeneratorQueue[GenerationResponse],
-            Generator[GenerationResponse | ToolCallResponse | None],
+            Iterator[GenerationChunk | None],
         ],
     ] = field(default_factory=dict, init=False)
 
     def __post_init__(self) -> None:
-        self._mlx_gen = ExoBatchGenerator(
+        self._gen = ExoBatchGenerator(
             model=self.model,
             tokenizer=self.tokenizer,
             group=self.group,
@@ -361,8 +365,9 @@ class BatchGenerator(InferenceGenerator):
 
     def submit(
         self,
-        task: TextGeneration,
+        task: GenerationTask,
     ) -> None:
+        assert isinstance(task, TextGeneration)
         self._cancelled_tasks.discard(CANCEL_ALL_TASKS)
         self._all_tasks[task.task_id] = task
         self._maybe_queue.append(task)
@@ -370,8 +375,10 @@ class BatchGenerator(InferenceGenerator):
     def agree_on_tasks(self) -> None:
         """Agree between all ranks about the task ordering (some may have received in different order or not at all)."""
         agreed, different = mx_all_gather_tasks(self._maybe_queue, self.group)
-        self._queue.extend(task for task in self._maybe_queue if task in agreed)
-        self._maybe_queue = [task for task in self._maybe_queue if task in different]
+        # Extend from `agreed` (sorted by task_id on all ranks) to guarantee every
+        # rank enqueues tasks in the same order, preventing TP collective deadlocks.
+        self._queue.extend(agreed)
+        self._maybe_queue = list(different)
 
     def agree_on_cancellations(self) -> None:
         """Agree between all ranks about which tasks to cancel."""
@@ -392,8 +399,8 @@ class BatchGenerator(InferenceGenerator):
 
     def step(
         self,
-    ) -> Iterable[
-        tuple[TaskId, GenerationResponse | ToolCallResponse | Cancelled | Finished]
+    ) -> Iterator[
+        tuple[TaskId, GenerationChunk | CancelledResponse | FinishedResponse]
     ]:
         if not self._queue:
             self.agree_on_tasks()
@@ -411,7 +418,9 @@ class BatchGenerator(InferenceGenerator):
 
             queue = GeneratorQueue[GenerationResponse]()
             if task.task_params.bench:
-                output_generator = queue.gen()
+                output_generator: Iterator[GenerationChunk | None] = map(
+                    lambda r: map_responses_to_chunks(r, self.model_id), queue.gen()
+                )
             else:
                 output_generator = apply_all_parsers(
                     queue.gen(),
@@ -424,13 +433,13 @@ class BatchGenerator(InferenceGenerator):
                 )
             self._active_tasks[uid] = (task, queue, output_generator)
 
-        if not self._mlx_gen.has_work:
+        if not self._gen.has_work:
             return self._apply_cancellations()
 
-        results = self._mlx_gen.step()
+        results = self._gen.step()
 
         output: list[
-            tuple[TaskId, GenerationResponse | ToolCallResponse | Cancelled | Finished]
+            tuple[TaskId, GenerationChunk | CancelledResponse | FinishedResponse]
         ] = []
         for uid, response in results:
             if uid not in self._active_tasks:
@@ -446,38 +455,43 @@ class BatchGenerator(InferenceGenerator):
 
             # check if original response was terminal and append a Finished()
             if response.finish_reason is not None:
-                output.append((task.task_id, Finished()))
+                output.append((task.task_id, FinishedResponse()))
                 del self._active_tasks[uid]
 
-        return itertools.chain(output, self._apply_cancellations())
+        return filter(
+            lambda chunk: (
+                not isinstance(chunk[1], GenerationChunk) or self.device_rank == 0
+            ),
+            itertools.chain(output, self._apply_cancellations()),
+        )
 
     def _apply_cancellations(
         self,
-    ) -> list[tuple[TaskId, Cancelled]]:
+    ) -> Iterator[tuple[TaskId, CancelledResponse]]:
         if not self._cancelled_tasks:
-            return []
+            return iter([])
 
         cancel_all = CANCEL_ALL_TASKS in self._cancelled_tasks
 
         uids_to_cancel: list[int] = []
-        results: list[tuple[TaskId, Cancelled]] = []
+        results: list[tuple[TaskId, CancelledResponse]] = []
 
         for uid, (task, _, _) in list(self._active_tasks.items()):
             if task.task_id in self._cancelled_tasks or cancel_all:
                 uids_to_cancel.append(uid)
-                results.append((task.task_id, Cancelled()))
+                results.append((task.task_id, CancelledResponse()))
                 del self._active_tasks[uid]
 
         if uids_to_cancel:
-            self._mlx_gen.cancel(uids_to_cancel)
+            self._gen.cancel(uids_to_cancel)
 
         already_cancelled = {tid for tid, _ in results}
         for tid in self._cancelled_tasks:
             if tid != CANCEL_ALL_TASKS and tid not in already_cancelled:
-                results.append((tid, Cancelled()))
+                results.append((tid, CancelledResponse()))
 
         self._cancelled_tasks.clear()
-        return results
+        return iter(results)
 
     def _send_error(self, task: TextGeneration, e: Exception) -> None:
         if self.device_rank == 0:
@@ -529,7 +543,7 @@ class BatchGenerator(InferenceGenerator):
 
                 self.agree_on_tasks()
 
-        return self._mlx_gen.submit(
+        return self._gen.submit(
             task_params=task.task_params,
             prompt=prompt,
             on_prefill_progress=on_prefill_progress,
@@ -538,5 +552,21 @@ class BatchGenerator(InferenceGenerator):
         )
 
     def close(self) -> None:
-        self._mlx_gen.close()
+        self._gen.close()
         del self.model, self.tokenizer, self.group
+
+    def serve_prefill(self, request: PrefillRequest, wfile: BinaryIO) -> None:
+        """Run prefill for `request` and write the resulting cache to `wfile`."""
+        write_cache_to_wire(
+            wfile,
+            run_prefill_for_request(
+                model=self.model,
+                tokenizer=self.tokenizer,
+                group=self.group,
+                kv_prefix_cache=self.kv_prefix_cache,
+                request=request,
+            ),
+            request_id=request.request_id,
+            model_id=request.model_id,
+            start_pos=request.start_pos,
+        )
diff --git a/src/exo/worker/runner/llm_inference/model_output_parsers.py b/src/exo/worker/runner/llm_inference/model_output_parsers.py
index c7e506495..4952688dc 100644
--- a/src/exo/worker/runner/llm_inference/model_output_parsers.py
+++ b/src/exo/worker/runner/llm_inference/model_output_parsers.py
@@ -1,7 +1,8 @@
-from collections.abc import Generator
+from collections.abc import Callable, Generator, Iterator
 from functools import cache
 from typing import Any
 
+from mlx_lm.models.deepseek_v4 import Model as DeepseekV4Model
 from mlx_lm.models.deepseek_v32 import Model as DeepseekV32Model
 from mlx_lm.models.gpt_oss import Model as GptOssModel
 from mlx_lm.tokenizer_utils import TokenizerWrapper
@@ -14,12 +15,19 @@ from openai_harmony import (  # pyright: ignore[reportMissingTypeStubs]
 )
 
 from exo.api.types import ToolCallItem
+from exo.shared.types.chunks import (
+    ErrorChunk,
+    GenerationChunk,
+    TokenChunk,
+    ToolCallChunk,
+)
 from exo.shared.types.common import ModelId
-from exo.shared.types.mlx import Model
 from exo.shared.types.worker.runner_response import GenerationResponse, ToolCallResponse
+from exo.worker.engines.mlx.types import Model
 from exo.worker.engines.mlx.utils_mlx import (
     detect_thinking_prompt_suffix,
 )
+from exo.worker.engines.mlx.vendor.dsml_encoding import parse_dsml_output
 from exo.worker.runner.bootstrap import logger
 from exo.worker.runner.llm_inference.tool_parsers import ToolParser
 
@@ -64,29 +72,84 @@ def apply_all_parsers(
     model_type: type[Model],
     model_id: ModelId,
     tools: list[dict[str, Any]] | None,
-) -> Generator[GenerationResponse | ToolCallResponse | None]:
-    mlx_generator = receiver
+) -> Iterator[GenerationChunk | None]:
+    generator = receiver
 
+    normalized_id = model_id.normalize().lower()
     if issubclass(model_type, GptOssModel):
-        mlx_generator = parse_gpt_oss(mlx_generator)
-    elif (
-        issubclass(model_type, DeepseekV32Model)
-        and "deepseek" in model_id.normalize().lower()
-    ):
-        mlx_generator = parse_deepseek_v32(mlx_generator)
+        generator = parse_gpt_oss(generator)
+    elif issubclass(model_type, DeepseekV32Model) and "deepseek" in normalized_id:
+        if tokenizer.has_thinking:
+            generator = parse_thinking_models(
+                generator,
+                tokenizer.think_start,
+                tokenizer.think_end,
+                starts_in_thinking=detect_thinking_prompt_suffix(prompt, tokenizer),
+            )
+        generator = parse_deepseek_v32(generator)
+    elif issubclass(model_type, DeepseekV4Model) and "deepseek-v4" in normalized_id:
+        if tokenizer.has_thinking:
+            generator = parse_thinking_models(
+                generator,
+                tokenizer.think_start,
+                tokenizer.think_end,
+                starts_in_thinking=detect_thinking_prompt_suffix(prompt, tokenizer),
+            )
+        generator = parse_deepseek_v4(generator)
     else:
         if tokenizer.has_thinking:
-            mlx_generator = parse_thinking_models(
-                mlx_generator,
+            generator = parse_thinking_models(
+                generator,
                 tokenizer.think_start,
                 tokenizer.think_end,
                 starts_in_thinking=detect_thinking_prompt_suffix(prompt, tokenizer),
             )
 
         if tool_parser:
-            mlx_generator = parse_tool_calls(mlx_generator, tool_parser, tools)
+            generator = parse_tool_calls(generator, tool_parser, tools)
 
-    return count_reasoning_tokens(mlx_generator)
+    generator = count_reasoning_tokens(generator)
+
+    return map(lambda r: map_responses_to_chunks(r, model_id), generator)
+
+
+def map_responses_to_chunks(
+    response: GenerationResponse | ToolCallResponse | None, model_id: ModelId
+) -> GenerationChunk | None:
+    """Translate one parser output into the chunk type returned to callers.
+
+    ``None`` passes through; a ``ToolCallResponse`` becomes a ``ToolCallChunk``;
+    a ``GenerationResponse`` becomes an ``ErrorChunk`` when its finish reason
+    is ``"error"`` and a ``TokenChunk`` otherwise.
+    """
+    if response is None:
+        return None
+    if isinstance(response, GenerationResponse):
+        reason = response.finish_reason
+        if reason == "error":
+            # Error responses carry their message in the text field.
+            return ErrorChunk(error_message=response.text, model=model_id)
+        # Tool-call finish reasons must not reach the plain-token path.
+        assert reason not in ("error", "tool_calls", "function_call")
+        return TokenChunk(
+            model=model_id,
+            text=response.text,
+            token_id=response.token,
+            usage=response.usage,
+            finish_reason=reason,
+            stats=response.stats,
+            logprob=response.logprob,
+            top_logprobs=response.top_logprobs,
+            is_thinking=response.is_thinking,
+        )
+    if isinstance(response, ToolCallResponse):
+        return ToolCallChunk(
+            tool_calls=response.tool_calls,
+            model=model_id,
+            usage=response.usage,
+            stats=response.stats,
+        )
+    return None
 
 
 def parse_gpt_oss(
@@ -163,19 +226,37 @@ def parse_deepseek_v32(
 
     Uses accumulated-text matching (not per-token marker checks) because
     DSML markers like <｜DSML｜function_calls> may span multiple tokens.
-    Also handles <think>...</think> blocks for thinking mode.
+    Thinking tag handling is delegated to parse_thinking_models, which
+    wraps this parser in apply_all_parsers.
     """
-    from exo.worker.engines.mlx.dsml_encoding import (
-        THINKING_END,
-        THINKING_START,
+    from exo.worker.engines.mlx.vendor.dsml_encoding import (
         TOOL_CALLS_END,
         TOOL_CALLS_START,
         parse_dsml_output,
     )
 
+    return _parse_dsml_stream(
+        responses, TOOL_CALLS_START, TOOL_CALLS_END, parse_dsml_output
+    )
+
+
+def parse_deepseek_v4(
+    responses: Generator[GenerationResponse | None],
+) -> Generator[GenerationResponse | ToolCallResponse | None]:
+    """Run the shared DSML stream parser with the ``tool_calls`` block markers."""
+    dsml_token = "｜DSML｜"
+    opening, closing = f"<{dsml_token}tool_calls>", f"</{dsml_token}tool_calls>"
+    return _parse_dsml_stream(responses, opening, closing, parse_dsml_output)
+
+
+def _parse_dsml_stream(
+    responses: Generator[GenerationResponse | None],
+    tool_calls_start: str,
+    tool_calls_end: str,
+    parse_body: Callable[[str], list[ToolCallItem] | None],
+) -> Generator[GenerationResponse | ToolCallResponse | None]:
     accumulated = ""
     in_tool_call = False
-    thinking = False
     # Tokens buffered while we detect the start of a DSML block
     pending_buffer: list[GenerationResponse] = []
     # Text accumulated during a tool call block
@@ -184,7 +265,7 @@ def parse_deepseek_v32(
     def _try_parse_tool_call(
         text: str, response: GenerationResponse
     ) -> ToolCallResponse | GenerationResponse:
-        parsed = parse_dsml_output(text)
+        parsed = parse_body(text)
         if parsed is not None:
             return ToolCallResponse(
                 tool_calls=parsed, usage=response.usage, stats=response.stats
@@ -204,11 +285,11 @@ def parse_deepseek_v32(
                 tool_call_text += response.text
                 yield (
                     _try_parse_tool_call(tool_call_text, response)
-                    if TOOL_CALLS_END in tool_call_text
+                    if tool_calls_end in tool_call_text
                     else response.model_copy(update={"text": tool_call_text})
                 )
-            elif TOOL_CALLS_START in response.text and TOOL_CALLS_END in response.text:
-                dsml_start = response.text.index(TOOL_CALLS_START)
+            elif tool_calls_start in response.text and tool_calls_end in response.text:
+                dsml_start = response.text.index(tool_calls_start)
                 before = response.text[:dsml_start]
                 if before:
                     yield response.model_copy(update={"text": before})
@@ -217,48 +298,21 @@ def parse_deepseek_v32(
                 yield response
             break
 
-        # ── Handle thinking tags ──
-        if not thinking and THINKING_START in response.text:
-            thinking = True
-            # Yield any text before the <think> tag
-            before = response.text[: response.text.index(THINKING_START)]
-            if before:
-                yield response.model_copy(update={"text": before})
-            continue
-
-        if thinking and THINKING_END in response.text:
-            thinking = False
-            # Yield any text after the </think> tag
-            after = response.text[
-                response.text.index(THINKING_END) + len(THINKING_END) :
-            ]
-            if after:
-                yield response.model_copy(update={"text": after, "is_thinking": False})
-            continue
-
-        if thinking:
-            yield response.model_copy(update={"is_thinking": True})
-            continue
-
-        # ── Handle tool call accumulation ──
         if in_tool_call:
             tool_call_text += response.text
-            if TOOL_CALLS_END in tool_call_text:
+            if tool_calls_end in tool_call_text:
                 yield _try_parse_tool_call(tool_call_text, response)
                 in_tool_call = False
                 tool_call_text = ""
             continue
 
-        # ── Detect start of tool call block ──
         accumulated += response.text
 
-        if TOOL_CALLS_START in accumulated:
-            # The start marker might be split across pending_buffer + current token
-            start_idx = accumulated.index(TOOL_CALLS_START)
-            # Yield any pending tokens that are purely before the marker
+        if tool_calls_start in accumulated:
+            start_idx = accumulated.index(tool_calls_start)
             pre_text = accumulated[:start_idx]
+            # Flush pending buffer tokens that contributed text before the marker
             if pre_text:
-                # Flush pending buffer tokens that contributed text before the marker
                 for buf_resp in pending_buffer:
                     if not pre_text:
                         break
@@ -273,17 +327,14 @@ def parse_deepseek_v32(
             tool_call_text = accumulated[start_idx:]
             accumulated = ""
 
-            # Check if the end marker is already present (entire tool call in one token)
-            if TOOL_CALLS_END in tool_call_text:
+            if tool_calls_end in tool_call_text:
                 yield _try_parse_tool_call(tool_call_text, response)
                 tool_call_text = ""
             else:
                 in_tool_call = True
             continue
 
-        # Check if accumulated text might be the start of a DSML marker
-        # Buffer tokens if we see a partial match at the end
-        if _could_be_dsml_prefix(accumulated):
+        if _could_be_marker_prefix(accumulated, tool_calls_start):
             pending_buffer.append(response)
             continue
 
@@ -297,22 +348,12 @@ def parse_deepseek_v32(
     yield from pending_buffer
 
 
-def _could_be_dsml_prefix(text: str) -> bool:
-    """Check if the end of text could be the start of a DSML function_calls marker.
-
-    We look for suffixes of text that are prefixes of the TOOL_CALLS_START pattern.
-    This allows us to buffer tokens until we can determine if a tool call is starting.
-    """
-    from exo.worker.engines.mlx.dsml_encoding import TOOL_CALLS_START
-
-    # Only check the last portion of text that could overlap with the marker
-    max_check = len(TOOL_CALLS_START)
+def _could_be_marker_prefix(text: str, marker: str) -> bool:
+    max_check = len(marker)
     tail = text[-max_check:] if len(text) > max_check else text
-
-    # Check if any suffix of tail is a prefix of TOOL_CALLS_START
     for i in range(len(tail)):
         suffix = tail[i:]
-        if TOOL_CALLS_START.startswith(suffix):
+        if marker.startswith(suffix):
             return True
     return False
 
@@ -330,6 +371,12 @@ def parse_thinking_models(
     """
     is_thinking = starts_in_thinking
     accumulated = ""
+    pending_buffer: list[GenerationResponse] = []
+
+    def drain_pending(_is_thinking: bool):
+        for buffered in pending_buffer:
+            yield buffered.model_copy(update={"is_thinking": _is_thinking})
+        pending_buffer.clear()
 
     for response in responses:
         if response is None:
@@ -339,25 +386,30 @@ def parse_thinking_models(
         accumulated += response.text
 
         if response.finish_reason is not None:
+            yield from drain_pending(is_thinking)
             yield response.model_copy(update={"is_thinking": False})
             continue
 
         if accumulated == think_start and not is_thinking:
             is_thinking = True
             accumulated = ""
+            pending_buffer.clear()
             continue
         if accumulated == think_end and is_thinking:
             is_thinking = False
             accumulated = ""
+            pending_buffer.clear()
             continue
 
         if (think_start and accumulated == think_start[: len(accumulated)]) or (
             think_end and accumulated == think_end[: len(accumulated)]
         ):
+            pending_buffer.append(response)
             continue
 
         accumulated = ""
 
+        yield from drain_pending(is_thinking)
         yield response.model_copy(update={"is_thinking": is_thinking})
 
 
@@ -368,6 +420,8 @@ def parse_tool_calls(
 ) -> Generator[GenerationResponse | ToolCallResponse | None]:
     in_tool_call = False
     tool_call_text_parts: list[str] = []
+    accumulated_tool_calls: list[ToolCallItem] = []
+
     for response in responses:
         if response is None:
             yield None
@@ -376,6 +430,19 @@ def parse_tool_calls(
         if not in_tool_call and response.text.startswith(tool_parser.start_parsing):
             in_tool_call = True
 
+        if (
+            not in_tool_call
+            and accumulated_tool_calls
+            and (response.stats is not None or response.finish_reason is not None)
+        ):
+            yield ToolCallResponse(
+                tool_calls=accumulated_tool_calls,
+                usage=response.usage,
+                stats=response.stats,
+            )
+            accumulated_tool_calls.clear()
+            continue
+
         if not in_tool_call:
             yield response
             continue
@@ -396,9 +463,16 @@ def parse_tool_calls(
                 )
                 break
 
-            yield ToolCallResponse(
-                tool_calls=parsed, usage=response.usage, stats=response.stats
-            )
+            accumulated_tool_calls.extend(parsed)
+            if accumulated_tool_calls and (
+                response.finish_reason is not None or response.stats is not None
+            ):
+                yield ToolCallResponse(
+                    tool_calls=accumulated_tool_calls,
+                    usage=response.usage,
+                    stats=response.stats,
+                )
+                accumulated_tool_calls.clear()
             continue
 
         if response.finish_reason is not None:
@@ -413,3 +487,6 @@ def parse_tool_calls(
                 }
             )
             yield response
+
+    if accumulated_tool_calls:  # leftovers: parsed tool calls that were never emitted
+        logger.warning("Tool calls should have all been emitted but were not")
diff --git a/src/exo/worker/runner/llm_inference/runner.py b/src/exo/worker/runner/llm_inference/runner.py
deleted file mode 100644
index 53fc2f197..000000000
--- a/src/exo/worker/runner/llm_inference/runner.py
+++ /dev/null
@@ -1,442 +0,0 @@
-import os
-import time
-from dataclasses import dataclass
-from enum import Enum
-
-import mlx.core as mx
-from anyio import WouldBlock
-from mlx_lm.tokenizer_utils import TokenizerWrapper
-
-from exo.shared.models.model_cards import ModelTask
-from exo.shared.types.chunks import (
-    ErrorChunk,
-    TokenChunk,
-    ToolCallChunk,
-)
-from exo.shared.types.common import CommandId, ModelId
-from exo.shared.types.events import (
-    ChunkGenerated,
-    Event,
-    RunnerStatusUpdated,
-    TaskAcknowledged,
-    TaskStatusUpdated,
-)
-from exo.shared.types.mlx import Model
-from exo.shared.types.tasks import (
-    ConnectToGroup,
-    LoadModel,
-    Shutdown,
-    StartWarmup,
-    Task,
-    TaskId,
-    TaskStatus,
-    TextGeneration,
-)
-from exo.shared.types.worker.instances import BoundInstance
-from exo.shared.types.worker.runner_response import (
-    GenerationResponse,
-    ToolCallResponse,
-)
-from exo.shared.types.worker.runners import (
-    RunnerConnected,
-    RunnerConnecting,
-    RunnerFailed,
-    RunnerIdle,
-    RunnerLoaded,
-    RunnerLoading,
-    RunnerReady,
-    RunnerRunning,
-    RunnerShutdown,
-    RunnerShuttingDown,
-    RunnerStatus,
-    RunnerWarmingUp,
-)
-from exo.utils.channels import MpReceiver, MpSender
-from exo.worker.engines.mlx.cache import KVPrefixCache
-from exo.worker.engines.mlx.utils_mlx import (
-    initialize_mlx,
-    load_mlx_items,
-)
-from exo.worker.engines.mlx.vision import VisionProcessor
-from exo.worker.runner.bootstrap import logger
-from exo.worker.runner.llm_inference.batch_generator import (
-    BatchGenerator,
-    InferenceGenerator,
-    SequentialGenerator,
-)
-
-from .batch_generator import Cancelled, Finished
-from .tool_parsers import make_mlx_parser
-
-
-class ExitCode(str, Enum):
-    AllTasksComplete = "AllTasksComplete"
-    Shutdown = "Shutdown"
-
-
-class Runner:
-    def __init__(
-        self,
-        bound_instance: BoundInstance,
-        event_sender: MpSender[Event],
-        task_receiver: MpReceiver[Task],
-        cancel_receiver: MpReceiver[TaskId],
-    ):
-        self.event_sender = event_sender
-        self.task_receiver = task_receiver
-        self.cancel_receiver = cancel_receiver
-        self.bound_instance = bound_instance
-
-        self.instance, self.runner_id, self.shard_metadata = (
-            self.bound_instance.instance,
-            self.bound_instance.bound_runner_id,
-            self.bound_instance.bound_shard,
-        )
-        self.model_id = self.shard_metadata.model_card.model_id
-        self.device_rank = self.shard_metadata.device_rank
-
-        logger.info("hello from the runner")
-        if getattr(self.shard_metadata, "immediate_exception", False):
-            raise Exception("Fake exception - runner failed to spin up.")
-        if timeout := getattr(self.shard_metadata, "should_timeout", 0):
-            time.sleep(timeout)
-
-        self.setup_start_time = time.time()
-
-        self.generator: Builder | InferenceGenerator = Builder(
-            self.model_id,
-            self.event_sender,
-            self.cancel_receiver,
-        )
-
-        self.seen: set[TaskId] = set()
-        self.active_tasks: dict[
-            TaskId,
-            TextGeneration,
-        ] = {}
-
-        logger.info("runner created")
-        self.update_status(RunnerIdle())
-
-    def update_status(self, status: RunnerStatus):
-        self.current_status = status
-        self.event_sender.send(
-            RunnerStatusUpdated(
-                runner_id=self.runner_id, runner_status=self.current_status
-            )
-        )
-
-    def send_task_status(self, task_id: TaskId, task_status: TaskStatus):
-        self.event_sender.send(
-            TaskStatusUpdated(task_id=task_id, task_status=task_status)
-        )
-
-    def acknowledge_task(self, task: Task):
-        self.event_sender.send(TaskAcknowledged(task_id=task.task_id))
-
-    def main(self):
-        with self.task_receiver:
-            for task in self.task_receiver:
-                if task.task_id in self.seen:
-                    logger.warning("repeat task - potential error")
-                    continue
-                self.seen.add(task.task_id)
-                self.handle_first_task(task)
-                if isinstance(self.current_status, RunnerShutdown):
-                    break
-
-    def handle_first_task(self, task: Task):
-        self.send_task_status(task.task_id, TaskStatus.Running)
-
-        match task:
-            case ConnectToGroup() if isinstance(self.current_status, RunnerIdle):
-                assert isinstance(self.generator, Builder)
-                logger.info("runner connecting")
-                self.update_status(RunnerConnecting())
-                self.acknowledge_task(task)
-
-                self.generator.group = initialize_mlx(self.bound_instance)
-
-                self.send_task_status(task.task_id, TaskStatus.Complete)
-                self.update_status(RunnerConnected())
-                logger.info("runner connected")
-
-            # we load the model if it's connected with a group, or idle without a group. we should never tell a model to connect if it doesn't need to
-            case LoadModel() if isinstance(self.generator, Builder) and (
-                (
-                    isinstance(self.current_status, RunnerConnected)
-                    and self.generator.group is not None
-                )
-                or (
-                    isinstance(self.current_status, RunnerIdle)
-                    and self.generator.group is None
-                )
-            ):
-                total_layers = (
-                    self.shard_metadata.end_layer - self.shard_metadata.start_layer
-                )
-                logger.info("runner loading")
-
-                self.update_status(
-                    RunnerLoading(layers_loaded=0, total_layers=total_layers)
-                )
-                self.acknowledge_task(task)
-
-                def on_model_load_timeout() -> None:
-                    self.update_status(
-                        RunnerFailed(error_message="Model loading timed out")
-                    )
-                    time.sleep(0.5)
-
-                def on_layer_loaded(layers_loaded: int, total: int) -> None:
-                    self.update_status(
-                        RunnerLoading(layers_loaded=layers_loaded, total_layers=total)
-                    )
-
-                assert (
-                    ModelTask.TextGeneration in self.shard_metadata.model_card.tasks
-                ), f"Incorrect model task(s): {self.shard_metadata.model_card.tasks}"
-                (
-                    self.generator.inference_model,
-                    self.generator.tokenizer,
-                    self.generator.vision_processor,
-                ) = load_mlx_items(
-                    self.bound_instance,
-                    self.generator.group,
-                    on_timeout=on_model_load_timeout,
-                    on_layer_loaded=on_layer_loaded,
-                )
-
-                self.generator = self.generator.build()
-
-                self.send_task_status(task.task_id, TaskStatus.Complete)
-                self.update_status(RunnerLoaded())
-                logger.info("runner loaded")
-
-            case StartWarmup() if isinstance(self.current_status, RunnerLoaded):
-                assert isinstance(self.generator, InferenceGenerator)
-                logger.info("runner warming up")
-
-                self.update_status(RunnerWarmingUp())
-                self.acknowledge_task(task)
-
-                self.generator.warmup()
-
-                logger.info(
-                    f"runner initialized in {time.time() - self.setup_start_time} seconds"
-                )
-
-                self.send_task_status(task.task_id, TaskStatus.Complete)
-                self.update_status(RunnerReady())
-                logger.info("runner ready")
-
-            case TextGeneration() if isinstance(self.current_status, RunnerReady):
-                return_code = self.handle_generation_tasks(starting_task=task)
-                if return_code == ExitCode.Shutdown:
-                    return
-
-            case Shutdown():
-                self.shutdown(task)
-                return
-
-            case _:
-                raise ValueError(
-                    f"Received {task.__class__.__name__} outside of state machine in {self.current_status=}"
-                )
-
-    def shutdown(self, task: Task):
-        logger.info("runner shutting down")
-        self.update_status(RunnerShuttingDown())
-        self.acknowledge_task(task)
-        if isinstance(self.generator, InferenceGenerator):
-            self.generator.close()
-        mx.clear_cache()
-        import gc
-
-        gc.collect()
-        self.send_task_status(task.task_id, TaskStatus.Complete)
-        self.update_status(RunnerShutdown())
-
-    def submit_text_generation(self, task: TextGeneration):
-        assert isinstance(self.generator, InferenceGenerator)
-        self.active_tasks[task.task_id] = task
-        self.generator.submit(task)
-
-    def handle_generation_tasks(self, starting_task: TextGeneration):
-        assert isinstance(self.current_status, RunnerReady)
-        assert isinstance(self.generator, InferenceGenerator)
-
-        logger.info(f"received chat request: {starting_task}")
-        self.update_status(RunnerRunning())
-        logger.info("runner running")
-        self.acknowledge_task(starting_task)
-        self.seen.add(starting_task.task_id)
-
-        self.submit_text_generation(starting_task)
-
-        while self.active_tasks:
-            results = self.generator.step()
-
-            finished: list[TaskId] = []
-            for task_id, result in results:
-                match result:
-                    case Cancelled():
-                        finished.append(task_id)
-                    case Finished():
-                        self.send_task_status(task_id, TaskStatus.Complete)
-                        finished.append(task_id)
-                    case _:
-                        self.send_response(
-                            result, self.active_tasks[task_id].command_id
-                        )
-
-            for task_id in finished:
-                self.active_tasks.pop(task_id, None)
-
-            try:
-                task = self.task_receiver.receive_nowait()
-
-                if task.task_id in self.seen:
-                    logger.warning("repeat task - potential error")
-                    continue
-                self.seen.add(task.task_id)
-
-                match task:
-                    case TextGeneration():
-                        self.acknowledge_task(task)
-                        self.submit_text_generation(task)
-                    case Shutdown():
-                        self.shutdown(task)
-                        return ExitCode.Shutdown
-                    case _:
-                        raise ValueError(
-                            f"Received {task.__class__.__name__} outside of state machine in {self.current_status=}"
-                        )
-
-            except WouldBlock:
-                pass
-
-        self.update_status(RunnerReady())
-        logger.info("runner ready")
-
-        return ExitCode.AllTasksComplete
-
-    def send_response(
-        self,
-        response: GenerationResponse | ToolCallResponse,
-        command_id: CommandId,
-    ):
-        match response:
-            case GenerationResponse():
-                if self.device_rank == 0 and response.finish_reason == "error":
-                    self.event_sender.send(
-                        ChunkGenerated(
-                            command_id=command_id,
-                            chunk=ErrorChunk(
-                                error_message=response.text,
-                                model=self.model_id,
-                            ),
-                        )
-                    )
-
-                elif self.device_rank == 0:
-                    assert response.finish_reason not in (
-                        "error",
-                        "tool_calls",
-                        "function_call",
-                    )
-                    self.event_sender.send(
-                        ChunkGenerated(
-                            command_id=command_id,
-                            chunk=TokenChunk(
-                                model=self.model_id,
-                                text=response.text,
-                                token_id=response.token,
-                                usage=response.usage,
-                                finish_reason=response.finish_reason,
-                                stats=response.stats,
-                                logprob=response.logprob,
-                                top_logprobs=response.top_logprobs,
-                                is_thinking=response.is_thinking,
-                            ),
-                        )
-                    )
-            case ToolCallResponse():
-                if self.device_rank == 0:
-                    self.event_sender.send(
-                        ChunkGenerated(
-                            command_id=command_id,
-                            chunk=ToolCallChunk(
-                                tool_calls=response.tool_calls,
-                                model=self.model_id,
-                                usage=response.usage,
-                                stats=response.stats,
-                            ),
-                        )
-                    )
-
-
-@dataclass
-class Builder:
-    model_id: ModelId
-    event_sender: MpSender[Event]
-    cancel_receiver: MpReceiver[TaskId]
-    inference_model: Model | None = None
-    tokenizer: TokenizerWrapper | None = None
-    group: mx.distributed.Group | None = None
-    vision_processor: VisionProcessor | None = None
-
-    def build(
-        self,
-    ) -> InferenceGenerator:
-        assert self.model_id
-        assert self.inference_model
-        assert self.tokenizer
-
-        vision_processor = self.vision_processor
-
-        tool_parser = None
-        logger.info(
-            f"model has_tool_calling={self.tokenizer.has_tool_calling} using tokens {self.tokenizer.tool_call_start}, {self.tokenizer.tool_call_end}"
-        )
-        if (
-            self.tokenizer.tool_call_start
-            and self.tokenizer.tool_call_end
-            and self.tokenizer.tool_parser  # type: ignore
-        ):
-            tool_parser = make_mlx_parser(
-                self.tokenizer.tool_call_start,
-                self.tokenizer.tool_call_end,
-                self.tokenizer.tool_parser,  # type: ignore
-            )
-
-        kv_prefix_cache = KVPrefixCache(self.group)
-
-        device_rank = 0 if self.group is None else self.group.rank()
-        if os.environ.get("EXO_NO_BATCH"):
-            logger.info("using SequentialGenerator (batching disabled)")
-            return SequentialGenerator(
-                model=self.inference_model,
-                tokenizer=self.tokenizer,
-                group=self.group,
-                tool_parser=tool_parser,
-                kv_prefix_cache=kv_prefix_cache,
-                model_id=self.model_id,
-                device_rank=device_rank,
-                cancel_receiver=self.cancel_receiver,
-                event_sender=self.event_sender,
-                vision_processor=vision_processor,
-            )
-        logger.info("using BatchGenerator")
-        return BatchGenerator(
-            model=self.inference_model,
-            tokenizer=self.tokenizer,
-            group=self.group,
-            tool_parser=tool_parser,
-            kv_prefix_cache=kv_prefix_cache,
-            model_id=self.model_id,
-            device_rank=device_rank,
-            cancel_receiver=self.cancel_receiver,
-            event_sender=self.event_sender,
-            vision_processor=vision_processor,
-        )
diff --git a/src/exo/worker/runner/runner.py b/src/exo/worker/runner/runner.py
new file mode 100644
index 000000000..ac5d05480
--- /dev/null
+++ b/src/exo/worker/runner/runner.py
@@ -0,0 +1,394 @@
+import queue
+import threading
+import time
+from dataclasses import dataclass
+from enum import Enum
+from typing import BinaryIO
+
+from anyio import ClosedResourceError, EndOfStream
+
+from exo.shared.constants import ENABLE_DISAGGREGATION
+from exo.shared.types.chunks import Chunk
+from exo.shared.types.common import CommandId
+from exo.shared.types.events import (
+    ChunkGenerated,
+    Event,
+    RunnerStatusUpdated,
+    TaskAcknowledged,
+    TaskStatusUpdated,
+)
+from exo.shared.types.tasks import (
+    ConnectToGroup,
+    GenerationTask,
+    ImageEdits,
+    ImageGeneration,
+    LoadModel,
+    Shutdown,
+    StartWarmup,
+    Task,
+    TaskId,
+    TaskStatus,
+    TextGeneration,
+)
+from exo.shared.types.worker.instances import BoundInstance
+from exo.shared.types.worker.runner_response import (
+    CancelledResponse,
+    FinishedResponse,
+)
+from exo.shared.types.worker.runners import (
+    RunnerConnected,
+    RunnerConnecting,
+    RunnerIdle,
+    RunnerLoaded,
+    RunnerLoading,
+    RunnerReady,
+    RunnerRunning,
+    RunnerShutdown,
+    RunnerShuttingDown,
+    RunnerStatus,
+    RunnerWarmingUp,
+)
+from exo.utils.channels import MpReceiver, MpSender
+from exo.utils.ports import random_ephemeral_port
+from exo.worker.disaggregated.server import (
+    PrefillRequest,
+    PrefillServer,
+)
+from exo.worker.engines.base import Builder, Engine
+from exo.worker.runner.bootstrap import logger
+
+PREFILL_PICKUP_TIMEOUT_SECONDS = 3  # max wait for a queued prefill to be picked up
+PREFILL_FINISH_TIMEOUT_SECONDS = 300  # max wait for a picked-up prefill to finish
+
+
+@dataclass
+class PrefillTask:  # one prefill request queued for the runner's main loop
+    request: PrefillRequest  # passed to the engine's serve_prefill
+    wfile: BinaryIO  # stream passed to serve_prefill together with request
+    started: threading.Event  # set by Runner._serve_prefill when it picks the request up
+    done: threading.Event  # set by Runner._serve_prefill when serving ends, even on error
+
+
+class _TaskStreamClosed:  # sentinel the task-reader thread queues when it stops reading
+    pass
+
+
+WorkItem = Task | PrefillTask | _TaskStreamClosed  # element type of Runner._work_queue
+
+
+class ExitCode(str, Enum):  # result of Runner.handle_generation_tasks
+    AllTasksComplete = "AllTasksComplete"  # no generation task is active any more
+    Shutdown = "Shutdown"  # a Shutdown task was handled or the task stream closed
+
+
+class Runner:
+    def __init__(
+        self,
+        bound_instance: BoundInstance,
+        builder: Builder,
+        event_sender: MpSender[Event],
+        task_receiver: MpReceiver[Task],
+    ):
+        """Store the channels and shard details, then announce the runner as idle."""
+        self.event_sender = event_sender
+        self.task_receiver = task_receiver
+        self.bound_instance = bound_instance
+
+        self.instance = bound_instance.instance
+        self.runner_id = bound_instance.bound_runner_id
+        self.shard_metadata = bound_instance.bound_shard
+        self.model_id = self.shard_metadata.model_card.model_id
+        self.device_rank = self.shard_metadata.device_rank
+
+        logger.info("hello from the runner")
+        # Optional shard attributes that force a startup failure or a startup delay.
+        if getattr(self.shard_metadata, "immediate_exception", False):
+            raise Exception("Fake exception - runner failed to spin up.")
+        delay = getattr(self.shard_metadata, "should_timeout", 0)
+        if delay:
+            time.sleep(delay)
+
+        self.setup_start_time = time.time()
+
+        # Starts as the Builder; LoadModel replaces it with whatever build() returns.
+        self.generator: Builder | Engine = builder
+
+        self.seen: set[TaskId] = set()
+        self.active_tasks: dict[TaskId, GenerationTask] = {}
+
+        # The prefill server and the task-reader thread are created later.
+        self._prefill_server: PrefillServer | None = None
+        self._prefill_server_port: int | None = None
+        self._work_queue: queue.Queue[WorkItem] = queue.Queue()
+        self._task_reader_thread: threading.Thread | None = None
+
+        logger.info("runner created")
+        self.update_status(RunnerIdle())
+
+    def _start_prefill_server(self) -> int | None:
+        """Create the prefill server once and return its port.
+
+        Returns None when disaggregation is disabled or this is not rank 0.
+        """
+        if not ENABLE_DISAGGREGATION or self.device_rank != 0:
+            return None
+        if self._prefill_server_port is not None:
+            return self._prefill_server_port
+
+        def resolve(request: PrefillRequest, wfile: BinaryIO) -> bool:
+            # Queue the request for the main loop, then wait on its two events.
+            started, done = threading.Event(), threading.Event()
+            job = PrefillTask(request=request, wfile=wfile, started=started, done=done)
+            self._work_queue.put(job)
+            if not started.wait(timeout=PREFILL_PICKUP_TIMEOUT_SECONDS):
+                # NOTE(review): the job stays queued, so it may still be served later.
+                logger.warning(
+                    f"Prefill request {request.request_id} not picked up within "
+                    f"{PREFILL_PICKUP_TIMEOUT_SECONDS}s — runner busy"
+                )
+                return False
+            if not done.wait(timeout=PREFILL_FINISH_TIMEOUT_SECONDS):
+                logger.warning(
+                    f"Prefill request {request.request_id} did not finish within "
+                    f"{PREFILL_FINISH_TIMEOUT_SECONDS}s"
+                )
+            return True
+
+        port = random_ephemeral_port()
+        self._prefill_server = PrefillServer(resolve=resolve, host="0.0.0.0", port=port)
+        self._prefill_server_port = port
+        return port
+
+    def _start_task_reader(self) -> None:  # start the task-reader thread (at most once)
+        if self._task_reader_thread is not None:
+            return
+
+        def loop() -> None:
+            try:
+                with self.task_receiver:
+                    for task in self.task_receiver:
+                        self._work_queue.put(task)  # forward each task to the main loop
+            except (EndOfStream, ClosedResourceError):
+                pass  # receiver ended or was closed; the sentinel below is still sent
+            finally:
+                self._work_queue.put(_TaskStreamClosed())  # always signal end of tasks
+
+        self._task_reader_thread = threading.Thread(target=loop, name="task-reader")
+        self._task_reader_thread.start()
+
+    def _serve_prefill(self, req: PrefillTask) -> None:
+        """Serve one queued prefill, flagging its start and end for the waiter."""
+        req.started.set()
+        try:
+            assert isinstance(self.generator, Engine)
+            self.generator.serve_prefill(req.request, req.wfile)
+        except Exception:
+            log = logger.opt(exception=True)
+            log.warning(f"Failed to serve prefill request {req.request.request_id}")
+        finally:
+            req.done.set()
+
+    def update_status(self, status: RunnerStatus):
+        """Remember `status` as the current status and publish it as an event."""
+        self.current_status = status
+        # The event carries the same object that was just stored.
+        update = RunnerStatusUpdated(runner_id=self.runner_id, runner_status=status)
+        self.event_sender.send(update)
+
+
+    def send_task_status(self, task_id: TaskId, task_status: TaskStatus):
+        """Publish a TaskStatusUpdated event carrying `task_status` for `task_id`."""
+        update = TaskStatusUpdated(task_id=task_id, task_status=task_status)
+        self.event_sender.send(update)
+
+    def acknowledge_task(self, task: Task):  # publish a TaskAcknowledged event for task
+        self.event_sender.send(TaskAcknowledged(task_id=task.task_id))
+
+    def main(self):  # main loop: drain the work queue until the stream closes or shutdown
+        self._start_task_reader()
+        try:
+            while True:
+                item = self._work_queue.get()  # blocks until something is queued
+                if isinstance(item, _TaskStreamClosed):  # reader thread has finished
+                    break
+                if isinstance(item, PrefillTask):
+                    self._serve_prefill(item)
+                    continue
+                if item.task_id in self.seen:  # a task id is handled at most once
+                    logger.warning("repeat task - potential error")
+                    continue
+                self.seen.add(item.task_id)
+                self.handle_first_task(item)
+                if isinstance(self.current_status, RunnerShutdown):  # set by shutdown()
+                    break
+        finally:
+            if self._prefill_server is not None:
+                self._prefill_server.stop()
+                self._prefill_server = None
+            self.task_receiver.close()  # closed before joining the reader thread
+            if self._task_reader_thread is not None:
+                self._task_reader_thread.join(timeout=5)  # wait at most 5 seconds
+                self._task_reader_thread = None
+
+    def handle_first_task(self, task: Task):  # dispatch one task on (task type, status)
+        self.send_task_status(task.task_id, TaskStatus.Running)
+
+        match task:
+            case ConnectToGroup() if isinstance(self.current_status, RunnerIdle):
+                assert isinstance(self.generator, Builder)
+                logger.info("runner connecting")
+                self.update_status(RunnerConnecting())
+                self.acknowledge_task(task)
+
+                self.generator.connect(self.bound_instance)
+
+                self.send_task_status(task.task_id, TaskStatus.Complete)
+                self.update_status(RunnerConnected())
+                logger.info("runner connected")
+
+            # we load the model if it's connected with a group, or idle without a group. we should never tell a model to connect if it doesn't need to
+            case LoadModel() if isinstance(self.generator, Builder) and (
+                isinstance(self.current_status, (RunnerConnected, RunnerIdle))
+            ):
+                total_layers = (
+                    self.shard_metadata.end_layer - self.shard_metadata.start_layer
+                )
+                logger.info("runner loading")
+
+                self.update_status(
+                    RunnerLoading(layers_loaded=0, total_layers=total_layers)
+                )
+                self.acknowledge_task(task)
+
+                for load_progress in self.generator.load(self.bound_instance):
+                    self.update_status(  # one status event per progress item yielded
+                        RunnerLoading(
+                            layers_loaded=load_progress.layers_loaded,
+                            total_layers=load_progress.total,
+                        )
+                    )
+
+                self.generator = self.generator.build()  # Builder replaced by build() result
+
+                self.send_task_status(task.task_id, TaskStatus.Complete)
+                self.update_status(RunnerLoaded())
+                logger.info("runner loaded")
+
+            case StartWarmup() if isinstance(self.current_status, RunnerLoaded):
+                assert isinstance(self.generator, Engine)
+                logger.info("runner warming up")
+
+                self.update_status(RunnerWarmingUp())
+                self.acknowledge_task(task)
+
+                self.generator.warmup()
+
+                logger.info(
+                    f"runner initialized in {time.time() - self.setup_start_time} seconds"
+                )
+
+                self._start_prefill_server()  # may return None; the port then stays None
+                self.send_task_status(task.task_id, TaskStatus.Complete)
+                self.update_status(
+                    RunnerReady(prefill_server_port=self._prefill_server_port)
+                )
+                logger.info("runner ready")
+
+            case TextGeneration() | ImageEdits() | ImageGeneration() if isinstance(
+                self.current_status, RunnerReady
+            ):
+                return_code = self.handle_generation_tasks(starting_task=task)  # acks the task
+                if return_code == ExitCode.Shutdown:
+                    return
+
+            case Shutdown():  # accepted in any status
+                self.shutdown(task)
+                return
+
+            case _:  # the task type is not valid for the current status
+                raise ValueError(
+                    f"Received {task.__class__.__name__} outside of state machine in {self.current_status=}"
+                )
+
+    def shutdown(self, task: Task):
+        """Close the generator, collect garbage, then report RunnerShutdown."""
+        import gc
+        logger.info("runner shutting down")
+        self.update_status(RunnerShuttingDown())
+        self.acknowledge_task(task)
+        self.generator.close()
+        gc.collect()
+        self.send_task_status(task.task_id, TaskStatus.Complete)
+        self.update_status(RunnerShutdown())
+
+    def submit_generation(self, task: GenerationTask):  # track task and hand it to the engine
+        assert isinstance(self.generator, Engine)
+        self.active_tasks[task.task_id] = task  # looked up later for the task's command_id
+        self.generator.submit(task)
+
+    def handle_generation_tasks(self, starting_task: GenerationTask):
+        """Step the engine until idle, handling queued work between steps."""
+        assert isinstance(self.current_status, RunnerReady)
+        assert isinstance(self.generator, Engine)
+        logger.info(f"received chat request: {starting_task}")
+        self.update_status(RunnerRunning())
+        logger.info("runner running")
+        self.acknowledge_task(starting_task)
+        self.seen.add(starting_task.task_id)
+        self.submit_generation(starting_task)
+
+        while self.active_tasks:
+            results = self.generator.step()
+            finished: list[TaskId] = []
+            for task_id, result in results:
+                match result:
+                    case CancelledResponse():
+                        finished.append(task_id)
+                    case FinishedResponse():
+                        self.send_task_status(task_id, TaskStatus.Complete)
+                        finished.append(task_id)
+                    case other:
+                        self.send_chunk(other, self.active_tasks[task_id].command_id)
+
+            for task_id in finished:
+                self.active_tasks.pop(task_id, None)
+
+            try:
+                item = self._work_queue.get_nowait()
+            except queue.Empty:
+                continue
+            if isinstance(item, _TaskStreamClosed):
+                # Re-queue the sentinel: main() only stops on it or on RunnerShutdown,
+                # and would otherwise block forever on an empty queue after we return.
+                self._work_queue.put(item)
+                return ExitCode.Shutdown
+            if isinstance(item, PrefillTask):
+                self._serve_prefill(item)
+                continue
+            if item.task_id in self.seen:
+                logger.warning("repeat task - potential error")
+                continue
+            self.seen.add(item.task_id)
+            match item:
+                case TextGeneration() | ImageGeneration() | ImageEdits():
+                    self.acknowledge_task(item)
+                    self.submit_generation(item)
+                case Shutdown():
+                    self.shutdown(item)
+                    return ExitCode.Shutdown
+                case _:
+                    raise ValueError(
+                        f"Received {item.__class__.__name__} outside of state machine in {self.current_status=}"
+                    )
+
+        self.update_status(RunnerReady(prefill_server_port=self._prefill_server_port))
+        logger.info("runner ready")
+        return ExitCode.AllTasksComplete
+
+    def send_chunk(self, chunk: Chunk, command_id: CommandId):
+        """Publish `chunk` as a ChunkGenerated event tagged with `command_id`.
+
+        Asserts that the generator is already an Engine.
+        """
+        assert isinstance(self.generator, Engine)
+        self.event_sender.send(ChunkGenerated(command_id=command_id, chunk=chunk))
diff --git a/src/exo/worker/runner/runner_supervisor.py b/src/exo/worker/runner/supervisor.py
similarity index 53%
rename from src/exo/worker/runner/runner_supervisor.py
rename to src/exo/worker/runner/supervisor.py
index b55c0bd45..961126247 100644
--- a/src/exo/worker/runner/runner_supervisor.py
+++ b/src/exo/worker/runner/supervisor.py
@@ -1,17 +1,20 @@
+import codecs
 import contextlib
-import multiprocessing as mp
 import signal
 from dataclasses import dataclass, field
-from typing import Self
+from os import PathLike
+from typing import Callable, Self
 
 import anyio
 from anyio import (
+    AsyncFile,
     BrokenResourceError,
+    CancelScope,
     ClosedResourceError,
-    to_thread,
 )
 from loguru import logger
 
+from exo.shared.constants import EXO_RUNNER_STDERR_LOG, EXO_RUNNER_STDOUT_LOG
 from exo.shared.types.chunks import ErrorChunk
 from exo.shared.types.events import (
     ChunkGenerated,
@@ -41,21 +44,148 @@ from exo.shared.types.worker.runners import (
     RunnerWarmingUp,
 )
 from exo.shared.types.worker.shards import ShardMetadata
-from exo.utils.channels import MpReceiver, MpSender, Sender, mp_channel
+from exo.utils.async_process import AsyncProcess
+from exo.utils.channels import MpReceiver, MpSender, Receiver, Sender, mp_channel
+from exo.utils.fs import ensure_parent_directory_exists
 from exo.utils.task_group import TaskGroup
-from exo.worker.runner.bootstrap import entrypoint
+from exo.worker.runner.bootstrap import RunnerTerminationError, entrypoint
+from exo.worker.runner.diagnostics import (
+    RunnerDiagnosticCollector,
+    RunnerUnknown,
+)
 
 PREFILL_TIMEOUT_SECONDS = 60
 DECODE_TIMEOUT_SECONDS = 5
 
 
+@dataclass(eq=False)
+class RunnerStdioHandler:  # logs runner stdout/stderr; stderr lines also feed diagnostics
+    _stdout_rx: Receiver[bytes]  # byte chunks of the runner's stdout
+    _stderr_rx: Receiver[bytes]  # byte chunks of the runner's stderr
+    _stdout_log: AsyncFile[str]  # file that decoded stdout text is written to
+    _stderr_log: AsyncFile[str]  # file that decoded stderr text is written to
+    diagnostics: RunnerDiagnosticCollector = field(
+        default_factory=RunnerDiagnosticCollector
+    )
+
+    _tg: TaskGroup = field(default_factory=TaskGroup, init=False)  # runs both stream tasks
+
+    @classmethod
+    async def create(
+        cls,
+        *,
+        stdout_rx: Receiver[bytes],
+        stderr_rx: Receiver[bytes],
+        stdout_log_path: PathLike[str] = EXO_RUNNER_STDOUT_LOG,
+        stderr_log_path: PathLike[str] = EXO_RUNNER_STDERR_LOG,
+    ) -> Self:
+        """Open both log files for appending and wrap them in a new handler."""
+        # these are append only logs used to gather data for log template mining
+        #
+        # TODO: in the future use [Drain3](https://github.com/logpai/Drain3)
+        #       to mine these logs
+        for log_path in (stdout_log_path, stderr_log_path):
+            ensure_parent_directory_exists(log_path)
+        stdout_log = await anyio.open_file(stdout_log_path, "a")
+        stderr_log = await anyio.open_file(stderr_log_path, "a")
+
+        # diagnostics and the task group keep their dataclass defaults
+        return cls(
+            _stdout_rx=stdout_rx,
+            _stderr_rx=stderr_rx,
+            _stdout_log=stdout_log,
+            _stderr_log=stderr_log,
+        )
+
+    async def run(self):  # handle both streams concurrently, then close both log files
+        try:
+            async with self._tg as tg:
+                tg.start_soon(  # pyright: ignore[reportUnknownArgumentType]
+                    self._handle_runner_output,
+                    self._stdout_rx,
+                    self._stdout_log,
+                    lambda line: logger.info(f"Runner stdout: {line}"),  # pyright: ignore[reportUnknownLambdaType]
+                    lambda _: None,  # pyright: ignore[reportUnknownLambdaType]
+                )
+                tg.start_soon(  # pyright: ignore[reportUnknownArgumentType]
+                    self._handle_runner_output,
+                    self._stderr_rx,
+                    self._stderr_log,
+                    lambda line: logger.warning(f"Runner stderr: {line}"),  # pyright: ignore[reportUnknownLambdaType]
+                    self.diagnostics.record_line,  # only stderr lines reach the collector
+                )
+        finally:
+            with CancelScope(shield=True):  # shielded so closing still runs when cancelled
+                await self._stdout_log.aclose()
+                await self._stderr_log.aclose()
+
+    async def _handle_runner_output(  # copy one stream to its log file and line callbacks
+        self,
+        rx: Receiver[bytes],
+        logfile: AsyncFile[str],
+        log_line: Callable[[str], None],
+        record_diagnostic_line: Callable[[str], None],
+    ):
+        # The diagnostic collector is deliberately line-level for now. It records
+        # bounded stderr context and known failure anchors; the supervisor
+        # correlates those hints with the runner exit status before surfacing an
+        # error.
+
+        # not using TextReceiveStream because it doesn't do final=True handling on errors
+        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+        pending_line = ""  # text received after the last newline seen so far
+
+        async def handle_line(line: str):
+            # preserve whitespace for later log-mining
+            line = line.removesuffix("\r")
+            if not line:  # empty lines are neither logged nor recorded
+                return
+
+            # Send to logger & error recovery task
+            log_line(line)
+            record_diagnostic_line(line)
+
+        async def handle_text(text: str):
+            nonlocal pending_line
+
+            if not text:
+                return
+
+            await logfile.write(text)  # decoded text is written before line splitting
+            await logfile.flush()
+
+            # newline buffering
+            pending_line += text
+            lines = pending_line.split("\n")
+            pending_line = lines.pop()  # last piece has no newline yet; keep it for later
+
+            for line in lines:
+                await handle_line(line)
+
+        try:
+            with rx:
+                async for chunk in rx:
+                    await handle_text(decoder.decode(chunk, final=False))
+        except (ClosedResourceError, BrokenResourceError):
+            logger.warning("Runner stdio stream closed before clean EOF")
+        finally:
+            with CancelScope(shield=True):  # shielded so the tail is handled when cancelled
+                await handle_text(decoder.decode(b"", final=True))  # drain buffered bytes
+                await logfile.flush()
+
+                if pending_line:  # last line that never received a trailing newline
+                    await handle_line(pending_line)
+                    pending_line = ""
+
+
 @dataclass(eq=False)
 class RunnerSupervisor:
     shard_metadata: ShardMetadata
     bound_instance: BoundInstance
-    runner_process: mp.Process
+    runner_process: AsyncProcess
+    _runner_stdio_handler: RunnerStdioHandler
     initialize_timeout: float
-    _ev_recv: MpReceiver[Event]
+    _ev_recv: MpReceiver[Event | RunnerTerminationError]
     _task_sender: MpSender[Task]
     _event_sender: Sender[Event]
     _cancel_sender: MpSender[TaskId]
@@ -70,18 +200,18 @@ class RunnerSupervisor:
     )
 
     @classmethod
-    def create(
+    async def create(
         cls,
         *,
         bound_instance: BoundInstance,
         event_sender: Sender[Event],
         initialize_timeout: float = 400,
     ) -> Self:
-        ev_send, ev_recv = mp_channel[Event]()
+        ev_send, ev_recv = mp_channel[Event | RunnerTerminationError]()
         task_sender, task_recv = mp_channel[Task]()
         cancel_sender, cancel_recv = mp_channel[TaskId]()
 
-        runner_process = mp.Process(
+        runner_process = AsyncProcess(
             target=entrypoint,
             args=(
                 bound_instance,
@@ -92,6 +222,9 @@ class RunnerSupervisor:
             ),
             daemon=True,
         )
+        runner_stdio_handler = await RunnerStdioHandler.create(
+            stdout_rx=runner_process.stdout, stderr_rx=runner_process.stderr
+        )
 
         shard_metadata = bound_instance.bound_shard
 
@@ -99,6 +232,7 @@ class RunnerSupervisor:
             bound_instance=bound_instance,
             shard_metadata=shard_metadata,
             runner_process=runner_process,
+            _runner_stdio_handler=runner_stdio_handler,
             initialize_timeout=initialize_timeout,
             _ev_recv=ev_recv,
             _task_sender=task_sender,
@@ -109,9 +243,12 @@ class RunnerSupervisor:
         return self
 
     async def run(self):
-        self.runner_process.start()
         try:
             async with self._tg as tg:
+                # start the process itself & handle its stdout/stderr
+                await tg.start(self.runner_process.run)
+                tg.start_soon(self._runner_stdio_handler.run)
+
                 tg.start_soon(self._watch_runner)
                 tg.start_soon(self._forward_events)
         finally:
@@ -129,23 +266,11 @@ class RunnerSupervisor:
             with contextlib.suppress(ClosedResourceError):
                 self._cancel_sender.close()
 
-            await to_thread.run_sync(self.runner_process.join, 5)
-
-            if self.runner_process.is_alive():
-                logger.warning(
-                    "Runner process didn't shutdown succesfully, terminating"
+            with anyio.CancelScope(shield=True):
+                await self.runner_process.stop()
+                logger.info(
+                    f"Runner process successfully terminated: {self.runner_process.exitcode}"
                 )
-                self.runner_process.terminate()
-                self.runner_process.join(timeout=5)
-                # This is overkill but it's not technically bad, just unnecessary.
-                if self.runner_process.is_alive():
-                    logger.critical("Runner process didn't respond to SIGTERM, killing")
-                    self.runner_process.kill()
-                    self.runner_process.join(timeout=5)
-            else:
-                logger.info("Runner process succesfully terminated")
-
-            self.runner_process.close()
 
     def shutdown(self):
         self._tg.cancel_tasks()
@@ -195,6 +320,10 @@ class RunnerSupervisor:
         try:
             with self._ev_recv as events:
                 async for event in events:
+                    if isinstance(event, RunnerTerminationError):
+                        # try to get exception if possible
+                        await self._check_runner(event)
+                        break
                     if isinstance(event, RunnerStatusUpdated):
                         self.status = event.runner_status
                     if isinstance(event, TaskAcknowledged):
@@ -218,8 +347,9 @@ class RunnerSupervisor:
                         self.in_progress.pop(event.task_id, None)
                         self.completed.add(event.task_id)
                     await self._event_sender.send(event)
-        except (ClosedResourceError, BrokenResourceError) as e:
-            await self._check_runner(e)
+        except (ClosedResourceError, BrokenResourceError):
+            # this is the happy path shutdown - we don't need to spam log with it
+            await self._check_runner()
         finally:
             for tid in self.pending:
                 self.pending[tid].set()
@@ -231,29 +361,50 @@ class RunnerSupervisor:
                 if not self.runner_process.is_alive():
                     await self._check_runner(RuntimeError("Runner found to be dead"))
 
-    async def _check_runner(self, e: Exception) -> None:
+    async def _check_runner(
+        self, e: RunnerTerminationError | Exception | None = None
+    ) -> None:
         if not self._cancel_watch_runner.cancel_called:
             self._cancel_watch_runner.cancel()
         logger.info("Checking runner's status")
         if self.runner_process.is_alive():
-            logger.info("Runner was found to be alive, attempting to join process")
-            await to_thread.run_sync(self.runner_process.join, 5)
+            logger.info("Runner was found to be alive, stopping process")
+            with anyio.CancelScope(shield=True):
+                await self.runner_process.stop()
         rc = self.runner_process.exitcode
         logger.info(f"Runner exited with exit code {rc}")
+
+        # If exit code is 0 then the transient errors were recoverable, meaning we don't need runner diagnostics
         if rc == 0:
             return
 
         if isinstance(rc, int) and rc < 0:
             sig = -rc
             try:
-                cause = f"signal={sig} ({signal.strsignal(sig)})"
+                if (description := signal.strsignal(sig)) is not None:
+                    cause = f"signal={sig} ({description})"
+                else:
+                    cause = f"signal={sig}"
             except Exception:
                 cause = f"signal={sig}"
         else:
-            cause = f"exitcode={rc}"
+            cause: str = f"exitcode={rc}"
 
-        logger.opt(exception=e).error(f"Runner terminated with {cause}")
+        if e is not None:
+            # Record how runner shut down, try exception, resort to RunnerTerminationError fallback
+            if isinstance(e, Exception):
+                logger.opt(exception=e).error(f"Runner terminated with {cause}")
+            else:
+                cause = f"{cause}\nRunner error: {e}"
+                logger.error(f"Runner terminated with {cause}")
+        else:
+            logger.error(f"Runner terminated with {cause}")
 
+        diagnostics = [
+            d
+            for d in self._runner_stdio_handler.diagnostics.diagnostics()
+            if not isinstance(d, RunnerUnknown)
+        ]
         for task in self.in_progress.values():
             if isinstance(task, (TextGeneration, ImageGeneration, ImageEdits)):
                 with anyio.CancelScope(shield=True):
@@ -262,6 +413,7 @@ class RunnerSupervisor:
                             command_id=task.command_id,
                             chunk=ErrorChunk(
                                 model=self.shard_metadata.model_card.model_id,
+                                diagnostics=diagnostics,
                                 error_message=(
                                     "Runner shutdown before completing command "
                                     f"({cause})"
@@ -271,7 +423,9 @@ class RunnerSupervisor:
                     )
 
         try:
-            self.status = RunnerFailed(error_message=f"Terminated ({cause})")
+            self.status = RunnerFailed(
+                error_message=f"Terminated ({cause})", diagnostics=diagnostics
+            )
             with anyio.CancelScope(shield=True):
                 await self._event_sender.send(
                     RunnerStatusUpdated(
diff --git a/src/exo/worker/tests/unittests/conftest.py b/src/exo/worker/tests/unittests/conftest.py
index f88148ef3..01388c5b7 100644
--- a/src/exo/worker/tests/unittests/conftest.py
+++ b/src/exo/worker/tests/unittests/conftest.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass, field
 
 from exo.shared.models.model_cards import ModelCard, ModelId, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.common import NodeId
 from exo.shared.types.memory import Memory
 from exo.shared.types.tasks import BaseTask, TaskId
@@ -41,6 +42,7 @@ def get_pipeline_shard_metadata(
             hidden_size=2048,
             supports_tensor=False,
             tasks=[ModelTask.TextGeneration],
+            backends=[Backend.MlxMetal],
         ),
         device_rank=device_rank,
         world_size=world_size,
diff --git a/src/exo/worker/tests/unittests/test_mlx/conftest.py b/src/exo/worker/tests/unittests/test_mlx/conftest.py
index c09bfe1ea..d948feb94 100644
--- a/src/exo/worker/tests/unittests/test_mlx/conftest.py
+++ b/src/exo/worker/tests/unittests/test_mlx/conftest.py
@@ -12,12 +12,13 @@ import mlx.nn as nn
 
 from exo.shared.constants import EXO_DEFAULT_MODELS_DIR
 from exo.shared.models.model_cards import ModelCard, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.common import ModelId
 from exo.shared.types.memory import Memory
-from exo.shared.types.mlx import Model
 from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
 from exo.shared.types.worker.shards import PipelineShardMetadata, TensorShardMetadata
 from exo.worker.engines.mlx.generator.generate import mlx_generate
+from exo.worker.engines.mlx.types import Model
 from exo.worker.engines.mlx.utils_mlx import apply_chat_template, shard_and_load
 
 
@@ -88,6 +89,7 @@ def run_gpt_oss_pipeline_device(
                 hidden_size=2880,
                 supports_tensor=False,
                 tasks=[ModelTask.TextGeneration],
+                backends=[Backend.MlxMetal],
             ),
             device_rank=rank,
             world_size=world_size,
@@ -96,9 +98,12 @@ def run_gpt_oss_pipeline_device(
             n_layers=24,
         )
 
-        model, tokenizer = shard_and_load(
-            shard_meta, group, on_timeout=None, on_layer_loaded=None
-        )
+        gen = shard_and_load(shard_meta, group)
+        try:
+            while True:
+                next(gen)
+        except StopIteration as stop:
+            model, tokenizer = stop.value
         model = cast(Model, model)
 
         # Generate a prompt of exact token length
@@ -166,6 +171,7 @@ def run_gpt_oss_tensor_parallel_device(
                 hidden_size=2880,
                 supports_tensor=True,
                 tasks=[ModelTask.TextGeneration],
+                backends=[Backend.MlxMetal],
             ),
             device_rank=rank,
             world_size=world_size,
@@ -174,9 +180,12 @@ def run_gpt_oss_tensor_parallel_device(
             n_layers=24,
         )
 
-        model, tokenizer = shard_and_load(
-            shard_meta, group, on_timeout=None, on_layer_loaded=None
-        )
+        gen = shard_and_load(shard_meta, group)
+        try:
+            while True:
+                next(gen)
+        except StopIteration as stop:
+            model, tokenizer = stop.value
         model = cast(Model, model)
 
         base_text = "The quick brown fox jumps over the lazy dog. "
diff --git a/src/exo/worker/tests/unittests/test_mlx/test_kv_prefix_cache.py b/src/exo/worker/tests/unittests/test_mlx/test_kv_prefix_cache.py
index f9b4d1d47..3d72d47d6 100644
--- a/src/exo/worker/tests/unittests/test_mlx/test_kv_prefix_cache.py
+++ b/src/exo/worker/tests/unittests/test_mlx/test_kv_prefix_cache.py
@@ -9,7 +9,6 @@ from mlx_lm.models.cache import KVCache
 from mlx_lm.sample_utils import make_sampler
 
 from exo.shared.types.common import ModelId
-from exo.shared.types.mlx import Model
 from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
 from exo.worker.engines.mlx.cache import (
     KVPrefixCache,
@@ -19,6 +18,7 @@ from exo.worker.engines.mlx.cache import (
     make_kv_cache,
 )
 from exo.worker.engines.mlx.generator.generate import mlx_generate, prefill
+from exo.worker.engines.mlx.types import Model
 from exo.worker.engines.mlx.utils_mlx import apply_chat_template
 from exo.worker.tests.unittests.test_mlx.conftest import (
     DEFAULT_GPT_OSS_CONFIG,
@@ -189,7 +189,7 @@ class TestKVPrefixCacheWithModel:
         assert stored_length > 0
 
         # Retrieve with same prompt: exact match
-        result_cache, remaining_tokens, matched_index = kv_prefix_cache.get_kv_cache(
+        result_cache, remaining_tokens, matched_index, _ = kv_prefix_cache.get_kv_cache(
             model, tokens
         )
         assert matched_index == 0
@@ -242,7 +242,7 @@ class TestKVPrefixCacheWithModel:
             "Prompts should share a prefix from the chat template"
         )
 
-        result_cache, remaining_tokens, matched_index = kv_prefix_cache.get_kv_cache(
+        result_cache, remaining_tokens, matched_index, _ = kv_prefix_cache.get_kv_cache(
             model, long_tokens
         )
         assert matched_index == 0
@@ -282,7 +282,7 @@ class TestKVPrefixCacheWithModel:
         stored_length = cache_length(kv_prefix_cache.caches[0])
 
         # Get cache and mutate it (simulating what generation does)
-        result_cache, _, matched_index = kv_prefix_cache.get_kv_cache(model, tokens)
+        result_cache, _, matched_index, _ = kv_prefix_cache.get_kv_cache(model, tokens)
         assert matched_index == 0
 
         # Simulate generation: feed many additional tokens through the cache
@@ -329,7 +329,7 @@ class TestKVPrefixCacheWithModel:
         stored_length = cache_length(kv_prefix_cache.caches[0])
 
         for i in range(3):
-            result_cache, _, _ = kv_prefix_cache.get_kv_cache(model, tokens)
+            result_cache, _, _, _ = kv_prefix_cache.get_kv_cache(model, tokens)
 
             head_dim = result_cache[0].keys.shape[-1]
             num_heads = result_cache[0].keys.shape[1]
@@ -343,7 +343,7 @@ class TestKVPrefixCacheWithModel:
             )
 
     def test_mlx_generate_populates_cache(self, model_and_tokenizer):
-        """mlx_generate should save the cache after generation completes."""
+        """mlx_generate should save the post-prefill cache (before the decode loop)."""
         model, tokenizer = model_and_tokenizer
 
         kv_prefix_cache = KVPrefixCache(None)
@@ -356,7 +356,6 @@ class TestKVPrefixCacheWithModel:
         prompt_tokens = encode_prompt(tokenizer, prompt)
 
         # Consume the entire generator so the cache-saving code after yield runs
-        generated_tokens = 0
         for _response in mlx_generate(
             model=model,
             tokenizer=tokenizer,
@@ -365,13 +364,14 @@ class TestKVPrefixCacheWithModel:
             kv_prefix_cache=kv_prefix_cache,
             group=None,
         ):
-            generated_tokens += 1
+            pass
 
         assert len(kv_prefix_cache.prompts) == 1
         assert len(kv_prefix_cache.caches) == 1
-        # Cache should contain prompt + generated tokens
-        expected_length = len(prompt_tokens) + generated_tokens
-        assert cache_length(kv_prefix_cache.caches[0]) == expected_length
+        # add_kv_cache is called before the decode loop and stores a deepcopy of
+        # the cache as it is just after prefill + trim(2). Generation tokens are
+        # never written into the stored entry.
+        assert cache_length(kv_prefix_cache.caches[0]) == len(prompt_tokens) - 2
 
     def test_mlx_generate_second_call_gets_prefix_hit(self, model_and_tokenizer):
         """Second mlx_generate call with same prompt should get a prefix hit from stored cache."""
@@ -401,7 +401,7 @@ class TestKVPrefixCacheWithModel:
 
         # Second call should find a prefix match (the stored cache contains
         # prompt + generated tokens, which shares the prompt prefix)
-        result_cache, remaining_tokens, matched_index = kv_prefix_cache.get_kv_cache(
+        result_cache, remaining_tokens, matched_index, _ = kv_prefix_cache.get_kv_cache(
             model, prompt_tokens
         )
         # The stored cache is longer than the prompt (it includes generated tokens),
diff --git a/src/exo/worker/tests/unittests/test_mlx/test_pipeline_prefill_callbacks.py b/src/exo/worker/tests/unittests/test_mlx/test_pipeline_prefill_callbacks.py
index be723380d..ad8ee0302 100644
--- a/src/exo/worker/tests/unittests/test_mlx/test_pipeline_prefill_callbacks.py
+++ b/src/exo/worker/tests/unittests/test_mlx/test_pipeline_prefill_callbacks.py
@@ -17,6 +17,7 @@ import pytest
 
 from exo.shared.constants import EXO_DEFAULT_MODELS_DIR
 from exo.shared.models.model_cards import ModelCard, ModelTask
+from exo.shared.types.backends import Backend
 from exo.shared.types.common import ModelId
 from exo.shared.types.memory import Memory
 from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
@@ -37,6 +38,7 @@ def _model_card() -> ModelCard:
         hidden_size=2880,
         supports_tensor=False,
         tasks=[ModelTask.TextGeneration],
+        backends=[Backend.MlxMetal],
     )
 
 
@@ -174,9 +176,12 @@ def _run_pipeline_device(
             n_layers=TOTAL_LAYERS,
         )
 
-        model, tokenizer = shard_and_load(
-            shard_meta, group, on_timeout=None, on_layer_loaded=None
-        )
+        gen = shard_and_load(shard_meta, group)
+        try:
+            while True:
+                next(gen)
+        except StopIteration as stop:
+            model, tokenizer = stop.value
         model = cast(Any, model)
 
         prompt, task = _build_prompt(tokenizer, prompt_tokens)
diff --git a/src/exo/worker/tests/unittests/test_mlx/test_prefix_cache_architectures.py b/src/exo/worker/tests/unittests/test_mlx/test_prefix_cache_architectures.py
index fec5082a2..074a9f941 100644
--- a/src/exo/worker/tests/unittests/test_mlx/test_prefix_cache_architectures.py
+++ b/src/exo/worker/tests/unittests/test_mlx/test_prefix_cache_architectures.py
@@ -14,8 +14,9 @@ import pytest
 from mlx.utils import tree_flatten, tree_unflatten
 from mlx_lm.tokenizer_utils import TokenizerWrapper
 
+from exo.download.download_utils import resolve_existing_model
+from exo.shared.constants import EXO_MODELS_DIRS, EXO_MODELS_READ_ONLY_DIRS
 from exo.shared.types.common import ModelId
-from exo.shared.types.mlx import Model
 from exo.shared.types.text_generation import (
     InputMessage,
     InputMessageContent,
@@ -23,13 +24,12 @@ from exo.shared.types.text_generation import (
 )
 from exo.worker.engines.mlx.cache import KVPrefixCache
 from exo.worker.engines.mlx.generator.generate import mlx_generate
+from exo.worker.engines.mlx.types import Model
 from exo.worker.engines.mlx.utils_mlx import (
     apply_chat_template,
     load_tokenizer_for_model_id,
 )
 
-HF_CACHE = Path.home() / ".cache" / "huggingface" / "hub"
-
 # ── Config reduction ──────────────────────────────────────────────────────── #
 
 _REDUCE = {
@@ -70,6 +70,13 @@ def _reduce_config(cfg: dict[str, Any]) -> dict[str, Any]:
         tc: dict[str, Any] = result["text_config"]
         if "num_nextn_predict_layers" in tc:
             tc["num_nextn_predict_layers"] = 0
+        tc_n_layers = cast(int, tc.get("num_hidden_layers", n_layers))
+        if "layer_types" in tc and isinstance(tc["layer_types"], list):
+            tc["layer_types"] = cast(list[Any], tc["layer_types"])[:tc_n_layers]
+        if "mlp_only_layers" in tc and isinstance(tc["mlp_only_layers"], list):
+            tc["mlp_only_layers"] = [
+                i for i in cast(list[int], tc["mlp_only_layers"]) if i < tc_n_layers
+            ]
 
     if "layer_types" in result and isinstance(result["layer_types"], list):
         result["layer_types"] = result["layer_types"][:n_layers]
@@ -100,12 +107,21 @@ def _reduce_config(cfg: dict[str, Any]) -> dict[str, Any]:
 
 
 def _find_snapshot(hub_name: str) -> Path | None:
-    model_dir = HF_CACHE / f"models--mlx-community--{hub_name}"
-    snaps = model_dir / "snapshots"
-    if not snaps.exists():
-        return None
-    children = sorted(snaps.iterdir())
-    return children[0] if children else None
+    """Return the on-disk directory for `mlx-community/<hub_name>`, or None.
+
+    resolve_existing_model is tried first (fully-downloaded models); failing
+    that, any existing directory (even partial) is accepted so that
+    tokenizer-only copies still work. Read-only dirs are searched first.
+    """
+    repo = ModelId(f"mlx-community/{hub_name}")
+    resolved = resolve_existing_model(repo)
+    if resolved is not None:
+        return resolved
+    subdir = repo.normalize()
+    roots = (*EXO_MODELS_READ_ONLY_DIRS, *EXO_MODELS_DIRS)
+    return next(
+        (root / subdir for root in roots if (root / subdir).is_dir()), None
+    )
 
 
 def _copy_tokenizer(src: Path, dst: Path) -> None:
@@ -192,13 +208,31 @@ ARCHITECTURES: list[ArchSpec] = [
 ]
 
 
+def _has_chat_template(model_dir: Path) -> bool:
+    """Return True if `model_dir` has a chat template (standalone file or inline).
+
+    A missing, unreadable, malformed or non-object tokenizer config yields False.
+    """
+    if (model_dir / "chat_template.jinja").exists():
+        return True
+    try:
+        data: Any = json.loads((model_dir / "tokenizer_config.json").read_text())
+    except (OSError, ValueError):  # ValueError: bad JSON or undecodable bytes
+        return False
+    return isinstance(data, dict) and bool(data.get("chat_template"))
+
+
 def _arch_available(spec: ArchSpec) -> bool:
     snap = _find_snapshot(spec.hub_name)
     if snap is None or not (snap / "config.json").exists():
         return False
+    tokenizer_snap = snap  # by default the tokenizer is looked up in the model dir
     if spec.tokenizer_hub is not None:
-        return _find_snapshot(spec.tokenizer_hub) is not None
-    return True
+        alt = _find_snapshot(spec.tokenizer_hub)
+        if alt is None:
+            return False
+        tokenizer_snap = alt  # a separate tokenizer repo overrides the default
+    return _has_chat_template(tokenizer_snap)  # no chat template -> unavailable
 
 
 def _make_task() -> TextGenerationTaskParams:
diff --git a/src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py b/src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py
index 2f4ca7e64..9572e7135 100644
--- a/src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py
+++ b/src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py
@@ -16,7 +16,7 @@ from exo.download.download_utils import (
     fetch_file_list_with_cache,
     resolve_model_dir,
 )
-from exo.shared.models.model_cards import ModelCard, ModelId, get_model_cards
+from exo.shared.models.model_cards import ModelCard, ModelId, card_cache
 from exo.worker.engines.mlx.utils_mlx import (
     get_eos_token_ids_for_model,
     load_tokenizer_for_model_id,
@@ -76,7 +76,7 @@ def get_test_models() -> list[ModelCard]:
     """Get a representative sample of models to test."""
     # Pick one model from each family to test
     families: dict[str, ModelCard] = {}
-    for card in asyncio.run(get_model_cards()):
+    for card in asyncio.run(card_cache.list_all()):
         # Extract family name (e.g., "llama-3.1" from "llama-3.1-8b")
         parts = card.model_id.short().split("-")
         family = "-".join(parts[:2]) if len(parts) >= 2 else parts[0]
@@ -298,7 +298,7 @@ async def test_tokenizer_special_tokens(model_card: ModelCard) -> None:
 async def test_kimi_tokenizer_specifically():
     """Test Kimi tokenizer with its specific patches and quirks."""
     kimi_models = [
-        card for card in await get_model_cards() if "kimi" in card.model_id.lower()
+        card for card in await card_cache.list_all() if "kimi" in card.model_id.lower()
     ]
 
     if not kimi_models:
@@ -350,7 +350,7 @@ async def test_glm_tokenizer_specifically():
 
     glm_model_cards = [
         card
-        for card in await get_model_cards()
+        for card in await card_cache.list_all()
         if contains(card, "glm")
         and not contains(card, "-5")
         and not contains(card, "4.7")
diff --git a/src/exo/worker/tests/unittests/test_mlx/test_tp_bit_exact.py b/src/exo/worker/tests/unittests/test_mlx/test_tp_bit_exact.py
new file mode 100644
index 000000000..470a98a16
--- /dev/null
+++ b/src/exo/worker/tests/unittests/test_mlx/test_tp_bit_exact.py
@@ -0,0 +1,431 @@
+# type: ignore
+"""uv run pytest -v -m "" src/exo/worker/tests/unittests/test_mlx/test_tp_bit_exact.py"""
+
+import importlib
+import json
+import multiprocessing as mp
+import os
+import sys
+import tempfile
+import traceback
+
+import numpy as np
+import pytest
+
+MODEL_CONFIGS = {  # name -> {"module", "args"[, "quantize"]}; consumed by _build
+    "llama": dict(
+        module="mlx_lm.models.llama",
+        args=dict(
+            model_type="llama",
+            hidden_size=512,
+            intermediate_size=1024,
+            num_hidden_layers=2,
+            num_attention_heads=16,
+            num_key_value_heads=4,
+            rms_norm_eps=1e-6,
+            vocab_size=512,
+            max_position_embeddings=128,
+            head_dim=32,
+            rope_theta=10000.0,
+        ),
+    ),
+    "qwen3_5_moe": dict(
+        module="mlx_lm.models.qwen3_5_moe",
+        args=dict(
+            model_type="qwen3_5_moe",
+            text_config=dict(
+                model_type="qwen3_5_moe",
+                vocab_size=512,
+                hidden_size=512,
+                intermediate_size=1024,
+                num_hidden_layers=4,
+                num_attention_heads=16,
+                num_key_value_heads=4,
+                head_dim=32,
+                max_position_embeddings=128,
+                rms_norm_eps=1e-6,
+                tie_word_embeddings=False,
+                attention_bias=False,
+                full_attention_interval=2,
+                linear_num_value_heads=32,
+                linear_num_key_heads=16,
+                linear_key_head_dim=32,
+                linear_value_head_dim=32,
+                linear_conv_kernel_dim=4,
+                num_experts=16,
+                num_experts_per_tok=2,
+                decoder_sparse_step=1,
+                shared_expert_intermediate_size=256,
+                moe_intermediate_size=256,
+                norm_topk_prob=True,
+                rope_parameters={
+                    "type": "default",
+                    "rope_theta": 10000.0,
+                    "partial_rotary_factor": 0.25,
+                    "mrope_section": [11, 11, 10],
+                },
+            ),
+        ),
+    ),
+    "qwen3_next": dict(
+        module="mlx_lm.models.qwen3_next",
+        args=dict(
+            model_type="qwen3_next",
+            hidden_size=512,
+            intermediate_size=1024,
+            num_hidden_layers=4,
+            num_attention_heads=16,
+            num_key_value_heads=4,
+            head_dim=32,
+            max_position_embeddings=128,
+            rms_norm_eps=1e-6,
+            vocab_size=512,
+            attention_bias=False,
+            full_attention_interval=2,
+            linear_num_value_heads=32,
+            linear_num_key_heads=16,
+            linear_key_head_dim=32,
+            linear_value_head_dim=32,
+            linear_conv_kernel_dim=4,
+            num_experts=16,
+            num_experts_per_tok=2,
+            decoder_sparse_step=1,
+            shared_expert_intermediate_size=256,
+            moe_intermediate_size=256,
+            norm_topk_prob=True,
+            mlp_only_layers=[],
+            rope_theta=10000.0,
+            partial_rotary_factor=0.25,
+        ),
+    ),
+    "deepseek_v3": dict(
+        module="mlx_lm.models.deepseek_v3",
+        args=dict(
+            model_type="deepseek_v3",
+            hidden_size=512,
+            intermediate_size=1024,
+            num_hidden_layers=2,
+            num_attention_heads=16,
+            num_key_value_heads=16,
+            vocab_size=512,
+            max_position_embeddings=128,
+            rms_norm_eps=1e-6,
+            n_routed_experts=8,
+            n_shared_experts=1,
+            num_experts_per_tok=2,
+            moe_intermediate_size=256,
+            moe_layer_freq=1,
+            first_k_dense_replace=0,
+            n_group=1,
+            topk_group=1,
+            routed_scaling_factor=1.0,
+            q_lora_rank=None,
+            kv_lora_rank=16,
+            qk_nope_head_dim=16,
+            qk_rope_head_dim=16,
+            v_head_dim=32,
+            rope_theta=10000.0,
+            rope_scaling={},
+            attention_bias=False,
+            norm_topk_prob=True,
+            scoring_func="sigmoid",
+            topk_method="noaux_tc",
+        ),
+    ),
+    "deepseek_v3_q4": dict(
+        module="mlx_lm.models.deepseek_v3",
+        quantize=dict(group_size=32, bits=4, mode="affine"),
+        args=dict(
+            model_type="deepseek_v3",
+            hidden_size=512,
+            intermediate_size=1024,
+            num_hidden_layers=2,
+            num_attention_heads=16,
+            num_key_value_heads=16,
+            vocab_size=512,
+            max_position_embeddings=128,
+            rms_norm_eps=1e-6,
+            n_routed_experts=8,
+            n_shared_experts=1,
+            num_experts_per_tok=2,
+            moe_intermediate_size=256,
+            moe_layer_freq=1,
+            first_k_dense_replace=0,
+            n_group=1,
+            topk_group=1,
+            routed_scaling_factor=1.0,
+            q_lora_rank=None,
+            kv_lora_rank=64,
+            qk_nope_head_dim=32,
+            qk_rope_head_dim=32,
+            v_head_dim=32,
+            rope_theta=10000.0,
+            rope_scaling={},
+            attention_bias=False,
+            norm_topk_prob=True,
+            scoring_func="sigmoid",
+            topk_method="noaux_tc",
+        ),
+    ),
+    "glm4_moe_lite": dict(
+        module="mlx_lm.models.glm4_moe_lite",
+        args=dict(
+            model_type="glm4_moe_lite",
+            hidden_size=512,
+            intermediate_size=1024,
+            num_hidden_layers=2,
+            num_attention_heads=16,
+            num_key_value_heads=16,
+            vocab_size=512,
+            max_position_embeddings=128,
+            rms_norm_eps=1e-6,
+            n_routed_experts=8,
+            n_shared_experts=1,
+            num_experts_per_tok=2,
+            moe_intermediate_size=256,
+            first_k_dense_replace=1,
+            n_group=1,
+            topk_group=1,
+            routed_scaling_factor=1.0,
+            rope_theta=10000.0,
+            attention_bias=False,
+            q_lora_rank=None,
+            kv_lora_rank=16,
+            qk_rope_head_dim=16,
+            qk_nope_head_dim=16,
+            v_head_dim=32,
+        ),
+    ),
+    "minimax": dict(
+        module="mlx_lm.models.minimax",
+        args=dict(
+            model_type="minimax",
+            hidden_size=512,
+            intermediate_size=1024,
+            num_attention_heads=16,
+            num_key_value_heads=4,
+            max_position_embeddings=128,
+            num_experts_per_tok=2,
+            num_local_experts=8,
+            shared_intermediate_size=256,
+            num_hidden_layers=2,
+            rms_norm_eps=1e-6,
+            rope_theta=10000.0,
+            rotary_dim=32,
+            vocab_size=512,
+        ),
+    ),
+    "gpt_oss": dict(
+        module="mlx_lm.models.gpt_oss",
+        args=dict(
+            model_type="gpt_oss",
+            hidden_size=512,
+            intermediate_size=256,
+            num_hidden_layers=2,
+            num_attention_heads=16,
+            num_key_value_heads=4,
+            vocab_size=512,
+            head_dim=32,
+            rms_norm_eps=1e-6,
+            num_local_experts=8,
+            num_experts_per_tok=2,
+            layer_types=["sliding_attention", "full_attention"],
+            sliding_window=64,
+            rope_theta=10000.0,
+        ),
+    ),
+    "deepseek_v4": dict(
+        module="mlx_lm.models.deepseek_v4",
+        args=dict(
+            model_type="deepseek_v4",
+            vocab_size=256,
+            hidden_size=64,
+            num_hidden_layers=4,
+            num_attention_heads=4,
+            num_key_value_heads=1,
+            q_lora_rank=32,
+            o_lora_rank=32,
+            o_groups=1,
+            head_dim=16,
+            qk_rope_head_dim=8,
+            sliding_window=32,
+            compress_ratios=[0, 0, 4, 0, 0],
+            index_n_heads=4,
+            index_head_dim=16,
+            index_topk=16,
+            moe_intermediate_size=32,
+            n_routed_experts=4,
+            n_shared_experts=1,
+            num_experts_per_tok=2,
+            num_hash_layers=1,
+            hc_mult=1,
+            num_nextn_predict_layers=0,
+            max_position_embeddings=2048,
+            rope_scaling={
+                "beta_fast": 32,
+                "beta_slow": 1,
+                "factor": 2,
+                "original_max_position_embeddings": 1024,
+                "type": "yarn",
+            },
+        ),
+    ),
+    "gemma4": dict(
+        module="mlx_lm.models.gemma4",
+        args=dict(
+            model_type="gemma4",
+            vocab_size=512,
+            text_config=dict(
+                vocab_size=512,
+                hidden_size=512,
+                intermediate_size=1024,
+                num_hidden_layers=4,
+                num_attention_heads=16,
+                num_key_value_heads=4,
+                head_dim=32,
+                global_head_dim=32,
+                num_kv_shared_layers=0,
+                vocab_size_per_layer_input=512,
+                hidden_size_per_layer_input=512,
+                rms_norm_eps=1e-6,
+                max_position_embeddings=128,
+                sliding_window=64,
+                sliding_window_pattern=2,
+                layer_types=[
+                    "sliding_attention",
+                    "full_attention",
+                    "sliding_attention",
+                    "full_attention",
+                ],
+                enable_moe_block=True,
+                num_experts=8,
+                top_k_experts=2,
+                moe_intermediate_size=256,
+            ),
+        ),
+    ),
+}
+
+_PROMPT = [[1, 23, 45, 67, 89, 12, 34, 56]]  # one 8-token prompt; ids < every vocab_size
+
+
+def _build(name):
+    """Build the seeded, bf16 (optionally quantized) model for config `name`.
+
+    Returns `(mx, model)`: the `mlx.core` module and the evaluated model.
+    """
+    import mlx.core as mx
+    import mlx.nn as nn
+    from mlx.utils import tree_map_with_path
+
+    import exo.worker.engines.mlx.auto_parallel  # noqa: F401
+
+    spec = MODEL_CONFIGS[name]
+    arch = importlib.import_module(spec["module"])
+    float_dtypes = (mx.float16, mx.float32, mx.bfloat16)
+
+    def _as_bf16(_path, leaf):
+        is_float = hasattr(leaf, "dtype") and leaf.dtype in float_dtypes
+        return leaf.astype(mx.bfloat16) if is_float else leaf
+
+    mx.random.seed(0)  # fixed seed, set right before the model is constructed
+    model = arch.Model(arch.ModelArgs(**spec["args"]))
+    model.update(tree_map_with_path(_as_bf16, model.parameters()))
+    if "quantize" in spec:
+        nn.quantize(model, **spec["quantize"])
+    mx.eval(model.parameters())
+    return mx, model
+
+
+def _run(name, out_path, shard):
+    """Save float32 logits of model `name` on `_PROMPT`; TP-shard it if `shard`."""
+    import mlx.core as mx
+
+    # When sharding, the ring group is initialised before the model is built.
+    group = mx.distributed.init(backend="ring", strict=True) if shard else None
+    core, model = _build(name)
+    if shard:
+        from exo.worker.engines.mlx.auto_parallel import tensor_auto_parallel
+
+        model = tensor_auto_parallel(model, group, on_layer_loaded=None)
+        core.eval(model.parameters())
+    logits = model(core.array(_PROMPT, dtype=core.int32))
+    core.eval(logits)
+    np.savez(out_path, logits=np.asarray(logits.astype(core.float32)))
+
+
+def _ref_worker(name, out_path, q):  # process target for the unsharded reference run
+    try:
+        _run(name, out_path, shard=False)
+        q.put(True)  # success sentinel; _run_compare checks `r is not True`
+    except BaseException as e:  # any failure is reported on the queue as text
+        q.put(f"{e}\n{traceback.format_exc()}")
+
+
+def _tp_worker(name, rank, hf, out_path, q):
+    """Run one TP rank as a process target; put `(rank, ok, error_text)` on `q`."""
+    os.environ.update(MLX_HOSTFILE=hf, MLX_RANK=str(rank))
+    try:
+        dest = out_path if rank == 0 else f"{out_path}.r{rank}"  # rank 0 is compared
+        _run(name, dest, shard=True)
+        q.put((rank, True, None))
+    except BaseException as exc:
+        q.put((rank, False, f"{exc}\n{traceback.format_exc()}"))
+
+
+def _run_compare(name, world_size, port_base):
+    """Assert TP logits over `world_size` ranks are bit-identical to unsharded ones.
+
+    Scratch files go to a self-deleting temp dir; stray workers are terminated.
+    """
+    ctx = mp.get_context("spawn")
+    q = ctx.Queue()
+    with tempfile.TemporaryDirectory() as d:
+        ref_path, tp_path, hf = f"{d}/ref.npz", f"{d}/tp.npz", f"{d}/hosts.json"
+        with open(hf, "w") as f:
+            json.dump([f"127.0.0.1:{port_base + i}" for i in range(world_size)], f)
+        ps = [ctx.Process(target=_ref_worker, args=(name, ref_path, q))]
+        try:
+            ps[0].start()
+            ps[0].join(300)
+            r = q.get(timeout=10)
+            if r is not True:
+                pytest.fail(f"[{name}] ref FAIL: {str(r)[:500]}")
+            ps += [
+                ctx.Process(target=_tp_worker, args=(name, rank, hf, tp_path, q))
+                for rank in range(world_size)
+            ]
+            for pp in ps[1:]:
+                pp.start()
+            results = [q.get(timeout=300) for _ in range(world_size)]
+            for pp in ps[1:]:
+                pp.join(60)
+        finally:  # pass or fail, never leave a hung worker process behind
+            for pp in [w for w in ps if w.is_alive()]:
+                pp.terminate()
+        for rank, ok, payload in results:
+            if not ok:
+                pytest.fail(f"[{name}] rank {rank} FAIL: {payload[:500]}")
+        ref, tp = np.load(ref_path)["logits"], np.load(tp_path)["logits"]
+    diff = np.abs(ref - tp)
+    max_diff, mean_diff = float(diff.max()), float(diff.mean())
+    assert max_diff == 0.0, (
+        f"[{name} TP={world_size}] not bit-exact: max={max_diff} mean={mean_diff}"
+    )
+
+
+pytestmark = [  # module-level marks: pytest applies them to every test in this file
+    pytest.mark.slow,
+    pytest.mark.skipif(
+        sys.platform != "darwin", reason="MLX distributed requires Metal"
+    ),
+]
+
+
+@pytest.mark.skip("TP=2 is currently very different to TP=1. This test will not pass")
+@pytest.mark.parametrize("world_size", [2, 4])
+@pytest.mark.parametrize("name", list(MODEL_CONFIGS))
+def test_tp_bit_exact(name, world_size):
+    """Compare TP vs unsharded logits; each (name, world_size) uses its own ports."""
+    port = 32000 + list(MODEL_CONFIGS).index(name) * 20 + world_size
+    _run_compare(name, world_size, port)
diff --git a/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py b/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
index 461d8f12c..b574a9b28 100644
--- a/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
@@ -54,6 +54,7 @@ def test_plan_requests_download_when_waiting_and_shard_not_downloaded():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -109,6 +110,7 @@ def test_plan_loads_model_when_all_shards_downloaded_and_waiting():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -154,6 +156,7 @@ def test_plan_does_not_request_download_when_shard_already_downloaded():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -204,6 +207,7 @@ def test_plan_does_not_load_model_until_all_shards_downloaded_globally():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -227,6 +231,7 @@ def test_plan_does_not_load_model_until_all_shards_downloaded_globally():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
diff --git a/src/exo/worker/tests/unittests/test_plan/test_runner_lifecycle.py b/src/exo/worker/tests/unittests/test_plan/test_runner_lifecycle.py
index 1ac9dee07..743a43291 100644
--- a/src/exo/worker/tests/unittests/test_plan/test_runner_lifecycle.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_runner_lifecycle.py
@@ -54,6 +54,7 @@ def test_plan_kills_runner_when_instance_missing():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -85,7 +86,7 @@ def test_plan_kills_runner_when_sibling_failed():
     instances = {INSTANCE_1_ID: instance}
     all_runners = {
         RUNNER_1_ID: RunnerReady(),
-        RUNNER_2_ID: RunnerFailed(error_message="boom"),
+        RUNNER_2_ID: RunnerFailed(error_message="boom", diagnostics=[]),
     }
 
     result = plan_mod.plan(
@@ -96,6 +97,7 @@ def test_plan_kills_runner_when_sibling_failed():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -130,6 +132,7 @@ def test_plan_creates_runner_when_missing_for_node():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -171,6 +174,7 @@ def test_plan_does_not_create_runner_when_supervisor_already_present():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -203,6 +207,7 @@ def test_plan_does_not_create_runner_for_unassigned_node():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
diff --git a/src/exo/worker/tests/unittests/test_plan/test_task_forwarding.py b/src/exo/worker/tests/unittests/test_plan/test_task_forwarding.py
index bb87268fc..152fc6e19 100644
--- a/src/exo/worker/tests/unittests/test_plan/test_task_forwarding.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_task_forwarding.py
@@ -78,6 +78,7 @@ def test_plan_forwards_pending_chat_completion_when_runner_ready():
         all_runners=all_runners,
         tasks={TASK_1_ID: task},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -131,6 +132,7 @@ def test_plan_does_not_forward_chat_completion_if_any_runner_not_ready():
         all_runners=all_runners,
         tasks={TASK_1_ID: task},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -181,6 +183,7 @@ def test_plan_does_not_forward_tasks_for_other_instances():
         all_runners=all_runners,
         tasks={foreign_task.task_id: foreign_task},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -249,6 +252,7 @@ def test_plan_ignores_non_pending_or_non_chat_tasks():
         all_runners=all_runners,
         tasks={TASK_1_ID: completed_task, other_task_id: other_task},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -291,6 +295,7 @@ def test_plan_returns_none_when_nothing_to_do():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
diff --git a/src/exo/worker/tests/unittests/test_plan/test_warmup.py b/src/exo/worker/tests/unittests/test_plan/test_warmup.py
index 612868e7a..46e372f6c 100644
--- a/src/exo/worker/tests/unittests/test_plan/test_warmup.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_warmup.py
@@ -63,6 +63,7 @@ def test_plan_starts_warmup_for_accepting_rank_when_all_loaded_or_warming():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -107,6 +108,7 @@ def test_plan_starts_warmup_for_rank_zero_after_others_warming():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -150,6 +152,7 @@ def test_plan_does_not_start_warmup_for_non_zero_rank_until_all_loaded_or_warmin
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -197,6 +200,7 @@ def test_plan_does_not_start_warmup_for_rank_zero_until_others_warming():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -216,6 +220,7 @@ def test_plan_does_not_start_warmup_for_rank_zero_until_others_warming():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -262,6 +267,7 @@ def test_plan_starts_warmup_for_connecting_rank_after_others_warming():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -307,6 +313,7 @@ def test_plan_does_not_start_warmup_for_accepting_rank_until_all_loaded_or_warmi
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
@@ -351,6 +358,7 @@ def test_plan_does_not_start_warmup_for_connecting_rank_until_others_warming():
         all_runners=all_runners,
         tasks={},
         input_chunk_buffer={},
+        image_cache={},
         instance_backoff=KeyedBackoff(),
         download_backoff=KeyedBackoff(),
     )
diff --git a/src/exo/worker/tests/unittests/test_runner/test_dsml_e2e.py b/src/exo/worker/tests/unittests/test_runner/test_dsml_e2e.py
index 74efbef1d..fe508ae30 100644
--- a/src/exo/worker/tests/unittests/test_runner/test_dsml_e2e.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_dsml_e2e.py
@@ -1,13 +1,13 @@
 import json
 from collections.abc import Generator
-from typing import Any
+from typing import Any, cast
 
 from exo.shared.types.common import ModelId
 from exo.shared.types.worker.runner_response import (
     GenerationResponse,
     ToolCallResponse,
 )
-from exo.worker.engines.mlx.dsml_encoding import (
+from exo.worker.engines.mlx.vendor.dsml_encoding import (
     ASSISTANT_TOKEN,
     BOS_TOKEN,
     DSML_TOKEN,
@@ -20,7 +20,26 @@ from exo.worker.engines.mlx.dsml_encoding import (
     encode_messages,
     parse_dsml_output,
 )
-from exo.worker.runner.llm_inference.model_output_parsers import parse_deepseek_v32
+from exo.worker.runner.llm_inference.model_output_parsers import (
+    parse_deepseek_v4,
+    parse_deepseek_v32,
+    parse_thinking_models,
+)
+
+
+def _parse_deepseek_with_thinking(
+    source: Generator[GenerationResponse | None],
+    starts_in_thinking: bool = False,
+) -> Generator[GenerationResponse | ToolCallResponse | None]:
+    """Run ``source`` through the thinking parser, then the DeepSeek v3.2 parser."""
+    thinking_stream = parse_thinking_models(
+        source,
+        think_start=THINKING_START,
+        think_end=THINKING_END,
+        starts_in_thinking=starts_in_thinking,
+    )
+    return parse_deepseek_v32(thinking_stream)
+
 
 # ── Shared fixtures ──────────────────────────────────────────────
 
@@ -333,9 +352,7 @@ class TestE2EThinkingAndToolCall:
         assert prompt.endswith(THINKING_START)
 
         # Simulate: model outputs <think>, thinks, closes thinking, then tool call.
-        # In the full pipeline, parse_thinking_models handles the case where
-        # <think> is in the prompt. Here we test parse_deepseek_v32 directly,
-        # which detects <think>/<think> markers in the stream.
+        # Use the full production chain (parse_thinking_models → parse_deepseek_v32).
         model_tokens = [
             THINKING_START,
             "The user wants weather",
@@ -353,7 +370,7 @@ class TestE2EThinkingAndToolCall:
             TOOL_CALLS_END,
         ]
 
-        results = list(parse_deepseek_v32(_simulate_tokens(model_tokens)))
+        results = list(_parse_deepseek_with_thinking(_simulate_tokens(model_tokens)))
 
         gen_results = [r for r in results if isinstance(r, GenerationResponse)]
         tool_results = [r for r in results if isinstance(r, ToolCallResponse)]
@@ -387,7 +404,7 @@ class TestE2EThinkingAndToolCall:
         prompt_no_think = encode_messages(
             messages, tools=_WEATHER_TOOLS, thinking_mode="chat"
         )
-        assert prompt_no_think.endswith(THINKING_END)
+        assert not prompt_no_think.endswith(THINKING_START)
 
         # Both should have the same tool definitions
         assert "get_weather" in prompt_think
@@ -597,7 +614,9 @@ class TestE2EFullRoundTrip:
             f"</{DSML_TOKEN}invoke>\n",
             TOOL_CALLS_END,
         ]
-        results_1 = list(parse_deepseek_v32(_simulate_tokens(model_tokens_1)))
+        results_1 = list(
+            _parse_deepseek_with_thinking(_simulate_tokens(model_tokens_1))
+        )
 
         # Verify: thinking tokens + tool call
         gen_1 = [r for r in results_1 if isinstance(r, GenerationResponse)]
@@ -660,7 +679,9 @@ class TestE2EFullRoundTrip:
             THINKING_END,
             "The weather in Hangzhou is currently cloudy with temperatures between 7°C and 13°C.",
         ]
-        results_2 = list(parse_deepseek_v32(_simulate_tokens(model_tokens_2)))
+        results_2 = list(
+            _parse_deepseek_with_thinking(_simulate_tokens(model_tokens_2))
+        )
 
         gen_2 = [r for r in results_2 if isinstance(r, GenerationResponse)]
         tool_2 = [r for r in results_2 if isinstance(r, ToolCallResponse)]
@@ -1034,3 +1055,128 @@ class TestApplyChatTemplateWithToolCalls:
         assert "get_weather" in prompt
         assert "Tokyo" in prompt
         assert "Sunny" in prompt
+
+
+class TestE2EDeepseekV4ToolCallParsing:
+    """V4 emits `<｜DSML｜tool_calls>` (outer) wrapping `<｜DSML｜invoke …>` calls
+    (the V4-Flash chat template promises this exact structure). Parser must
+    extract the tool name + parameters back out."""
+
+    def test_v4_tool_call_extracted_from_clean_output(self):
+        """Clean V4 DSML output should yield a ToolCallResponse with the
+        invoked tool name and parameter values — not bleed through as text."""
+        # Realistic token splits matching the V4 tokenizer's known behavior:
+        #   `<｜DSML｜tool_calls>` -> ['<', '｜DSML｜', 'tool', '_c', 'alls', '>']
+        # The model emits tokens one-by-one in this multi-token pattern.
+        model_tokens = [
+            "<",  # opening tool_calls tag, split into six tokens
+            DSML_TOKEN,
+            "tool",
+            "_c",
+            "alls",
+            ">",
+            "\n<",  # start of the single invoke element
+            DSML_TOKEN,
+            "invoke",
+            ' name="read"',
+            ">\n<",  # start of its only parameter element
+            DSML_TOKEN,
+            "parameter",
+            ' name="filePath" string="true"',
+            ">",
+            "/Users/l2/PycharmProjects/exo",  # parameter value asserted below
+            "</",
+            DSML_TOKEN,
+            "parameter",
+            ">\n</",
+            DSML_TOKEN,
+            "invoke",
+            ">\n</",  # closing tool_calls tag, same six-token split
+            DSML_TOKEN,
+            "tool",
+            "_c",
+            "alls",
+            ">",
+        ]
+
+        results = list(parse_deepseek_v4(_simulate_tokens(model_tokens)))
+
+        tool_results = [r for r in results if isinstance(r, ToolCallResponse)]
+        text_results = [r for r in results if isinstance(r, GenerationResponse)]
+
+        assert len(tool_results) == 1, (
+            f"expected one ToolCallResponse, got {len(tool_results)} tool + "
+            f"{len(text_results)} text results: text="
+            f"{''.join(r.text for r in text_results)!r}"
+        )
+        tool_calls = tool_results[0].tool_calls
+        assert len(tool_calls) == 1  # the stream holds exactly one invoke block
+        assert tool_calls[0].name == "read"
+        args = cast(dict[str, str], json.loads(tool_calls[0].arguments))
+        assert args == {"filePath": "/Users/l2/PycharmProjects/exo"}
+
+    def test_v4_tool_call_after_thinking_block(self):
+        """V4 reasoning models start in `<think>` and emit DSML tool calls
+        after `</think>`. The thinking parser must hand a complete tool-call
+        block off to `parse_deepseek_v4` without dropping markers."""
+        # `</think>` token-splits into ['</think>'] in V4's tokenizer, so the
+        # thinking parser sees it as a single token. Emit thinking, then the
+        # DSML tool call.
+        model_tokens = [
+            "<think>",  # emitted although starts_in_thinking=True below
+            "The user wants me to explore the codebase.",
+            "</think>",
+            "<",  # same tool-call token sequence as the clean-output test
+            DSML_TOKEN,
+            "tool",
+            "_c",
+            "alls",
+            ">",
+            "\n<",
+            DSML_TOKEN,
+            "invoke",
+            ' name="read"',
+            ">\n<",
+            DSML_TOKEN,
+            "parameter",
+            ' name="filePath" string="true"',
+            ">",
+            "/Users/l2/PycharmProjects/exo",
+            "</",
+            DSML_TOKEN,
+            "parameter",
+            ">\n</",
+            DSML_TOKEN,
+            "invoke",
+            ">\n</",
+            DSML_TOKEN,
+            "tool",
+            "_c",
+            "alls",
+            ">",
+        ]
+
+        results = list(
+            parse_deepseek_v4(
+                parse_thinking_models(  # thinking stage feeds the V4 parser
+                    _simulate_tokens(model_tokens),
+                    think_start="<think>",
+                    think_end="</think>",
+                    starts_in_thinking=True,
+                )
+            )
+        )
+
+        tool_results = [r for r in results if isinstance(r, ToolCallResponse)]
+        text_results = [r for r in results if isinstance(r, GenerationResponse)]
+        non_thinking_text = "".join(r.text for r in text_results if not r.is_thinking)
+
+        assert len(tool_results) == 1, (
+            f"expected ToolCallResponse, got {len(tool_results)} tool + "
+            f"non-thinking text {non_thinking_text!r}"
+        )
+        tool_calls = tool_results[0].tool_calls
+        assert len(tool_calls) == 1  # the stream holds exactly one invoke block
+        assert tool_calls[0].name == "read"
+        args = cast(dict[str, str], json.loads(tool_calls[0].arguments))
+        assert args == {"filePath": "/Users/l2/PycharmProjects/exo"}
diff --git a/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py b/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py
index ffd8fbfdf..e658249d2 100644
--- a/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_event_ordering.py
@@ -1,14 +1,13 @@
 # Check tasks are complete before runner is ever ready.
-import unittest.mock
 from collections.abc import Iterable
+from dataclasses import dataclass
 from typing import Callable
 
-import mlx.core as mx
 import pytest
 
+import exo.worker.engines.mlx.builder as mlx_builder
 import exo.worker.runner.llm_inference.batch_generator as mlx_batch_generator
 import exo.worker.runner.llm_inference.model_output_parsers as mlx_model_output_parsers
-import exo.worker.runner.llm_inference.runner as mlx_runner
 from exo.shared.types.chunks import TokenChunk
 from exo.shared.types.events import (
     ChunkGenerated,
@@ -46,6 +45,8 @@ from exo.shared.types.worker.runners import (
     RunnerWarmingUp,
 )
 from exo.utils.channels import mp_channel
+from exo.worker.engines.mlx.builder import MlxBuilder
+from exo.worker.runner.runner import Runner
 
 from ...constants import (
     CHAT_COMPLETION_TASK_ID,
@@ -111,17 +112,26 @@ CHAT_TASK = TextGeneration(
 
 def assert_events_equal(test_events: Iterable[Event], true_events: Iterable[Event]):
     for test_event, true_event in zip(test_events, true_events, strict=True):
-        test_event.event_id = true_event.event_id
+        test_event = test_event.model_copy(update={"event_id": true_event.event_id})
         assert test_event == true_event, f"{test_event} != {true_event}"
 
 
+@dataclass
+class MockLoadOutput:  # progress item yielded by the fake load_mlx_items generator
+    layers_loaded: int  # presumably surfaces as RunnerLoading.layers_loaded; confirm
+    total: int  # presumably surfaces as RunnerLoading.total_layers; confirm
+
+
 @pytest.fixture
 def patch_out_mlx(monkeypatch: pytest.MonkeyPatch):
     # initialize_mlx returns a mock group
-    monkeypatch.setattr(mlx_runner, "initialize_mlx", make_nothin(MockGroup()))
-    monkeypatch.setattr(
-        mlx_runner, "load_mlx_items", make_nothin((1, MockTokenizer, None))
-    )
+    monkeypatch.setattr(mlx_builder, "initialize_mlx", make_nothin(MockGroup()))
+
+    def lmi_gen():
+        yield MockLoadOutput(1, 1)
+        return (1, MockTokenizer, None)
+
+    monkeypatch.setattr(mlx_builder, "load_mlx_items", make_nothin(lmi_gen()))
     monkeypatch.setattr(mlx_batch_generator, "warmup_inference", make_nothin(1))
     monkeypatch.setattr(mlx_batch_generator, "_check_for_debug_prompts", nothin)
     monkeypatch.setattr(mlx_batch_generator, "mx_any", make_nothin(False))
@@ -142,6 +152,11 @@ def patch_out_mlx(monkeypatch: pytest.MonkeyPatch):
     )
     monkeypatch.setattr(mlx_batch_generator, "ExoBatchGenerator", FakeExoBatchGenerator)
 
+    def _no_prefill_server(_self: Runner) -> int | None:
+        return None
+
+    monkeypatch.setattr(Runner, "_start_prefill_server", _no_prefill_server)
+
 
 class FakeExoBatchGenerator:
     def __init__(self, *_args: object, **_kwargs: object) -> None:
@@ -264,17 +279,18 @@ def _run(tasks: Iterable[Task], send_after_ready: list[Task] | None = None):
         # this is some c++ nonsense
         task_receiver.close = nothin
         task_receiver.join = nothin
-        with unittest.mock.patch(
-            "exo.worker.runner.llm_inference.runner.mx.distributed.all_gather",
-            make_nothin(mx.array([1])),
-        ):
-            runner = mlx_runner.Runner(
-                bound_instance,
-                event_sender,  # pyright: ignore[reportArgumentType]
-                task_receiver,
-                cancel_receiver,
-            )
-            runner.main()
+        builder = MlxBuilder(
+            bound_instance.bound_shard.model_card.model_id,
+            event_sender,  # pyright: ignore[reportArgumentType]
+            cancel_receiver,
+        )
+        runner = Runner(
+            bound_instance,
+            builder,
+            event_sender,  # pyright: ignore[reportArgumentType]
+            task_receiver,
+        )
+        runner.main()
 
         return event_sender.events
 
@@ -318,6 +334,10 @@ def test_events_processed_in_correct_order(patch_out_mlx: pytest.MonkeyPatch):
                 runner_status=RunnerLoading(layers_loaded=0, total_layers=32),
             ),
             TaskAcknowledged(task_id=LOAD_TASK_ID),
+            RunnerStatusUpdated(
+                runner_id=RUNNER_1_ID,
+                runner_status=RunnerLoading(layers_loaded=1, total_layers=1),
+            ),
             TaskStatusUpdated(task_id=LOAD_TASK_ID, task_status=TaskStatus.Complete),
             RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerLoaded()),
             TaskStatusUpdated(task_id=WARMUP_TASK_ID, task_status=TaskStatus.Running),
diff --git a/src/exo/worker/tests/unittests/test_runner/test_finish_reason_sse.py b/src/exo/worker/tests/unittests/test_runner/test_finish_reason_sse.py
index 0aee03ba6..8eb4b4433 100644
--- a/src/exo/worker/tests/unittests/test_runner/test_finish_reason_sse.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_finish_reason_sse.py
@@ -1,3 +1,4 @@
+import json
 from collections.abc import Generator
 from typing import Any
 
@@ -7,7 +8,7 @@ from exo.shared.types.worker.runner_response import (
     GenerationResponse,
     ToolCallResponse,
 )
-from exo.worker.engines.mlx.dsml_encoding import (
+from exo.worker.engines.mlx.vendor.dsml_encoding import (
     DSML_TOKEN,
     THINKING_END,
     THINKING_START,
@@ -379,6 +380,110 @@ class TestGenericToolCallsFinishReason:
 # ── Double parser chain (parse_thinking_models → parse_deepseek_v32) ──
 
 
+class TestDeepSeekV32StartsInThinking:
+    """Regression tests for deepseek v3.2 where the chat template appends
+    <think> to the prompt so the model starts already inside a thinking block.
+    """
+
+    def test_reasoning_tagged_when_starts_in_thinking(self):
+        tokens = [  # no THINKING_START: the stream begins mid-thought
+            _make_response("let me", 0),
+            _make_response(" think", 1),
+            _make_response(THINKING_END, 2),  # not expected in the output text
+            _make_response("\n", 3),
+            _make_response("42", 4, finish_reason="stop"),
+        ]
+        thinking = parse_thinking_models(
+            _queue_source(tokens),
+            think_start=THINKING_START,
+            think_end=THINKING_END,
+            starts_in_thinking=True,
+        )
+        results = _step_until_finish(parse_deepseek_v32(thinking))
+        gens = [  # streamed chunks only; the finishing chunk is checked below
+            r
+            for r in results
+            if isinstance(r, GenerationResponse) and r.finish_reason is None
+        ]
+        texts = [(r.text, r.is_thinking) for r in gens]
+        assert texts == [("let me", True), (" think", True), ("\n", False)]
+        final = [  # the chunk(s) carrying a finish_reason
+            r
+            for r in results
+            if isinstance(r, GenerationResponse) and r.finish_reason is not None
+        ]
+        assert len(final) == 1
+        assert final[0].text == "42"
+        assert final[0].is_thinking is False
+
+    def test_starts_in_thinking_then_tool_call(self):
+        tokens = [  # thinking text, end marker, then one DSML tool call
+            _make_response("need weather", 0),
+            _make_response(THINKING_END, 1),
+            _make_response("\n\n", 2),
+            _make_response(TOOL_CALLS_START, 3),
+            _make_response("\n", 4),
+            _make_response(f'<{DSML_TOKEN}invoke name="get_weather">\n', 5),
+            _make_response(
+                f'<{DSML_TOKEN}parameter name="city" string="true">NYC</{DSML_TOKEN}parameter>\n',
+                6,
+            ),
+            _make_response(f"</{DSML_TOKEN}invoke>\n", 7),
+            _make_response(TOOL_CALLS_END, 8, finish_reason="stop"),
+        ]
+        thinking = parse_thinking_models(
+            _queue_source(tokens),
+            think_start=THINKING_START,
+            think_end=THINKING_END,
+            starts_in_thinking=True,
+        )
+        results = _step_until_finish(parse_deepseek_v32(thinking))
+        reasoning_gens = [  # non-final chunks flagged as thinking
+            r
+            for r in results
+            if isinstance(r, GenerationResponse)
+            and r.finish_reason is None
+            and r.is_thinking
+        ]
+        assert [r.text for r in reasoning_gens] == ["need weather"]
+        tool_results = [r for r in results if isinstance(r, ToolCallResponse)]
+        assert len(tool_results) == 1
+        assert tool_results[0].tool_calls[0].name == "get_weather"
+
+    def test_reasoning_tokens_counted_starts_in_thinking(self):
+        usage = Usage(  # reasoning_tokens starts at 0 on the input side
+            prompt_tokens=10,
+            completion_tokens=5,
+            total_tokens=15,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=0),
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=0),
+        )
+        tokens = [
+            _make_response("reasoning", 0),  # thinking chunk 1 of 2
+            _make_response(" more", 1),  # thinking chunk 2 of 2
+            _make_response(THINKING_END, 2),
+            _make_response("\n", 3),
+            GenerationResponse(text="42", token=4, finish_reason="stop", usage=usage),
+        ]
+        thinking = parse_thinking_models(
+            _queue_source(tokens),
+            think_start=THINKING_START,
+            think_end=THINKING_END,
+            starts_in_thinking=True,
+        )
+        results = _step_until_finish(
+            count_reasoning_tokens(parse_deepseek_v32(thinking))
+        )
+        final = [  # only the finishing chunk carries the usage under test
+            r
+            for r in results
+            if isinstance(r, GenerationResponse) and r.finish_reason is not None
+        ]
+        assert len(final) == 1
+        assert final[0].usage is not None
+        assert final[0].usage.completion_tokens_details.reasoning_tokens == 2
+
+
 class TestBatchGeneratorSingleNext:
     def test_finish_reason_with_buffered_tokens_drain_loop(self):
         from exo.worker.runner.llm_inference.batch_generator import GeneratorQueue
@@ -403,3 +508,167 @@ class TestBatchGeneratorSingleNext:
         assert _got_finish(collected), (
             f"No finish_reason in collected: {[(type(r).__name__, getattr(r, 'finish_reason', None) if isinstance(r, GenerationResponse) else 'tool') for r in collected]}"
         )
+
+
+# ── parse_thinking_models prefix buffering ──────────────────────
+
+
+def _drain_text(results: list[GenerationResponse | ToolCallResponse]) -> str:
+    """Concatenate the text of every GenerationResponse with no finish_reason."""
+    pieces: list[str] = []
+    for item in results:
+        if not isinstance(item, GenerationResponse) or item.finish_reason is not None:
+            continue
+        pieces.append(item.text)
+    return "".join(pieces)
+
+
+class TestThinkingModelsPrefixBuffering:  # tag-like prefixes must not be dropped
+    def test_lone_lt_is_preserved(self):
+        tokens = [
+            _make_response("<", 0),  # could be the start of "<think>"
+            _make_response("function", 1),
+            _make_response(">", 2),
+            _make_response("", 3, finish_reason="stop"),
+        ]
+        results = _step_until_finish(
+            parse_thinking_models(
+                _queue_source(tokens),
+                think_start="<think>",
+                think_end="</think>",
+                starts_in_thinking=False,
+            )
+        )
+        assert _drain_text(results) == "<function>"
+        gens = [r for r in results if isinstance(r, GenerationResponse)]
+        assert all(not r.is_thinking for r in gens)
+
+    def test_lone_lt_slash_is_preserved(self):
+        tokens = [
+            _make_response("</", 0),  # could be the start of "</think>"
+            _make_response("parameter", 1),
+            _make_response(">", 2),
+            _make_response("", 3, finish_reason="stop"),
+        ]
+        results = _step_until_finish(
+            parse_thinking_models(
+                _queue_source(tokens),
+                think_start="<think>",
+                think_end="</think>",
+                starts_in_thinking=False,
+            )
+        )
+        assert _drain_text(results) == "</parameter>"
+
+    def test_partial_prefix_then_diverge(self):
+        tokens = [
+            _make_response("<", 0),
+            _make_response("t", 1),
+            _make_response("h", 2),
+            _make_response("other", 3),  # "<thother" no longer matches "<think>"
+            _make_response("", 4, finish_reason="stop"),
+        ]
+        results = _step_until_finish(
+            parse_thinking_models(
+                _queue_source(tokens),
+                think_start="<think>",
+                think_end="</think>",
+                starts_in_thinking=False,
+            )
+        )
+        assert _drain_text(results) == "<thother"
+
+    def test_real_think_tag_still_swallowed(self):
+        tokens = [  # both tags arrive split across three tokens each
+            _make_response("<", 0),
+            _make_response("think", 1),
+            _make_response(">", 2),
+            _make_response("body", 3),
+            _make_response("</", 4),
+            _make_response("think", 5),
+            _make_response(">", 6),
+            _make_response("after", 7),
+            _make_response("", 8, finish_reason="stop"),
+        ]
+        results = _step_until_finish(
+            parse_thinking_models(
+                _queue_source(tokens),
+                think_start="<think>",
+                think_end="</think>",
+                starts_in_thinking=False,
+            )
+        )
+        gens = [
+            r
+            for r in results
+            if isinstance(r, GenerationResponse) and r.finish_reason is None
+        ]
+        texts = [(r.text, r.is_thinking) for r in gens]
+        assert texts == [("body", True), ("after", False)]  # tags not emitted
+
+    def test_finish_reason_flushes_buffer(self):
+        tokens = [
+            _make_response("<", 0),  # still ambiguous when the stream ends
+            _make_response("", 1, finish_reason="stop"),
+        ]
+        results = _step_until_finish(
+            parse_thinking_models(
+                _queue_source(tokens),
+                think_start="<think>",
+                think_end="</think>",
+                starts_in_thinking=False,
+            )
+        )
+        gens = [r for r in results if isinstance(r, GenerationResponse)]
+        assert len(gens) == 2  # the flushed "<" plus the finishing chunk
+        assert gens[0].text == "<"
+        assert gens[0].is_thinking is False
+        assert gens[0].finish_reason is None
+        assert gens[1].finish_reason == "stop"
+        assert gens[1].is_thinking is False
+
+    def test_tool_call_after_prefix_tokens_parses(self):
+        def _capture_parser(text: str) -> dict[str, Any]:
+            return {"name": "captured", "arguments": {"raw": text}}  # echo the body
+
+        tool_parser = make_mlx_parser("<tool_call>", "</tool_call>", _capture_parser)
+
+        tokens = [  # tool-call body full of "<" and "</" tokens
+            _make_response("<tool_call>", 0),
+            _make_response("\n", 1),
+            _make_response("<", 2),
+            _make_response("function", 3),
+            _make_response("=glob", 4),
+            _make_response(">", 5),
+            _make_response("\n", 6),
+            _make_response("<", 7),
+            _make_response("parameter", 8),
+            _make_response("=pattern", 9),
+            _make_response(">", 10),
+            _make_response("**/*", 11),
+            _make_response("</", 12),
+            _make_response("parameter", 13),
+            _make_response(">", 14),
+            _make_response("</", 15),
+            _make_response("function", 16),
+            _make_response(">", 17),
+            _make_response("</tool_call>", 18, finish_reason="stop"),
+        ]
+
+        thinking = parse_thinking_models(
+            _queue_source(tokens),
+            think_start="<think>",
+            think_end="</think>",
+            starts_in_thinking=False,
+        )
+        results = _step_until_finish(
+            parse_tool_calls(thinking, tool_parser, tools=None)
+        )
+
+        tool_results = [r for r in results if isinstance(r, ToolCallResponse)]
+        assert len(tool_results) == 1
+        raw = json.loads(tool_results[0].tool_calls[0].arguments)["raw"]  # pyright: ignore[reportAny]
+        assert "<function=glob>" in raw
+        assert "<parameter=pattern>" in raw
+        assert "</parameter>" in raw
+        assert "</function>" in raw
diff --git a/src/exo/worker/tests/unittests/test_runner/test_parse_tool_calls.py b/src/exo/worker/tests/unittests/test_runner/test_parse_tool_calls.py
index 49655f03e..d69fbe091 100644
--- a/src/exo/worker/tests/unittests/test_runner/test_parse_tool_calls.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_parse_tool_calls.py
@@ -9,17 +9,14 @@ from exo.worker.runner.llm_inference.model_output_parsers import parse_tool_call
 from exo.worker.runner.llm_inference.tool_parsers import make_mlx_parser
 
 
-def _make_responses(
-    texts: list[str],
-    finish_on_last: bool = True,
-) -> Generator[GenerationResponse]:
+def _make_responses(texts: list[str]) -> Generator[GenerationResponse]:
     """Create a sequence of GenerationResponses from text strings."""
     for i, text in enumerate(texts):
         is_last = i == len(texts) - 1
         yield GenerationResponse(
             text=text,
             token=i,
-            finish_reason="stop" if (is_last and finish_on_last) else None,
+            finish_reason="stop" if is_last else None,
             usage=None,
         )
 
@@ -39,7 +36,7 @@ class TestParseToolCalls:
         texts = ["<tool_call>", "test_fn", "</tool_call>"]
         results = list(
             parse_tool_calls(
-                _make_responses(texts, finish_on_last=False),
+                _make_responses(texts),
                 _dummy_parser,
                 tools=None,
             )
@@ -78,7 +75,7 @@ class TestParseToolCalls:
         texts = ["<tool_call>", "bad content", "</tool_call>"]
         results = list(
             parse_tool_calls(
-                _make_responses(texts, finish_on_last=False),
+                _make_responses(texts),
                 make_mlx_parser("<tool_call>", "</tool_call>", _failing_parser),
                 tools=None,
             )
diff --git a/src/exo/worker/tests/unittests/test_runner/test_runner_supervisor.py b/src/exo/worker/tests/unittests/test_runner/test_runner_supervisor.py
index 82612d6d6..87cb9c744 100644
--- a/src/exo/worker/tests/unittests/test_runner/test_runner_supervisor.py
+++ b/src/exo/worker/tests/unittests/test_runner/test_runner_supervisor.py
@@ -1,4 +1,3 @@
-import multiprocessing as mp
 from typing import cast
 
 import anyio
@@ -16,36 +15,32 @@ from exo.shared.types.text_generation import (
 )
 from exo.shared.types.worker.instances import BoundInstance, InstanceId
 from exo.shared.types.worker.runners import RunnerFailed, RunnerId
+from exo.utils.async_process import AsyncProcess
 from exo.utils.channels import channel, mp_channel
-from exo.worker.runner.runner_supervisor import RunnerSupervisor
+from exo.worker.runner.bootstrap import RunnerTerminationError
+from exo.worker.runner.supervisor import RunnerStdioHandler, RunnerSupervisor
 from exo.worker.tests.unittests.conftest import get_bound_mlx_ring_instance
 
 
 class _DeadProcess:
-    exitcode = -6
+    def __init__(self):  # channel() returns (sender, receiver); stdio needs receivers
+        _, rx1 = channel[bytes]()
+        _, rx2 = channel[bytes]()
+        self.stdout = rx1  # never written to: the fake process produces no output
+        self.stderr = rx2  # handed to RunnerStdioHandler.create as stderr_rx
 
-    def start(self) -> None:
-        return None
+    exitcode = -6
 
     def is_alive(self) -> bool:
         return False
 
-    def join(self, _timeout: float | None = None) -> None:
-        return None
 
-    def terminate(self) -> None:
-        return None
-
-    def kill(self) -> None:
-        return None
-
-
-@pytest.mark.asyncio
+@pytest.mark.anyio
 async def test_check_runner_emits_error_chunk_for_inflight_text_generation() -> None:
     event_sender, event_receiver = channel[Event]()
     task_sender, _ = mp_channel[Task]()
     cancel_sender, _ = mp_channel[TaskId]()
-    _, ev_recv = mp_channel[Event]()
+    _, ev_recv = mp_channel[Event | RunnerTerminationError]()
 
     bound_instance: BoundInstance = get_bound_mlx_ring_instance(
         instance_id=InstanceId("instance-a"),
@@ -54,10 +49,15 @@ async def test_check_runner_emits_error_chunk_for_inflight_text_generation() ->
         node_id=NodeId("node-a"),
     )
 
+    proc = cast(AsyncProcess, cast(object, _DeadProcess()))
+    handler = await RunnerStdioHandler.create(
+        stdout_rx=proc.stdout, stderr_rx=proc.stderr
+    )
     supervisor = RunnerSupervisor(
         shard_metadata=bound_instance.bound_shard,
         bound_instance=bound_instance,
-        runner_process=cast("mp.Process", cast(object, _DeadProcess())),
+        runner_process=proc,
+        _runner_stdio_handler=handler,
         initialize_timeout=400,
         _ev_recv=ev_recv,
         _task_sender=task_sender,
diff --git a/src/exo/worker/tests/unittests/test_runner/test_serve_prefill.py b/src/exo/worker/tests/unittests/test_runner/test_serve_prefill.py
new file mode 100644
index 000000000..6c607dfa2
--- /dev/null
+++ b/src/exo/worker/tests/unittests/test_runner/test_serve_prefill.py
@@ -0,0 +1,227 @@
+"""Tests for serve_prefill_request — the producer-side path that uses
+KVPrefixCache for cross-request prefix sharing."""
+
+import io
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Any, cast
+
+import mlx.core as mx
+import pytest
+from mlx_lm.models.cache import KVCache
+
+import exo.worker.engines.mlx.disaggregated.serve as mlx_serve_mod
+from exo.worker.disaggregated.protocol import (
+    Done,
+    Header,
+    KVChunk,
+    read_header,
+    read_message,
+)
+from exo.worker.disaggregated.server import PrefillRequest
+from exo.worker.engines.mlx.cache import KVPrefixCache
+from exo.worker.engines.mlx.disaggregated.adapter import write_cache_to_wire
+
+N_HEADS = 2
+HEAD_DIM = 4
+
+
+@dataclass
+class _FakeTokenizer:
+    has_thinking: bool = False
+    think_start: object | None = None
+    think_end: object | None = None
+
+
+class _FakeModel:
+    def __init__(self) -> None:
+        self.layers = [object()]
+
+    def make_cache(self) -> list[KVCache]:
+        return [KVCache() for _ in self.layers]
+
+
+def _populate_cache_in_place(cache: list[KVCache], n_tokens: int) -> None:
+    mx.random.seed(0)
+    for c in cache:
+        c.keys = (
+            mx.random.uniform(shape=(1, N_HEADS, n_tokens, HEAD_DIM)) * 10
+        ).astype(mx.bfloat16)
+        c.values = (
+            mx.random.uniform(shape=(1, N_HEADS, n_tokens, HEAD_DIM)) * 10
+        ).astype(mx.bfloat16)
+        c.offset = n_tokens
+
+
+def _patch_prefill(monkeypatch: pytest.MonkeyPatch) -> list[mx.array]:
+    """Replace mlx_prefill with a tracker; return the list it appends to."""
+    inputs: list[mx.array] = []
+
+    def fake_prefill(**kwargs: object) -> tuple[float, int, list[object]]:
+        pt = cast(mx.array, kwargs["prompt_tokens"])
+        inputs.append(pt)
+        n = int(pt.shape[0])
+        cache = cast(list[KVCache], kwargs["cache"])
+        existing = int(cache[0].offset) if cache and cache[0].keys is not None else 0
+        _populate_cache_in_place(cache, existing + n)
+        return (0.0, n, [])
+
+    def fake_make_sampler(**_: object) -> Callable[[mx.array], mx.array]:
+        return lambda x: x
+
+    monkeypatch.setattr(mlx_serve_mod, "mlx_prefill", fake_prefill)
+    monkeypatch.setattr(mlx_serve_mod, "make_sampler", fake_make_sampler)
+    return inputs
+
+
+def _decode(payload: bytes) -> tuple[Header, list[KVChunk], int]:
+    buf = io.BytesIO(payload)
+    hdr = read_header(buf)
+    chunks: list[KVChunk] = []
+    total = 0
+    while True:
+        msg = read_message(buf)
+        if msg is None:
+            break
+        if isinstance(msg, KVChunk):
+            chunks.append(msg)
+        elif isinstance(msg, Done):
+            total = msg.total_tokens
+            break
+    return hdr, chunks, total
+
+
+def _serve(request: PrefillRequest, kv_prefix_cache: KVPrefixCache | None) -> bytes:
+    cache = mlx_serve_mod.run_prefill_for_request(
+        model=cast(Any, _FakeModel()),  # pyright: ignore[reportAny]
+        tokenizer=cast(Any, _FakeTokenizer()),  # pyright: ignore[reportAny]
+        group=None,
+        kv_prefix_cache=kv_prefix_cache,
+        request=request,
+    )
+    buf = io.BytesIO()
+    write_cache_to_wire(
+        buf,
+        cache,
+        request_id=request.request_id,
+        model_id=request.model_id,
+        start_pos=request.start_pos,
+    )
+    return buf.getvalue()
+
+
+def test_serve_prefill_runs_full_prefill_when_cache_empty(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    inputs = _patch_prefill(monkeypatch)
+    cache = KVPrefixCache(group=None)
+
+    payload = _serve(
+        PrefillRequest(
+            request_id="r1", model_id="m", token_ids=list(range(20)), start_pos=0
+        ),
+        cache,
+    )
+
+    assert len(inputs) == 1
+    assert int(inputs[0].shape[0]) == 18
+
+    hdr, chunks, total = _decode(payload)
+    assert hdr.start_pos == 0
+    assert total == 18
+    assert len(chunks) == 1
+    assert chunks[0].num_tokens == 18
+
+    assert len(cache.prompts) == 1
+    assert int(cache.prompts[0].shape[0]) == 20
+
+
+def test_serve_prefill_skips_work_on_exact_cache_hit(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    inputs = _patch_prefill(monkeypatch)
+    cache = KVPrefixCache(group=None)
+    tokens = list(range(20))
+
+    _serve(
+        PrefillRequest(request_id="r1", model_id="m", token_ids=tokens, start_pos=0),
+        cache,
+    )
+    inputs.clear()
+
+    _serve(
+        PrefillRequest(request_id="r2", model_id="m", token_ids=tokens, start_pos=0),
+        cache,
+    )
+
+    assert inputs == []
+
+
+def test_serve_prefill_only_runs_suffix_on_partial_match(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    inputs = _patch_prefill(monkeypatch)
+    cache = KVPrefixCache(group=None)
+
+    base = list(range(15))
+    _serve(
+        PrefillRequest(request_id="r1", model_id="m", token_ids=base, start_pos=0),
+        cache,
+    )
+    inputs.clear()
+
+    extended = base + [99, 100, 101, 102, 103]
+    _serve(
+        PrefillRequest(request_id="r2", model_id="m", token_ids=extended, start_pos=0),
+        cache,
+    )
+
+    assert len(inputs) == 1
+    suffix_len = int(inputs[0].shape[0])
+    assert suffix_len < len(extended) - 2
+
+
+def test_serve_prefill_slices_payload_at_client_start_pos(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _patch_prefill(monkeypatch)
+    cache = KVPrefixCache(group=None)
+
+    n_tokens = 20
+    client_has = 12
+
+    payload = _serve(
+        PrefillRequest(
+            request_id="r1",
+            model_id="m",
+            token_ids=list(range(n_tokens)),
+            start_pos=client_has,
+        ),
+        cache,
+    )
+
+    hdr, chunks, total = _decode(payload)
+    assert hdr.start_pos == client_has
+    expected_sent = (n_tokens - 2) - client_has
+    assert total == expected_sent
+    assert len(chunks) == 1
+    assert chunks[0].num_tokens == expected_sent
+
+
+def test_serve_prefill_works_without_prefix_cache(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    inputs = _patch_prefill(monkeypatch)
+
+    payload = _serve(
+        PrefillRequest(
+            request_id="r1", model_id="m", token_ids=list(range(20)), start_pos=0
+        ),
+        None,
+    )
+
+    assert len(inputs) == 1
+    assert int(inputs[0].shape[0]) == 18
+
+    _, _, total = _decode(payload)
+    assert total == 18
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 000000000..141bdee7f
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,181 @@
+# type: ignore
+"""Pytest configuration for marker-driven exo integration tests.
+
+Test authors declare requirements via markers:
+
+    @pytest.mark.cluster(count=2, thunderbolt='a2a')
+    @pytest.mark.instance('mlx-community/Llama-3.2-1B-Instruct-4bit',
+                          sharding='tensor', comm='jaccl')
+    def test_jaccl_inference(session):
+        resp = session.chat('What is 2+2?')
+        assert '4' in resp
+
+Clusters are cached by `ClusterSpec`; tests with the same cluster_spec
+share a deployment. Each test places its own instance (matching its
+`@pytest.mark.instance`), and instances are cleaned up after the test.
+
+Run with:
+    uv run pytest tests/ -v
+    uv run pytest tests/ -v --hosts s2,s4,s9,s10
+"""
+
+from __future__ import annotations
+
+import contextlib
+import json
+
+import pytest
+from exo_tools.cluster import ClusterInfo, EcoSession
+from exo_tools.harness import cleanup_all_instances, place_instance
+
+from .framework import (
+    ClusterSpec,
+    Session,
+    parse_cluster_marker,
+    parse_instance_marker,
+)
+
+# Single eco session for the entire test process.
+eco = EcoSession(user_prefix="test")
+
+# Cluster cache keyed by ClusterSpec — tests with the same spec share a deployment.
+# Cleared at session teardown.
+_cluster_cache: dict[ClusterSpec, ClusterInfo] = {}
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--hosts",
+        default=None,
+        help="Comma-separated list of hosts (e.g. s2,s4,s9,s10). "
+        "Overrides constraint-based reservation.",
+    )
+
+
+def pytest_configure(config):
+    """Register custom markers."""
+    config.addinivalue_line(
+        "markers",
+        "cluster(count=N, thunderbolt=Thunderbolt|None, min_memory=GB, chip=PATTERN): "
+        "declare cluster requirements for a test",
+    )
+    config.addinivalue_line(
+        "markers",
+        "instance(model_id, sharding=Sharding, comm=Comm, min_nodes=N): "
+        "declare instance placement for a test",
+    )
+
+
+def pytest_report_header(config):
+    """Show the eco user and hosts for this test session."""
+    hosts = config.getoption("--hosts")
+    lines = [f"eco user: {eco.user}"]
+    if hosts:
+        lines.append(f"hosts override: {hosts}")
+    return lines
+
+
+@pytest.fixture(scope="session")
+def _host_pool(request) -> list[str] | None:
+    raw = request.config.getoption("--hosts")
+    if raw:
+        return [h.strip() for h in raw.split(",") if h.strip()]
+    return None
+
+
+@pytest.fixture
+def session(request, _host_pool) -> Session:
+    """Per-test fixture providing a Session matching the test's markers.
+
+    Reads @pytest.mark.cluster and @pytest.mark.instance from the test, deploys
+    a matching cluster (cached across tests with the same spec), places the
+    model, and yields a Session for the test to interact with. Cleans up the
+    instance after the test, and invalidates the cluster cache if the test
+    left nodes disconnected.
+    """
+    cluster_marker = request.node.get_closest_marker("cluster")
+    instance_marker = request.node.get_closest_marker("instance")
+
+    cluster_spec = parse_cluster_marker(cluster_marker)
+    instance_spec = parse_instance_marker(instance_marker)
+
+    # Deploy or reuse a cluster matching the spec
+    cluster = _cluster_cache.get(cluster_spec)
+    if cluster is None:
+        if _host_pool:
+            cluster = eco.start_deploy(
+                hosts=_host_pool[: cluster_spec.count], wait=True
+            )
+        else:
+            cluster = eco.start_deploy(
+                count=cluster_spec.count,
+                thunderbolt=cluster_spec.thunderbolt,
+                chip=cluster_spec.chip,
+                min_memory_gb=cluster_spec.min_memory_gb,
+                wait=True,
+            )
+        _cluster_cache[cluster_spec] = cluster
+
+    # Place an instance for this test if the test specified one
+    instance_id = None
+    if instance_spec is not None:
+        client = cluster.make_client()
+        instance_id = place_instance(
+            client,
+            instance_spec.model_id,
+            sharding=instance_spec.sharding,
+            comm=instance_spec.comm,
+            min_nodes=instance_spec.min_nodes,
+        )
+
+    sess = Session(
+        cluster=cluster,
+        eco=eco,
+        instance_spec=instance_spec,
+        instance_id=instance_id,
+    )
+
+    yield sess
+
+    # ---- Teardown ----
+
+    # If the test left nodes disconnected, invalidate the cluster cache and
+    # stop the cluster so the next test deploys fresh.
+    if sess._stopped_hosts:
+        _cluster_cache.pop(cluster_spec, None)
+        with contextlib.suppress(Exception):
+            eco.stop(sess.cluster.hosts)
+        return
+
+    # Otherwise, clean up any instances created during the test
+    with contextlib.suppress(Exception):
+        cleanup_all_instances(sess.client)
+
+
+# ---------------------------------------------------------------------------
+# Session-level teardown — stop all cached clusters
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session", autouse=True)
+def _teardown_clusters():
+    yield
+    for cluster in _cluster_cache.values():
+        with contextlib.suppress(Exception):
+            eco.stop(cluster.hosts)
+    _cluster_cache.clear()
+
+
+def pytest_runtest_makereport(item, call):
+    """Attach cluster logs to the test report when a test fails."""
+    if call.when != "call" or call.excinfo is None:
+        return
+
+    sess = item.funcargs.get("session")
+    if sess is None:
+        return
+    try:
+        logs = eco.logs(sess.cluster.hosts, lines=200)
+        item.add_report_section("call", "Cluster Logs", json.dumps(logs, indent=2))
+    except Exception:
+        pass
diff --git a/tests/framework.py b/tests/framework.py
new file mode 100644
index 000000000..4e4bd81ff
--- /dev/null
+++ b/tests/framework.py
@@ -0,0 +1,199 @@
+"""Marker-driven test framework for exo integration tests.
+
+Test authors declare requirements via markers:
+
+    @pytest.mark.cluster(count=2, thunderbolt='a2a')
+    @pytest.mark.instance('mlx-community/Llama-3.2-1B-Instruct-4bit',
+                          sharding='tensor', comm='jaccl')
+    def test_jaccl_inference(session):
+        resp = session.chat('What is 2+2?')
+        assert '4' in resp
+
+The `session` fixture reads the markers, deploys the cluster, places the
+instance, and provides a `Session` object. All cluster/instance orchestration
+lives in `exo_tools.harness`; this module is purely the pytest-facing layer.
+"""
+
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass, field
+from typing import Any
+
+from exo_tools.client import ExoClient
+from exo_tools.cluster import (
+    Chip,
+    ClusterInfo,
+    EcoSession,
+    Thunderbolt,
+    make_client_from_url,
+)
+from exo_tools.harness import Comm, Sharding
+
+from exo.api.types.api import (
+    ChatCompletionChoice,
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+)
+
+DEFAULT_MODEL = "mlx-community/Llama-3.2-1B-Instruct-4bit"
+
+
+def _extract_content(resp: ChatCompletionResponse) -> str:
+    """Extract plain-text content from a non-streaming chat completion."""
+    choice = resp.choices[0]
+    if not isinstance(choice, ChatCompletionChoice):
+        raise RuntimeError(
+            f"Expected non-streaming choice, got {type(choice).__name__}"
+        )
+    content = choice.message.content
+    if not isinstance(content, str):
+        raise RuntimeError(f"Expected string content, got {type(content).__name__}")
+    return content
+
+
+@dataclass(frozen=True)
+class ClusterSpec:
+    count: int = 1
+    thunderbolt: Thunderbolt | None = None
+    min_memory_gb: float | None = None
+    chip: Chip | None = None
+
+
+@dataclass(frozen=True)
+class InstanceSpec:
+    model_id: str
+    sharding: Sharding = Sharding.PIPELINE
+    comm: Comm = Comm.RING
+    min_nodes: int = 1
+
+
+def parse_cluster_marker(marker) -> ClusterSpec:
+    if marker is None:
+        return ClusterSpec()
+    return ClusterSpec(
+        count=marker.kwargs.get("count", 1),
+        thunderbolt=marker.kwargs.get("thunderbolt"),
+        min_memory_gb=marker.kwargs.get("min_memory"),
+        chip=marker.kwargs.get("chip"),
+    )
+
+
+def parse_instance_marker(marker) -> InstanceSpec | None:
+    if marker is None:
+        return None
+    if not marker.args:
+        raise ValueError(
+            "@pytest.mark.instance requires a positional model_id argument"
+        )
+    return InstanceSpec(
+        model_id=marker.args[0],
+        sharding=marker.kwargs.get("sharding", Sharding.PIPELINE),
+        comm=marker.kwargs.get("comm", Comm.RING),
+        min_nodes=marker.kwargs.get("min_nodes", 1),
+    )
+
+
+@dataclass
+class Session:
+    cluster: ClusterInfo
+    eco: EcoSession
+    instance_spec: InstanceSpec | None = None
+    instance_id: str | None = None
+    _stopped_hosts: set[str] = field(default_factory=set)
+
+    @property
+    def client(self) -> ExoClient:
+        for host in self.cluster.hosts:
+            if host not in self._stopped_hosts:
+                return make_client_from_url(self.cluster.api_endpoints[host])
+        return self.cluster.make_client()
+
+    @property
+    def state(self) -> dict[str, Any]:
+        return self.client.request_json("GET", "/state") or {}
+
+    @property
+    def instances(self) -> dict[str, Any]:
+        return self.state.get("instances", {})
+
+    # ---- Inference ----
+
+    def chat(self, prompt: str, max_tokens: int = 100) -> str:
+        resp = self.chat_raw(prompt, max_tokens=max_tokens)
+        return _extract_content(resp)
+
+    def chat_raw(self, prompt: str, **kwargs: Any) -> ChatCompletionResponse:
+        if not self.instance_spec:
+            raise RuntimeError(
+                "No instance placed; add @pytest.mark.instance to the test"
+            )
+        max_tokens = kwargs.pop("max_tokens", 100)
+        request = ChatCompletionRequest.model_validate(
+            {
+                "model": self.instance_spec.model_id,
+                "messages": [{"role": "user", "content": prompt}],
+                "max_tokens": max_tokens,
+                **kwargs,
+            }
+        )
+        return self._post_chat(request)
+
+    def multi_turn(self, messages: list[dict[str, str]], max_tokens: int = 100) -> str:
+        if not self.instance_spec:
+            raise RuntimeError(
+                "No instance placed; add @pytest.mark.instance to the test"
+            )
+        request = ChatCompletionRequest.model_validate(
+            {
+                "model": self.instance_spec.model_id,
+                "messages": messages,
+                "max_tokens": max_tokens,
+            }
+        )
+        return _extract_content(self._post_chat(request))
+
+    def _post_chat(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
+        raw = self.client.request_json(
+            "POST",
+            "/v1/chat/completions",
+            body=request.model_dump(exclude_none=True),
+        )
+        return ChatCompletionResponse.model_validate(raw)
+
+    def disconnect_node(self, index: int) -> None:
+        """Stop exo on a node and wait for the cluster to observe the disconnect."""
+        host = self.cluster.hosts[index]
+        self.eco.stop([host], keep=True)
+        self._stopped_hosts.add(host)
+
+    def reconnect_node(self, index: int) -> None:
+        """Restart a previously disconnected node into the existing namespace."""
+        host = self.cluster.hosts[index]
+        self.eco.start_hosts([host], namespace=self.cluster.namespace)
+        self._stopped_hosts.discard(host)
+
+    def wait_ready(
+        self, expected_nodes: int | None = None, timeout: float = 60
+    ) -> None:
+        """Poll until exactly `expected_nodes` nodes are visible and reporting memory.
+
+        `expected_nodes` defaults to the count of non-stopped hosts (handy after
+        `disconnect_node` / `reconnect_node`); raises TimeoutError after `timeout` s.
+        """
+        if expected_nodes is None:
+            expected_nodes = len(self.cluster.hosts) - len(self._stopped_hosts)
+        deadline = time.monotonic() + timeout  # monotonic: immune to clock steps
+        while time.monotonic() < deadline:
+            try:
+                state = self.state
+                identities = len(state.get("nodeIdentities", {}))
+                memory = len(state.get("nodeMemory", {}))
+                if identities == expected_nodes and memory == expected_nodes:
+                    return
+            except Exception:  # best-effort: ignore a failed poll and retry
+                pass
+            time.sleep(2.0)
+        raise TimeoutError(
+            f"Cluster did not reach exactly {expected_nodes} ready nodes within {timeout}s"
+        )
diff --git a/tests/test_1node.py b/tests/test_1node.py
new file mode 100644
index 000000000..7ef4a2789
--- /dev/null
+++ b/tests/test_1node.py
@@ -0,0 +1,75 @@
+# type: ignore
+"""Single-node integration tests.
+
+Run with:
+    uv run pytest tests/test_1node.py -v
+"""
+
+from __future__ import annotations
+
+import time
+
+import pytest
+from exo_tools.harness import is_model_downloaded, place_instance
+
+from .framework import DEFAULT_MODEL, InstanceSpec
+
+
+@pytest.mark.cluster(count=1)
+@pytest.mark.instance(DEFAULT_MODEL)
+def test_place_instance_and_chat(session):
+    resp = session.chat("Say hello in one sentence.")
+    assert len(resp) > 0
+
+
+@pytest.mark.cluster(count=1)
+@pytest.mark.instance(DEFAULT_MODEL)
+def test_chat_multiple_turns(session):
+    first_reply = session.chat("What is 2 + 2?")
+    assert len(first_reply) > 0
+
+    second_reply = session.multi_turn(
+        [
+            {"role": "user", "content": "What is 2 + 2?"},
+            {"role": "assistant", "content": first_reply},
+            {"role": "user", "content": "Now multiply that by 3."},
+        ]
+    )
+    assert len(second_reply) > 0
+
+
+@pytest.mark.cluster(count=1)
+@pytest.mark.instance(DEFAULT_MODEL)
+def test_delete_instance(session):
+    from exo_tools.harness import wait_for_instance_gone
+
+    session.client.request_json("DELETE", f"/instance/{session.instance_id}")
+    wait_for_instance_gone(session.client, session.instance_id, timeout=30.0)
+    assert len(session.instances) == 0, (
+        f"Expected no instances, found {len(session.instances)}"
+    )
+
+
+@pytest.mark.cluster(count=1)
+def test_download_from_scratch(session):
+    """Ensure the model is not on the cluster, then place an instance to
+    trigger a fresh download and verify inference.
+    """
+    node_id = next(iter(session.state.get("nodeIdentities", {})))
+
+    # Delete any existing download — the API call is idempotent
+    session.client.request_json("DELETE", f"/download/{node_id}/{DEFAULT_MODEL}")
+
+    # Poll until the model is gone (it may already be gone)
+    deadline = time.time() + 60.0
+    while time.time() < deadline:
+        if not is_model_downloaded(session.client, DEFAULT_MODEL):
+            break
+        time.sleep(2.0)
+    else:
+        raise AssertionError(f"Expected {DEFAULT_MODEL} to be deleted from cluster")
+
+    place_instance(session.client, DEFAULT_MODEL, timeout=900.0)
+    session.instance_spec = InstanceSpec(model_id=DEFAULT_MODEL)
+    resp = session.chat("Say hello in one sentence.")
+    assert len(resp) > 0
diff --git a/tests/test_2node.py b/tests/test_2node.py
new file mode 100644
index 000000000..ffc3ed2b5
--- /dev/null
+++ b/tests/test_2node.py
@@ -0,0 +1,49 @@
+# type: ignore
+"""Two-node integration tests (ring + jaccl parallelism).
+
+Run with:
+    uv run pytest tests/test_2node.py -v
+"""
+
+from __future__ import annotations
+
+import pytest
+from exo_tools.cluster import Thunderbolt
+from exo_tools.harness import Comm, Sharding
+
+from .framework import DEFAULT_MODEL
+
+
+@pytest.mark.cluster(count=2, thunderbolt=Thunderbolt.A2A)
+@pytest.mark.instance(
+    DEFAULT_MODEL, sharding=Sharding.TENSOR, comm=Comm.JACCL, min_nodes=2
+)
+def test_2node_jaccl(session):
+    resp = session.chat("Say hello in one sentence.")
+    assert len(resp) > 0
+
+
+@pytest.mark.cluster(count=2, thunderbolt=Thunderbolt.A2A)
+@pytest.mark.instance(
+    DEFAULT_MODEL, sharding=Sharding.PIPELINE, comm=Comm.RING, min_nodes=2
+)
+def test_2node_ring(session):
+    resp = session.chat("Say hello in one sentence.")
+    assert len(resp) > 0
+
+
+@pytest.mark.cluster(count=2, thunderbolt=Thunderbolt.A2A)
+@pytest.mark.instance(
+    DEFAULT_MODEL, sharding=Sharding.TENSOR, comm=Comm.JACCL, min_nodes=2
+)
+def test_2node_jaccl_multi_turn(session):
+    first = session.chat("What is the capital of France?")
+    assert len(first) > 0
+    second = session.multi_turn(
+        [
+            {"role": "user", "content": "What is the capital of France?"},
+            {"role": "assistant", "content": first},
+            {"role": "user", "content": "What country is it in?"},
+        ]
+    )
+    assert len(second) > 0
diff --git a/tests/test_4node.py b/tests/test_4node.py
new file mode 100644
index 000000000..cf0601944
--- /dev/null
+++ b/tests/test_4node.py
@@ -0,0 +1,32 @@
+# type: ignore
+"""Four-node integration tests.
+
+Run with:
+    uv run pytest tests/test_4node.py -v
+"""
+
+from __future__ import annotations
+
+import pytest
+from exo_tools.cluster import Thunderbolt
+from exo_tools.harness import Comm, Sharding
+
+from .framework import DEFAULT_MODEL
+
+
+@pytest.mark.cluster(count=4, thunderbolt=Thunderbolt.A2A)
+@pytest.mark.instance(
+    DEFAULT_MODEL, sharding=Sharding.PIPELINE, comm=Comm.RING, min_nodes=4
+)
+def test_4node_pipeline_ring(session):
+    resp = session.chat("Say hello in one sentence.")
+    assert len(resp) > 0
+
+
+@pytest.mark.cluster(count=4, thunderbolt=Thunderbolt.A2A)
+@pytest.mark.instance(
+    DEFAULT_MODEL, sharding=Sharding.TENSOR, comm=Comm.JACCL, min_nodes=4
+)
+def test_4node_tensor_jaccl(session):
+    resp = session.chat("Say hello in one sentence.")
+    assert len(resp) > 0
diff --git a/tests/test_dashboard.py b/tests/test_dashboard.py
new file mode 100644
index 000000000..2a3524838
--- /dev/null
+++ b/tests/test_dashboard.py
@@ -0,0 +1,102 @@
+# type: ignore
+"""Dashboard end-to-end tests using Playwright (headless Chromium).
+
+Prerequisites:
+    uv run playwright install chromium
+
+Run with:
+    uv run pytest tests/test_dashboard.py -v
+"""
+
+from __future__ import annotations
+
+import contextlib
+
+import pytest
+
+try:
+    from playwright.sync_api import sync_playwright
+
+    _HAS_PLAYWRIGHT = True
+except ImportError:
+    _HAS_PLAYWRIGHT = False
+
+# Check if Chromium is installed by attempting a quick launch
+_HAS_CHROMIUM = False
+if _HAS_PLAYWRIGHT:
+    try:
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            browser.close()
+        _HAS_CHROMIUM = True
+    except Exception:
+        pass
+
+pytestmark = pytest.mark.skipif(
+    not _HAS_PLAYWRIGHT or not _HAS_CHROMIUM,
+    reason="playwright or chromium not installed (run: uv run playwright install chromium)",
+)
+
+
+def _mark_onboarding_complete(session) -> None:
+    """Mark onboarding complete on the server so the wizard doesn't auto-launch a model."""
+    with contextlib.suppress(Exception):
+        session.client.request_json("POST", "/onboarding")
+
+
+@pytest.mark.cluster(count=1)
+def test_dashboard_chat_inference(session):
+    """Full UI flow: open dashboard, pick a model, send a chat, verify response.
+
+    The instance is created via the dashboard UI (model picker → chat send
+    triggers the dashboard's auto-launch flow), not via @pytest.mark.instance.
+    """
+    _mark_onboarding_complete(session)
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        page = browser.new_page(viewport={"width": 1280, "height": 800})
+        page.goto(session.cluster.api_url, wait_until="networkidle")
+        page.wait_for_timeout(3000)
+        page.screenshot(path="/tmp/dashboard_initial.png")
+
+        # Open the model picker by clicking the "SELECT MODEL" button
+        page.get_by_text("SELECT MODEL", exact=False).first.click()
+        page.wait_for_timeout(1000)
+        page.screenshot(path="/tmp/dashboard_picker_open.png")
+
+        # Search for the model — uses the model id substring; the picker
+        # matches against name/id so "Llama-3.2-1B" filters to the small Llama.
+        search_input = page.locator('input[placeholder*="Search models"]').first
+        search_input.fill("Llama-3.2-1B")
+        page.wait_for_timeout(1500)
+        page.screenshot(path="/tmp/dashboard_picker_search.png")
+
+        # Click the only matching result. The picker shows the model's
+        # display name (e.g. "Llama 3.2 1B") which differs from the model_id.
+        # We click the first visible button-like row in the result list.
+        page.get_by_text("Llama 3.2 1B", exact=False).first.click()
+        page.wait_for_timeout(1500)
+        page.screenshot(path="/tmp/dashboard_model_selected.png")
+
+        # Type a chat message — sending triggers the dashboard's auto-launch
+        # flow: it picks an optimal placement for the selected model and POSTs
+        # to /instance, then sends the chat once the runner is ready.
+        chat_input = page.locator("textarea").first
+        chat_input.fill("Say hello")
+        chat_input.press("Enter")
+        page.screenshot(path="/tmp/dashboard_chat_sent.png")
+
+        # Wait for the instance to launch and respond. Generous timeout
+        # because this includes model placement + load + generation.
+        page.wait_for_timeout(60000)
+        page.screenshot(path="/tmp/dashboard_after_chat.png")
+
+        # Verify an instance was created and the chat got a response
+        instances = session.client.request_json("GET", "/state").get("instances", {})
+        assert len(instances) > 0, "Expected the dashboard to have created an instance"
+
+        body_text = page.text_content("body") or ""
+        assert len(body_text) > 0
+
+        browser.close()
diff --git a/tests/test_resilience.py b/tests/test_resilience.py
new file mode 100644
index 000000000..69a007d9b
--- /dev/null
+++ b/tests/test_resilience.py
@@ -0,0 +1,56 @@
+# type: ignore
+"""Resilience tests: disconnect/reconnect nodes and verify cluster recovery.
+
+Run with:
+    uv run pytest tests/test_resilience.py -v
+"""
+
+from __future__ import annotations
+
+import pytest
+from exo_tools.cluster import Thunderbolt
+from exo_tools.harness import Comm, Sharding, cleanup_all_instances, place_instance
+
+from .framework import DEFAULT_MODEL, InstanceSpec
+
+
+@pytest.mark.cluster(count=2, thunderbolt=Thunderbolt.A2A)
+@pytest.mark.instance(
+    DEFAULT_MODEL, sharding=Sharding.PIPELINE, comm=Comm.RING, min_nodes=2
+)
+def test_node_recovery(session):
+    """Full disconnect/reconnect cycle.
+
+    1. Place a 2-node instance, verify inference
+    2. Disconnect one node
+    3. Place a 1-node instance on remaining node, verify inference
+    4. Reconnect the stopped node, wait for the cluster to reform
+    5. Place a 2-node instance again, verify inference
+    """
+    # --- Phase 1: 2-node inference ---
+    resp = session.chat("Hello")
+    assert len(resp) > 0
+
+    # --- Phase 2: disconnect one node ---
+    session.disconnect_node(1)
+    session.wait_ready(timeout=60)  # keyword: first positional is expected_nodes
+
+    # Clean up the now-broken 2-node instance
+    cleanup_all_instances(session.client)
+
+    # --- Phase 3: 1-node inference on the remaining node ---
+    place_instance(session.client, DEFAULT_MODEL, min_nodes=1)
+    session.instance_spec = InstanceSpec(model_id=DEFAULT_MODEL, min_nodes=1)
+    resp = session.chat("Hello")
+    assert len(resp) > 0
+
+    # --- Phase 4: reconnect and restore 2-node cluster ---
+    cleanup_all_instances(session.client)
+    session.reconnect_node(1)
+    session.wait_ready(timeout=60)  # keyword: first positional is expected_nodes
+
+    # --- Phase 5: 2-node inference again ---
+    place_instance(session.client, DEFAULT_MODEL, min_nodes=2)
+    session.instance_spec = InstanceSpec(model_id=DEFAULT_MODEL, min_nodes=2)
+    resp = session.chat("Hello again")
+    assert len(resp) > 0
diff --git a/tests/auto_bench.sh b/tmp/old_tests/auto_bench.sh
similarity index 100%
rename from tests/auto_bench.sh
rename to tmp/old_tests/auto_bench.sh
diff --git a/tests/eval_tool_calls.sh b/tmp/old_tests/eval_tool_calls.sh
similarity index 100%
rename from tests/eval_tool_calls.sh
rename to tmp/old_tests/eval_tool_calls.sh
diff --git a/tests/get_all_models_on_cluster.py b/tmp/old_tests/get_all_models_on_cluster.py
similarity index 100%
rename from tests/get_all_models_on_cluster.py
rename to tmp/old_tests/get_all_models_on_cluster.py
diff --git a/tests/headless_runner.py b/tmp/old_tests/headless_runner.py
similarity index 100%
rename from tests/headless_runner.py
rename to tmp/old_tests/headless_runner.py
diff --git a/tests/run_exo_on.sh b/tmp/old_tests/run_exo_on.sh
similarity index 100%
rename from tests/run_exo_on.sh
rename to tmp/old_tests/run_exo_on.sh
diff --git a/tests/start_distributed_test.py b/tmp/old_tests/start_distributed_test.py
similarity index 100%
rename from tests/start_distributed_test.py
rename to tmp/old_tests/start_distributed_test.py
diff --git a/tests/test_vision_cache.py b/tmp/old_tests/test_vision_cache.py
similarity index 100%
rename from tests/test_vision_cache.py
rename to tmp/old_tests/test_vision_cache.py
diff --git a/tools/pyproject.toml b/tools/pyproject.toml
new file mode 100644
index 000000000..8d33dea00
--- /dev/null
+++ b/tools/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "exo-tools"
+version = "0.1.0"
+description = "Shared tooling for interacting with exo clusters"
+requires-python = ">=3.13"
+dependencies = ["loguru>=0.7.3"]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/tools/src/exo_tools/__init__.py b/tools/src/exo_tools/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tools/src/exo_tools/client.py b/tools/src/exo_tools/client.py
new file mode 100644
index 000000000..181814612
--- /dev/null
+++ b/tools/src/exo_tools/client.py
@@ -0,0 +1,117 @@
+# type: ignore
+"""HTTP client for the exo API."""
+
+from __future__ import annotations
+
+import http.client
+import json
+from collections.abc import Iterator
+from typing import Any
+from urllib.parse import urlencode
+
+
+class ExoHttpError(RuntimeError):  # raised by ExoClient when the HTTP status is >= 400
+    def __init__(self, status: int, reason: str, body_preview: str):
+        super().__init__(f"HTTP {status} {reason}: {body_preview}")
+        self.status = status  # kept so callers can branch on it (e.g. 404 -> None)
+
+
+class ExoClient:
+    """Minimal HTTP client for the exo API; opens one connection per request."""
+
+    def __init__(self, host: str, port: int, timeout_s: float = 7200.0):
+        self.host = host
+        self.port = port
+        self.timeout_s = timeout_s
+
+    def request_json(
+        self,
+        method: str,
+        path: str,
+        params: dict[str, Any] | None = None,
+        body: dict[str, Any] | None = None,
+        headers: dict[str, str] | None = None,
+    ) -> Any:
+        """Send one HTTP request and return its JSON-decoded body.
+
+        `path` gets a leading "/" if missing and `params` become the query
+        string. Returns None for an empty body; raises ExoHttpError (with the
+        first 300 characters of the body) when the status is >= 400.
+        """
+        target = path if path.startswith("/") else "/" + path
+        if params:
+            target = target + "?" + urlencode(params)
+
+        conn = http.client.HTTPConnection(self.host, self.port, timeout=self.timeout_s)
+        try:
+            encoded = None if body is None else json.dumps(body).encode("utf-8")
+            send_headers: dict[str, str] = {"Accept": "application/json"}
+            if encoded is not None:
+                send_headers["Content-Type"] = "application/json"
+            send_headers.update(headers or {})
+
+            conn.request(method.upper(), target, body=encoded, headers=send_headers)
+            resp = conn.getresponse()
+            text = resp.read().decode("utf-8", errors="replace")
+            if resp.status >= 400:
+                raise ExoHttpError(resp.status, resp.reason, text[:300])
+            return json.loads(text) if text else None
+        finally:
+            conn.close()
+
+    def post_bench_chat_completions(self, payload: dict[str, Any]) -> dict[str, Any]:
+        return self.request_json("POST", "/bench/chat/completions", body=payload)
+
+    def stream_bench_chat_completions(self, payload: dict[str, Any]) -> Iterator[str]:
+        """POST /bench/chat/completions with stream=True, yielding raw SSE lines.
+
+        Does not mutate `payload`; raises ExoHttpError on status >= 400.
+        """
+        sse_headers = {
+            "Content-Type": "application/json",
+            "Accept": "text/event-stream",
+        }
+        encoded = json.dumps({**payload, "stream": True}).encode("utf-8")
+        conn = http.client.HTTPConnection(self.host, self.port, timeout=self.timeout_s)
+        try:
+            conn.request(
+                "POST", "/bench/chat/completions", body=encoded, headers=sse_headers
+            )
+            resp = conn.getresponse()
+            if resp.status >= 400:
+                preview = resp.read().decode("utf-8", errors="replace")[:300]
+                raise ExoHttpError(resp.status, resp.reason, preview)
+            for raw_line in resp:
+                yield raw_line.decode("utf-8", errors="replace")
+        finally:
+            conn.close()
+
+    def get_state_path(self, path: str) -> Any:
+        """GET /state/<path>; a 404 is reported as None, other errors propagate."""
+        try:
+            return self.request_json("GET", f"/state/{path}")
+        except ExoHttpError as err:
+            if err.status != 404:
+                raise
+        return None
+
+    def get_instance(self, instance_id: str) -> dict[str, Any] | None:
+        return self.get_state_path(f"instances/{instance_id}")
+
+    def get_runner(self, runner_id: str) -> dict[str, Any] | None:
+        return self.get_state_path(f"runners/{runner_id}")
+
+    def get_node_downloads(self, node_id: str) -> list[dict[str, Any]] | None:
+        return self.get_state_path(f"downloads/{node_id}")
+
+    def get_node_disk(self, node_id: str) -> dict[str, Any] | None:
+        return self.get_state_path(f"nodeDisk/{node_id}")
+
+    def get_node_system(self, node_id: str) -> dict[str, Any] | None:
+        return self.get_state_path(f"nodeSystem/{node_id}")
+
+    def get_node_identities(self) -> dict[str, Any] | None:
+        return self.get_state_path("nodeIdentities")
+
+    def get_topology(self) -> dict[str, Any] | None:
+        return self.get_state_path("topology")
diff --git a/tools/src/exo_tools/cluster.py b/tools/src/exo_tools/cluster.py
new file mode 100644
index 000000000..ac4d47d0c
--- /dev/null
+++ b/tools/src/exo_tools/cluster.py
@@ -0,0 +1,243 @@
+# type: ignore
+"""Cluster lifecycle management via eco.
+
+Provides subprocess wrappers for eco commands (deploy, stop, start, release,
+logs, exec) and a ClusterInfo dataclass. Reusable by integration tests,
+bench, eval, and CI workflows.
+"""
+
+from __future__ import annotations
+
+import atexit
+import contextlib
+import json
+import logging
+import os
+import signal
+import subprocess
+import uuid
+from dataclasses import dataclass, field
+from enum import Enum
+
+from .client import ExoClient
+
+
+class Thunderbolt(str, Enum):  # value becomes the eco flag suffix: --tb-<value>
+    A2A = "a2a"  # all-to-all (eco --tb-a2a)
+    RING = "ring"  # ring topology (eco --tb-ring)
+
+
+class Chip(str, Enum):  # value is passed verbatim as the argument of `eco --chip`
+    M1 = "M1"
+    M1_PRO = "M1 Pro"
+    M1_MAX = "M1 Max"
+    M1_ULTRA = "M1 Ultra"
+    M2 = "M2"
+    M2_PRO = "M2 Pro"
+    M2_MAX = "M2 Max"
+    M2_ULTRA = "M2 Ultra"
+    M3 = "M3"
+    M3_PRO = "M3 Pro"
+    M3_MAX = "M3 Max"
+    M3_ULTRA = "M3 Ultra"
+    M4 = "M4"
+    M4_PRO = "M4 Pro"
+    M4_MAX = "M4 Max"
+    M4_ULTRA = "M4 Ultra"
+
+
+logger = logging.getLogger("exo_tools.cluster")
+
+# When set, deploy from a GitHub branch/tag instead of local source (rsync).
+_EXO_REF = os.environ.get("EXO_REF")
+
+
+@dataclass
+class ClusterInfo:
+    """Result of an `eco start --deploy` call, plus the parsed primary API address."""
+
+    hosts: list[str]
+    namespace: str
+    api_endpoints: dict[str, str]  # host -> url
+    api_url: str  # primary endpoint for ExoClient
+
+    primary_host: str = ""  # defaults to hosts[0] when left empty
+    _host: str = field(init=False, repr=False, default="")  # parsed from api_url
+    _port: int = field(init=False, repr=False, default=52415)  # parsed from api_url
+
+    def __post_init__(self) -> None:
+        if not self.primary_host:
+            self.primary_host = self.hosts[0]
+        # Keep only "host[:port]": drop the scheme and any path or trailing "/",
+        # which would otherwise end up inside the port and break int().
+        host, _, port = self.api_url.split("://", 1)[-1].split("/", 1)[0].partition(":")
+        self._host, self._port = host, (int(port) if port else 52415)
+
+    def make_client(self, timeout_s: float = 7200.0) -> ExoClient:
+        return ExoClient(self._host, self._port, timeout_s=timeout_s)
+
+
+class EcoSession:
+    """Manages an eco session with a unique user and automatic cleanup.
+
+    Usage:
+        session = EcoSession(user_prefix="test")
+        cluster = session.start_deploy(count=2, thunderbolt=Thunderbolt.A2A)
+        ...
+        session.stop_all()  # or let atexit handle it
+
+    The session registers atexit and signal handlers to ensure cleanup
+    on normal exit, uncaught exceptions, SIGTERM, and SIGHUP. SIGINT
+    is left unhandled so KeyboardInterrupt propagates normally.
+    """
+
+    def __init__(self, user_prefix: str = "test") -> None:
+        self._session_id = uuid.uuid4().hex[:8]
+        self.user = f"{user_prefix}-{self._session_id}"
+        self._env = {**os.environ, "USER": self.user}  # every eco call runs as this USER
+
+        # Register cleanup handlers
+        atexit.register(self.stop_all)
+        for sig in (signal.SIGTERM, signal.SIGHUP):
+            signal.signal(sig, self._signal_handler)
+
+    def _signal_handler(self, signum: int, _frame: object) -> None:
+        self.stop_all()
+        raise SystemExit(128 + signum)  # conventional "terminated by signal" status
+
+    def stop_all(self) -> None:
+        """Stop all clusters and release all reservations for this session."""
+        with contextlib.suppress(Exception):  # best-effort: cleanup must never raise
+            subprocess.run(
+                ["eco", "stop"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                env=self._env,
+            )
+
+    def _run(
+        self, args: list[str], *, check: bool = True, timeout: int = 120
+    ) -> subprocess.CompletedProcess[str]:
+        """Run an eco command as this session's user.
+
+        stdout is captured (JSON output), stderr is passed through to the
+        console so eco's progress messages are visible.
+        """
+        logger.info(f"eco: {' '.join(args)}")
+        return subprocess.run(
+            args,
+            stdout=subprocess.PIPE,
+            stderr=None,
+            text=True,
+            check=check,
+            timeout=timeout,
+            env=self._env,
+        )
+
+    def start_deploy(
+        self,
+        hosts: list[str] | None = None,
+        *,
+        count: int | None = None,
+        thunderbolt: Thunderbolt | None = None,
+        chip: Chip | None = None,
+        min_memory_gb: float | None = None,
+        wait: bool = True,
+        ref: str | None = _EXO_REF,
+        timeout: int = 600,
+    ) -> ClusterInfo:
+        """Start and deploy exo on a set of hosts via eco.
+
+        By default, deploys from local source via rsync. Set EXO_REF
+        or pass ref= to deploy from a GitHub branch/tag instead (for CI).
+        """
+        cmd: list[str] = ["eco", "--json", "start", "--deploy"]
+        if hosts:
+            cmd.extend(hosts)
+        if count is not None:
+            cmd.extend(["--count", str(count)])
+        if thunderbolt is not None:
+            cmd.append(f"--tb-{thunderbolt.value}")
+        if chip is not None:
+            cmd.extend(["--chip", chip.value])
+        if min_memory_gb is not None:
+            cmd.extend(["--min-memory", str(min_memory_gb)])
+        if wait:
+            cmd.append("--wait")
+        if ref:
+            cmd.extend(["--ref", ref])
+
+        result = self._run(cmd, timeout=timeout)
+        data = json.loads(result.stdout)["data"]
+        endpoints: dict[str, str] = data["api_endpoints"]
+        primary_host = data["hosts"][0]  # first reported host acts as the primary
+
+        return ClusterInfo(
+            hosts=data["hosts"],
+            namespace=data["namespace"],
+            api_endpoints=endpoints,
+            api_url=endpoints[primary_host],
+            primary_host=primary_host,
+        )
+
+    def stop(self, hosts: list[str], *, keep: bool = False, timeout: int = 120) -> None:
+        """Stop exo on the given hosts. If keep=True, keep the reservation."""
+        cmd: list[str] = ["eco", "stop"]
+        cmd.extend(hosts)
+        if keep:
+            cmd.append("--keep")
+        self._run(cmd, timeout=timeout)
+
+    def start_hosts(
+        self, hosts: list[str], *, namespace: str, timeout: int = 300
+    ) -> None:
+        """Start (previously stopped) hosts back into an existing namespace."""
+        cmd: list[str] = ["eco", "--json", "start"]
+        cmd.extend(hosts)
+        cmd.extend(["--namespace", namespace])
+        self._run(cmd, timeout=timeout)
+
+    def release(self, hosts: list[str], timeout: int = 120) -> None:
+        """Release hosts from the reservation."""
+        cmd: list[str] = ["eco", "release"]
+        cmd.extend(hosts)
+        self._run(cmd, timeout=timeout)
+
+    def logs(
+        self, hosts: list[str], lines: int = 500, timeout: int = 60
+    ) -> dict[str, list[str]]:
+        """Fetch recent logs; failures are returned under "_error" / "_raw" keys."""
+        cmd: list[str] = ["eco", "--json", "logs", *hosts, "-n", str(lines), "--raw"]
+        result = self._run(cmd, check=False, timeout=timeout)
+        if result.returncode != 0:
+            # _run() leaves stderr attached to the console, so result.stderr is
+            # None here; report the exit status instead of returning [None].
+            return {"_error": [f"eco logs exited with status {result.returncode}"]}
+        try:
+            return json.loads(result.stdout)
+        except json.JSONDecodeError:
+            return {"_raw": result.stdout.splitlines()}
+
+    def exec(self, hosts: list[str], command: str, timeout: int = 120) -> str:
+        """Run a command on the given hosts via eco and return its stdout.
+
+        `command` is split on whitespace (no shell-style quoting); a non-zero
+        exit status is not raised.
+        """
+        cmd: list[str] = ["eco", "exec", *hosts, "--", *command.split()]
+        return self._run(cmd, check=False, timeout=timeout).stdout
+
+
+def make_client(cluster: ClusterInfo, timeout_s: float = 7200.0) -> ExoClient:
+    """Create an ExoClient for `cluster` by delegating to ClusterInfo.make_client()."""
+    return cluster.make_client(timeout_s=timeout_s)
+
+
+def make_client_from_url(url: str, timeout_s: float = 7200.0) -> ExoClient:
+    """Create an ExoClient from 'http://host:port'; scheme and path are optional."""
+    # Keep only the authority part so "http://host:port/" or ".../path" still parse
+    # (previously the path ended up inside the port and int() raised ValueError).
+    host, _, port = url.split("://", 1)[-1].split("/", 1)[0].partition(":")
+    # Fall back to port 52415 when the URL does not name one.
+    return ExoClient(host, int(port) if port else 52415, timeout_s=timeout_s)
diff --git a/bench/harness.py b/tools/src/exo_tools/harness.py
similarity index 74%
rename from bench/harness.py
rename to tools/src/exo_tools/harness.py
index 440a97cfe..a80da6254 100644
--- a/bench/harness.py
+++ b/tools/src/exo_tools/harness.py
@@ -1,104 +1,39 @@
 # type: ignore
+"""Instance lifecycle helpers for exo clusters.
+
+Provides utilities for placing instances, waiting for readiness,
+managing downloads, filtering placements, and common CLI arguments.
+"""
+
 from __future__ import annotations
 
 import argparse
-import http.client
-import json
+import contextlib
 import os
 import time
+from enum import Enum
 from typing import Any
-from urllib.parse import urlencode
 
 from loguru import logger
 
+from .client import ExoClient, ExoHttpError
+
+
+class Sharding(str, Enum):  # value is sent as "sharding" in the /place_instance body
+    PIPELINE = "Pipeline"  # layers split across nodes
+    TENSOR = "Tensor"  # layers split within (across nodes)
+
+
+class Comm(str, Enum):  # value is sent as "instance_meta" in the /place_instance body
+    RING = "MlxRing"  # ring all-reduce over network
+    JACCL = "MlxJaccl"  # RDMA over Thunderbolt
+
+
 _SETTLE_INITIAL_BACKOFF_S = 1.0
 _SETTLE_MAX_BACKOFF_S = 60.0
 _SETTLE_BACKOFF_MULTIPLIER = 2.0
 
 
-class ExoHttpError(RuntimeError):
-    def __init__(self, status: int, reason: str, body_preview: str):
-        super().__init__(f"HTTP {status} {reason}: {body_preview}")
-        self.status = status
-
-
-class ExoClient:
-    def __init__(self, host: str, port: int, timeout_s: float = 7200.0):
-        self.host = host
-        self.port = port
-        self.timeout_s = timeout_s
-
-    def request_json(
-        self,
-        method: str,
-        path: str,
-        params: dict[str, Any] | None = None,
-        body: dict[str, Any] | None = None,
-        headers: dict[str, str] | None = None,
-    ) -> Any:
-        if not path.startswith("/"):
-            path = "/" + path
-        if params:
-            path = path + "?" + urlencode(params)
-
-        conn = http.client.HTTPConnection(self.host, self.port, timeout=self.timeout_s)
-        try:
-            payload: bytes | None = None
-            hdrs: dict[str, str] = {"Accept": "application/json"}
-
-            if body is not None:
-                payload = json.dumps(body).encode("utf-8")
-                hdrs["Content-Type"] = "application/json"
-            if headers:
-                hdrs.update(headers)
-
-            conn.request(method.upper(), path, body=payload, headers=hdrs)
-            resp = conn.getresponse()
-            raw = resp.read()
-            text = raw.decode("utf-8", errors="replace") if raw else ""
-
-            if resp.status >= 400:
-                raise ExoHttpError(resp.status, resp.reason, text[:300])
-
-            if not text:
-                return None
-            return json.loads(text)
-        finally:
-            conn.close()
-
-    def post_bench_chat_completions(self, payload: dict[str, Any]) -> dict[str, Any]:
-        return self.request_json("POST", "/bench/chat/completions", body=payload)
-
-    def get_state_path(self, path: str) -> Any:
-        try:
-            return self.request_json("GET", f"/state/{path}")
-        except ExoHttpError as e:
-            if e.status == 404:
-                return None
-            raise
-
-    def get_instance(self, instance_id: str) -> dict[str, Any] | None:
-        return self.get_state_path(f"instances/{instance_id}")
-
-    def get_runner(self, runner_id: str) -> dict[str, Any] | None:
-        return self.get_state_path(f"runners/{runner_id}")
-
-    def get_node_downloads(self, node_id: str) -> list[dict[str, Any]] | None:
-        return self.get_state_path(f"downloads/{node_id}")
-
-    def get_node_disk(self, node_id: str) -> dict[str, Any] | None:
-        return self.get_state_path(f"nodeDisk/{node_id}")
-
-    def get_node_system(self, node_id: str) -> dict[str, Any] | None:
-        return self.get_state_path(f"nodeSystem/{node_id}")
-
-    def get_node_identities(self) -> dict[str, Any] | None:
-        return self.get_state_path("nodeIdentities")
-
-    def get_topology(self) -> dict[str, Any] | None:
-        return self.get_state_path("topology")
-
-
 def unwrap_instance(instance: dict[str, Any]) -> dict[str, Any]:
     if len(instance) != 1:
         raise KeyError(f"Expected 1 key, got keys={list(instance.keys())}")
@@ -268,11 +203,15 @@ def sharding_filter(sharding: str, wanted: str) -> bool:
 
 
 def fetch_and_filter_placements(
-    client: ExoClient, full_model_id: str, args: argparse.Namespace
+    client: ExoClient,
+    full_model_id: str,
+    args: argparse.Namespace,
+    node_id: str | None = None,
 ) -> list[dict[str, Any]]:
-    previews_resp = client.request_json(
-        "GET", "/instance/previews", params={"model_id": full_model_id}
-    )
+    params: dict[str, str] = {"model_id": full_model_id}
+    if node_id is not None:
+        params["node_ids"] = node_id
+    previews_resp = client.request_json("GET", "/instance/previews", params=params)
     previews = previews_resp.get("previews") or []
 
     selected: list[dict[str, Any]] = []
@@ -332,8 +271,9 @@ def settle_and_fetch_placements(
     full_model_id: str,
     args: argparse.Namespace,
     settle_timeout: float = 0,
+    node_id: str | None = None,
 ) -> list[dict[str, Any]]:
-    selected = fetch_and_filter_placements(client, full_model_id, args)
+    selected = fetch_and_filter_placements(client, full_model_id, args, node_id=node_id)
 
     if not selected and settle_timeout > 0:
         backoff = _SETTLE_INITIAL_BACKOFF_S
@@ -346,7 +286,9 @@ def settle_and_fetch_placements(
             )
             time.sleep(min(backoff, remaining))
             backoff = min(backoff * _SETTLE_BACKOFF_MULTIPLIER, _SETTLE_MAX_BACKOFF_S)
-            selected = fetch_and_filter_placements(client, full_model_id, args)
+            selected = fetch_and_filter_placements(
+                client, full_model_id, args, node_id=node_id
+            )
 
     return selected
 
@@ -462,9 +404,8 @@ def run_planning_phase(
         )
         logger.info(f"Started download on {node_id}")
 
-    # Wait for downloads
-    start = time.time()
-    while time.time() - start < timeout:
+    # Wait for downloads (no timeout — poll until complete or failed)
+    while True:
         all_done = True
         for node_id in node_ids:
             node_downloads = client.get_node_downloads(node_id) or []
@@ -514,9 +455,23 @@ def run_planning_phase(
             if download_t0 is not None:
                 return time.perf_counter() - download_t0
             return None
-        time.sleep(1)
+        time.sleep(10)
 
-    raise TimeoutError("Downloads did not complete in time")
+
+def find_existing_instance(client: ExoClient, model_id: str) -> str | None:
+    """Return the id of the first instance in /state serving `model_id`, else None."""
+    try:
+        # request_json() returns None for an empty body; treat that as "no state".
+        state = client.request_json("GET", "/state") or {}
+    except Exception:
+        return None  # best-effort lookup: an unreachable API means nothing to reuse
+    for inst_id, inst in (state.get("instances") or {}).items():
+        # Instances are wrapped as {<type>: {...}}; inspect the inner dict.
+        for inner in inst.values():
+            sa = inner.get("shardAssignments") if isinstance(inner, dict) else None
+            if sa and sa.get("modelId") == model_id:
+                return inst_id
+    return None
 
 
 def add_common_instance_args(ap: argparse.ArgumentParser) -> None:
@@ -564,7 +519,7 @@ def add_common_instance_args(ap: argparse.ArgumentParser) -> None:
     ap.add_argument(
         "--settle-timeout",
         type=float,
-        default=0,
+        default=60.0,
         help="Max seconds to wait for the cluster to produce valid placements (0 = try once).",
     )
     ap.add_argument(
@@ -572,3 +527,117 @@ def add_common_instance_args(ap: argparse.ArgumentParser) -> None:
         action="store_true",
         help="Delete existing models from smallest to largest to make room for benchmark model.",
     )
+    ap.add_argument(
+        "--reuse-instance",
+        action="store_true",
+        help="Reuse an existing running instance for this model instead of creating a new one.",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Cluster/instance orchestration helpers (used by tests, bench, eval)
+# ---------------------------------------------------------------------------
+
+
+def get_instance_ids(client: ExoClient) -> set[str]:
+    """Collect the IDs of all instances in /state, skipping unparseable entries."""
+    instances = (client.request_json("GET", "/state") or {}).get("instances", {})
+    found: set[str] = set()
+    for entry in instances.values():
+        with contextlib.suppress(Exception):
+            found.add(instance_id_from_instance(entry))
+    return found
+
+
+def wait_for_cluster_ready(
+    client: ExoClient, expected_nodes: int = 1, timeout: float = 120.0
+) -> None:
+    """Block until /state lists enough nodes, polling once per second.
+
+    Ready means both nodeIdentities and nodeMemory hold at least
+    `expected_nodes` entries (placement needs memory info for every node).
+    Request errors are ignored and retried; TimeoutError after `timeout` s.
+    """
+    began = time.time()
+    while time.time() - began < timeout:
+        try:
+            state = client.request_json("GET", "/state") or {}
+            identities = len(state.get("nodeIdentities", {}))
+            memories = len(state.get("nodeMemory", {}))
+            if identities >= expected_nodes and memories >= expected_nodes:
+                return
+        except Exception:
+            pass
+        time.sleep(1.0)
+    raise TimeoutError(f"Cluster not ready: expected {expected_nodes} nodes")
+
+
+def place_instance(
+    client: ExoClient,
+    model_id: str,
+    *,
+    sharding: Sharding = Sharding.PIPELINE,
+    comm: Comm = Comm.RING,
+    min_nodes: int = 1,
+    timeout: float = 600.0,
+    placement_retries: int = 10,
+    placement_retry_delay: float = 10.0,
+) -> str:
+    """Place an instance and wait for it to be ready. Returns the instance_id.
+
+    The /place_instance API returns a command_id, but instances are stored
+    under a separately-generated instance_id. This polls cluster state for the
+    new instance, retrying placement if the cluster is still settling.
+    """
+    wait_for_cluster_ready(client, expected_nodes=min_nodes)
+
+    body = {
+        "model_id": model_id,
+        "sharding": sharding.value,
+        "instance_meta": comm.value,
+        "min_nodes": min_nodes,
+    }
+
+    instance_id: str | None = None
+    for attempt in range(placement_retries):  # each attempt re-sends the request
+        before_ids = get_instance_ids(client)  # snapshot to diff against
+        client.request_json("POST", "/place_instance", body=body)
+
+        poll_deadline = time.time() + 30.0  # per-attempt polling window (seconds)
+        while time.time() < poll_deadline:
+            new_ids = get_instance_ids(client) - before_ids
+            if new_ids:
+                instance_id = next(iter(new_ids))  # arbitrary pick if several appeared
+                break
+            time.sleep(1.0)
+
+        if instance_id is not None:
+            break
+
+        if attempt < placement_retries - 1:  # no delay after the final attempt
+            time.sleep(placement_retry_delay)
+
+    if instance_id is None:
+        raise TimeoutError(
+            f"Placement failed after {placement_retries} attempts "
+            f"({sharding.value}/{comm.value} for {model_id})"
+        )
+
+    wait_for_instance_ready(client, instance_id, timeout=timeout)
+    return instance_id
+
+
+def cleanup_all_instances(client: ExoClient) -> None:
+    """Delete every instance in /state; per-instance failures are ignored."""
+    instances = (client.request_json("GET", "/state") or {}).get("instances", {})
+    for entry in instances.values():
+        with contextlib.suppress(Exception):
+            target_id = instance_id_from_instance(entry)
+            client.request_json("DELETE", f"/instance/{target_id}")
+            wait_for_instance_gone(client, target_id, timeout=30.0)
+
+
+def is_model_downloaded(client: ExoClient, model_id: str) -> bool:
+    """Return True if `model_id` is among the models reported as downloaded."""
+    response = client.request_json("GET", "/models", params={"status": "downloaded"})
+    return any(m.get("id") == model_id for m in (response or {}).get("data", []))
diff --git a/uv.lock b/uv.lock
index 850dbe0b4..8d73846cd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,16 +1,21 @@
 version = 1
 revision = 3
-requires-python = ">=3.13"
+requires-python = "==3.13.*"
 resolution-markers = [
-    "python_full_version >= '3.14' and sys_platform == 'darwin'",
-    "python_full_version < '3.14' and sys_platform == 'darwin'",
-    "python_full_version >= '3.14' and sys_platform == 'linux'",
-    "python_full_version < '3.14' and sys_platform == 'linux'",
+    "sys_platform == 'darwin'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "platform_machine != 'aarch64' and sys_platform == 'linux'",
 ]
 supported-markers = [
     "sys_platform == 'darwin'",
     "sys_platform == 'linux'",
 ]
+conflicts = [[
+    { package = "exo", extra = "mlx-cpu" },
+    { package = "exo", extra = "mlx-cuda12" },
+    { package = "exo", extra = "mlx-cuda13" },
+    { package = "exo", extra = "mlx-none" },
+]]
 
 [options]
 prerelease-mode = "allow"
@@ -20,7 +25,9 @@ members = [
     "exo",
     "exo-bench",
     "exo-pyo3-bindings",
+    "exo-tools",
 ]
+overrides = [{ name = "opencv-python", marker = "python_full_version < '0'" }]
 
 [[package]]
 name = "absl-py"
@@ -51,64 +58,36 @@ wheels = [
 
 [[package]]
 name = "aiohttp"
-version = "3.13.3"
+version = "3.13.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "aiohappyeyeballs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "aiosignal", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "attrs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "frozenlist", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "multidict", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "propcache", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "yarl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "aiohappyeyeballs", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "aiosignal", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "attrs", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "frozenlist", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "multidict", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "propcache", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "yarl", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/77/9a/152096d4808df8e4268befa55fba462f440f14beab85e8ad9bf990516918/aiohttp-3.13.5.tar.gz", hash = "sha256:9d98cc980ecc96be6eb4c1994ce35d28d8b1f5e5208a23b421187d1209dbb7d1", size = 7858271, upload-time = "2026-03-31T22:01:03.343Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190, upload-time = "2026-01-03T17:30:45.832Z" },
-    { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783, upload-time = "2026-01-03T17:30:47.466Z" },
-    { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704, upload-time = "2026-01-03T17:30:49.373Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652, upload-time = "2026-01-03T17:30:50.974Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014, upload-time = "2026-01-03T17:30:52.729Z" },
-    { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777, upload-time = "2026-01-03T17:30:54.537Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276, upload-time = "2026-01-03T17:30:56.512Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131, upload-time = "2026-01-03T17:30:58.256Z" },
-    { url = "https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863, upload-time = "2026-01-03T17:31:00.445Z" },
-    { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793, upload-time = "2026-01-03T17:31:03.024Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676, upload-time = "2026-01-03T17:31:04.842Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217, upload-time = "2026-01-03T17:31:06.868Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303, upload-time = "2026-01-03T17:31:08.958Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673, upload-time = "2026-01-03T17:31:10.676Z" },
-    { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120, upload-time = "2026-01-03T17:31:12.575Z" },
-    { url = "https://files.pythonhosted.org/packages/99/36/5b6514a9f5d66f4e2597e40dea2e3db271e023eb7a5d22defe96ba560996/aiohttp-3.13.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808", size = 737238, upload-time = "2026-01-03T17:31:17.909Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/49/459327f0d5bcd8c6c9ca69e60fdeebc3622861e696490d8674a6d0cb90a6/aiohttp-3.13.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415", size = 492292, upload-time = "2026-01-03T17:31:19.919Z" },
-    { url = "https://files.pythonhosted.org/packages/e8/0b/b97660c5fd05d3495b4eb27f2d0ef18dc1dc4eff7511a9bf371397ff0264/aiohttp-3.13.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f", size = 493021, upload-time = "2026-01-03T17:31:21.636Z" },
-    { url = "https://files.pythonhosted.org/packages/54/d4/438efabdf74e30aeceb890c3290bbaa449780583b1270b00661126b8aae4/aiohttp-3.13.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6", size = 1717263, upload-time = "2026-01-03T17:31:23.296Z" },
-    { url = "https://files.pythonhosted.org/packages/71/f2/7bddc7fd612367d1459c5bcf598a9e8f7092d6580d98de0e057eb42697ad/aiohttp-3.13.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687", size = 1669107, upload-time = "2026-01-03T17:31:25.334Z" },
-    { url = "https://files.pythonhosted.org/packages/00/5a/1aeaecca40e22560f97610a329e0e5efef5e0b5afdf9f857f0d93839ab2e/aiohttp-3.13.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26", size = 1760196, upload-time = "2026-01-03T17:31:27.394Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/f8/0ff6992bea7bd560fc510ea1c815f87eedd745fe035589c71ce05612a19a/aiohttp-3.13.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a", size = 1843591, upload-time = "2026-01-03T17:31:29.238Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/d1/e30e537a15f53485b61f5be525f2157da719819e8377298502aebac45536/aiohttp-3.13.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1", size = 1720277, upload-time = "2026-01-03T17:31:31.053Z" },
-    { url = "https://files.pythonhosted.org/packages/84/45/23f4c451d8192f553d38d838831ebbc156907ea6e05557f39563101b7717/aiohttp-3.13.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25", size = 1548575, upload-time = "2026-01-03T17:31:32.87Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/ed/0a42b127a43712eda7807e7892c083eadfaf8429ca8fb619662a530a3aab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603", size = 1679455, upload-time = "2026-01-03T17:31:34.76Z" },
-    { url = "https://files.pythonhosted.org/packages/2e/b5/c05f0c2b4b4fe2c9d55e73b6d3ed4fd6c9dc2684b1d81cbdf77e7fad9adb/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a", size = 1687417, upload-time = "2026-01-03T17:31:36.699Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/6b/915bc5dad66aef602b9e459b5a973529304d4e89ca86999d9d75d80cbd0b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926", size = 1729968, upload-time = "2026-01-03T17:31:38.622Z" },
-    { url = "https://files.pythonhosted.org/packages/11/3b/e84581290a9520024a08640b63d07673057aec5ca548177a82026187ba73/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba", size = 1545690, upload-time = "2026-01-03T17:31:40.57Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/04/0c3655a566c43fd647c81b895dfe361b9f9ad6d58c19309d45cff52d6c3b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c", size = 1746390, upload-time = "2026-01-03T17:31:42.857Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/53/71165b26978f719c3419381514c9690bd5980e764a09440a10bb816ea4ab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43", size = 1702188, upload-time = "2026-01-03T17:31:44.984Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/2a/3c79b638a9c3d4658d345339d22070241ea341ed4e07b5ac60fb0f418003/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c", size = 769512, upload-time = "2026-01-03T17:31:51.134Z" },
-    { url = "https://files.pythonhosted.org/packages/29/b9/3e5014d46c0ab0db8707e0ac2711ed28c4da0218c358a4e7c17bae0d8722/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592", size = 506444, upload-time = "2026-01-03T17:31:52.85Z" },
-    { url = "https://files.pythonhosted.org/packages/90/03/c1d4ef9a054e151cd7839cdc497f2638f00b93cbe8043983986630d7a80c/aiohttp-3.13.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f", size = 510798, upload-time = "2026-01-03T17:31:54.91Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/76/8c1e5abbfe8e127c893fe7ead569148a4d5a799f7cf958d8c09f3eedf097/aiohttp-3.13.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29", size = 1868835, upload-time = "2026-01-03T17:31:56.733Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/ac/984c5a6f74c363b01ff97adc96a3976d9c98940b8969a1881575b279ac5d/aiohttp-3.13.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc", size = 1720486, upload-time = "2026-01-03T17:31:58.65Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/9a/b7039c5f099c4eb632138728828b33428585031a1e658d693d41d07d89d1/aiohttp-3.13.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2", size = 1847951, upload-time = "2026-01-03T17:32:00.989Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/02/3bec2b9a1ba3c19ff89a43a19324202b8eb187ca1e928d8bdac9bbdddebd/aiohttp-3.13.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587", size = 1941001, upload-time = "2026-01-03T17:32:03.122Z" },
-    { url = "https://files.pythonhosted.org/packages/37/df/d879401cedeef27ac4717f6426c8c36c3091c6e9f08a9178cc87549c537f/aiohttp-3.13.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8", size = 1797246, upload-time = "2026-01-03T17:32:05.255Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/15/be122de1f67e6953add23335c8ece6d314ab67c8bebb3f181063010795a7/aiohttp-3.13.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632", size = 1627131, upload-time = "2026-01-03T17:32:07.607Z" },
-    { url = "https://files.pythonhosted.org/packages/12/12/70eedcac9134cfa3219ab7af31ea56bc877395b1ac30d65b1bc4b27d0438/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64", size = 1795196, upload-time = "2026-01-03T17:32:09.59Z" },
-    { url = "https://files.pythonhosted.org/packages/32/11/b30e1b1cd1f3054af86ebe60df96989c6a414dd87e27ad16950eee420bea/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0", size = 1782841, upload-time = "2026-01-03T17:32:11.445Z" },
-    { url = "https://files.pythonhosted.org/packages/88/0d/d98a9367b38912384a17e287850f5695c528cff0f14f791ce8ee2e4f7796/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56", size = 1795193, upload-time = "2026-01-03T17:32:13.705Z" },
-    { url = "https://files.pythonhosted.org/packages/43/a5/a2dfd1f5ff5581632c7f6a30e1744deda03808974f94f6534241ef60c751/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72", size = 1621979, upload-time = "2026-01-03T17:32:15.965Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/f0/12973c382ae7c1cccbc4417e129c5bf54c374dfb85af70893646e1f0e749/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df", size = 1822193, upload-time = "2026-01-03T17:32:18.219Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/5f/24155e30ba7f8c96918af1350eb0663e2430aad9e001c0489d89cd708ab1/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa", size = 1769801, upload-time = "2026-01-03T17:32:20.25Z" },
+    { url = "https://files.pythonhosted.org/packages/78/e9/d76bf503005709e390122d34e15256b88f7008e246c4bdbe915cd4f1adce/aiohttp-3.13.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5029cc80718bbd545123cd8fe5d15025eccaaaace5d0eeec6bd556ad6163d61", size = 742930, upload-time = "2026-03-31T21:58:13.155Z" },
+    { url = "https://files.pythonhosted.org/packages/57/00/4b7b70223deaebd9bb85984d01a764b0d7bd6526fcdc73cca83bcbe7243e/aiohttp-3.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4bb6bf5811620003614076bdc807ef3b5e38244f9d25ca5fe888eaccea2a9832", size = 496927, upload-time = "2026-03-31T21:58:15.073Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/f5/0fb20fb49f8efdcdce6cd8127604ad2c503e754a8f139f5e02b01626523f/aiohttp-3.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a84792f8631bf5a94e52d9cc881c0b824ab42717165a5579c760b830d9392ac9", size = 497141, upload-time = "2026-03-31T21:58:17.009Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/86/b7c870053e36a94e8951b803cb5b909bfbc9b90ca941527f5fcafbf6b0fa/aiohttp-3.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57653eac22c6a4c13eb22ecf4d673d64a12f266e72785ab1c8b8e5940d0e8090", size = 1732476, upload-time = "2026-03-31T21:58:18.925Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/e5/4e161f84f98d80c03a238671b4136e6530453d65262867d989bbe78244d0/aiohttp-3.13.5-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5e5f7debc7a57af53fdf5c5009f9391d9f4c12867049d509bf7bb164a6e295b", size = 1706507, upload-time = "2026-03-31T21:58:21.094Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/56/ea11a9f01518bd5a2a2fcee869d248c4b8a0cfa0bb13401574fa31adf4d4/aiohttp-3.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c719f65bebcdf6716f10e9eff80d27567f7892d8988c06de12bbbd39307c6e3a", size = 1773465, upload-time = "2026-03-31T21:58:23.159Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/40/333ca27fb74b0383f17c90570c748f7582501507307350a79d9f9f3c6eb1/aiohttp-3.13.5-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d97f93fdae594d886c5a866636397e2bcab146fd7a132fd6bb9ce182224452f8", size = 1873523, upload-time = "2026-03-31T21:58:25.59Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/d2/e2f77eef1acb7111405433c707dc735e63f67a56e176e72e9e7a2cd3f493/aiohttp-3.13.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3df334e39d4c2f899a914f1dba283c1aadc311790733f705182998c6f7cae665", size = 1754113, upload-time = "2026-03-31T21:58:27.624Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/56/3f653d7f53c89669301ec9e42c95233e2a0c0a6dd051269e6e678db4fdb0/aiohttp-3.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe6970addfea9e5e081401bcbadf865d2b6da045472f58af08427e108d618540", size = 1562351, upload-time = "2026-03-31T21:58:29.918Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/a6/9b3e91eb8ae791cce4ee736da02211c85c6f835f1bdfac0594a8a3b7018c/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7becdf835feff2f4f335d7477f121af787e3504b48b449ff737afb35869ba7bb", size = 1693205, upload-time = "2026-03-31T21:58:32.214Z" },
+    { url = "https://files.pythonhosted.org/packages/98/fc/bfb437a99a2fcebd6b6eaec609571954de2ed424f01c352f4b5504371dd3/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:676e5651705ad5d8a70aeb8eb6936c436d8ebbd56e63436cb7dd9bb36d2a9a46", size = 1730618, upload-time = "2026-03-31T21:58:34.728Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/b6/c8534862126191a034f68153194c389addc285a0f1347d85096d349bbc15/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9b16c653d38eb1a611cc898c41e76859ca27f119d25b53c12875fd0474ae31a8", size = 1745185, upload-time = "2026-03-31T21:58:36.909Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/93/4ca8ee2ef5236e2707e0fd5fecb10ce214aee1ff4ab307af9c558bda3b37/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:999802d5fa0389f58decd24b537c54aa63c01c3219ce17d1214cbda3c2b22d2d", size = 1557311, upload-time = "2026-03-31T21:58:39.38Z" },
+    { url = "https://files.pythonhosted.org/packages/57/ae/76177b15f18c5f5d094f19901d284025db28eccc5ae374d1d254181d33f4/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ec707059ee75732b1ba130ed5f9580fe10ff75180c812bc267ded039db5128c6", size = 1773147, upload-time = "2026-03-31T21:58:41.476Z" },
+    { url = "https://files.pythonhosted.org/packages/01/a4/62f05a0a98d88af59d93b7fcac564e5f18f513cb7471696ac286db970d6a/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d6d44a5b48132053c2f6cd5c8cb14bc67e99a63594e336b0f2af81e94d5530c", size = 1730356, upload-time = "2026-03-31T21:58:44.049Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/85/fc8601f59dfa8c9523808281f2da571f8b4699685f9809a228adcc90838d/aiohttp-3.13.5-cp313-cp313-win32.whl", hash = "sha256:329f292ed14d38a6c4c435e465f48bebb47479fd676a0411936cc371643225cc", size = 432637, upload-time = "2026-03-31T21:58:46.167Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/1b/ac685a8882896acf0f6b31d689e3792199cfe7aba37969fa91da63a7fa27/aiohttp-3.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:69f571de7500e0557801c0b51f4780482c0ec5fe2ac851af5a92cfce1af1cb83", size = 458896, upload-time = "2026-03-31T21:58:48.119Z" },
 ]
 
 [[package]]
@@ -116,7 +95,7 @@ name = "aiosignal"
 version = "1.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "frozenlist", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "frozenlist", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" }
 wheels = [
@@ -164,8 +143,8 @@ name = "anyio"
 version = "4.11.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sniffio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sniffio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" }
 wheels = [
@@ -174,32 +153,32 @@ wheels = [
 
 [[package]]
 name = "attrs"
-version = "25.4.0"
+version = "26.1.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9a/8e/82a0fe20a541c03148528be8cac2408564a6c9a0cc7e9171802bc1d26985/attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32", size = 952055, upload-time = "2026-03-19T14:22:25.026Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" },
+    { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" },
 ]
 
 [[package]]
 name = "basedpyright"
-version = "1.37.1"
+version = "1.39.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nodejs-wheel-binaries", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "nodejs-wheel-binaries", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/0c/b0/fbba81ea29eed1274e965cd0445f0d6020b467ff4d3393791e4d6ae02e64/basedpyright-1.37.1.tar.gz", hash = "sha256:1f47bc6f45cbcc5d6f8619d60aa42128e4b38942f5118dcd4bc20c3466c5e02f", size = 25235384, upload-time = "2026-01-08T14:42:46.447Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/bc/5d/68e9bd8a408011f820e74fdad1684cad553fce0acfd83fce0a1ec4e01d97/basedpyright-1.39.5.tar.gz", hash = "sha256:373e6999b9dc450c4af077cb76a758f5959eedf8d03f0aac144bc59227f47d33", size = 25508568, upload-time = "2026-05-17T10:56:08.126Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ad/d6/6b33bb49f08d761d7c958a1e3cecfb3ffbdcf4ba6bbed65b23ab47516b75/basedpyright-1.37.1-py3-none-any.whl", hash = "sha256:caf3adfe54f51623241712f8b4367adb51ef8a8c2288e3e1ec4118319661340d", size = 12297397, upload-time = "2026-01-08T14:42:50.306Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/d1/fce034d7b840470bef067543deab51a28d87d1b866a1a4e88752047ef846/basedpyright-1.39.5-py3-none-any.whl", hash = "sha256:dda95954607e3a5e409b5a3083607b69976353cd56c05fdd77e32f6be27c9898", size = 12420680, upload-time = "2026-05-17T10:56:12.685Z" },
 ]
 
 [[package]]
 name = "certifi"
-version = "2026.1.4"
+version = "2026.4.22"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/25/ee/6caf7a40c36a1220410afe15a1cc64993a1f864871f698c0f93acb72842a/certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580", size = 137077, upload-time = "2026-04-22T11:26:11.191Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
+    { url = "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707, upload-time = "2026-04-22T11:26:09.372Z" },
 ]
 
 [[package]]
@@ -207,7 +186,7 @@ name = "cffi"
 version = "2.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pycparser", marker = "(implementation_name != 'PyPy' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and sys_platform == 'linux')" },
+    { name = "pycparser", marker = "(implementation_name != 'PyPy' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 
'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
 wheels = [
@@ -220,75 +199,52 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
     { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
     { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
-    { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" },
-    { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
-    { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
-    { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
-    { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" },
-    { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
-    { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
-    { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
+    { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
 ]
 
 [[package]]
 name = "chardet"
-version = "5.2.0"
+version = "6.0.0.post1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/42/fb9436c103a881a377e34b9f58d77b5f503461c702ff654ebe86151bcfe9/chardet-6.0.0.post1.tar.gz", hash = "sha256:6b78048c3c97c7b2ed1fbad7a18f76f5a6547f7d34dbab536cc13887c9a92fa4", size = 12521798, upload-time = "2026-02-22T15:09:17.925Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" },
+    { url = "https://files.pythonhosted.org/packages/66/42/5de54f632c2de53cd3415b3703383d5fff43a94cbc0567ef362515261a21/chardet-6.0.0.post1-py3-none-any.whl", hash = "sha256:c894a36800549adf7bb5f2af47033281b75fdfcd2aa0f0243be0ad22a52e2dcb", size = 627245, upload-time = "2026-02-22T15:09:15.876Z" },
 ]
 
 [[package]]
 name = "charset-normalizer"
-version = "3.4.4"
+version = "3.4.7"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" },
-    { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" },
-    { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" },
-    { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" },
-    { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" },
-    { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" },
-    { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" },
-    { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" },
-    { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" },
-    { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" },
-    { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" },
-    { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" },
-    { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" },
-    { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627, upload-time = "2026-04-02T09:26:45.198Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008, upload-time = "2026-04-02T09:26:46.824Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303, upload-time = "2026-04-02T09:26:48.397Z" },
+    { url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282, upload-time = "2026-04-02T09:26:49.684Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595, upload-time = "2026-04-02T09:26:50.915Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986, upload-time = "2026-04-02T09:26:52.197Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711, upload-time = "2026-04-02T09:26:53.49Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036, upload-time = "2026-04-02T09:26:54.975Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998, upload-time = "2026-04-02T09:26:56.303Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056, upload-time = "2026-04-02T09:26:57.554Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537, upload-time = "2026-04-02T09:26:58.843Z" },
+    { url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176, upload-time = "2026-04-02T09:27:00.437Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723, upload-time = "2026-04-02T09:27:02.021Z" },
+    { url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085, upload-time = "2026-04-02T09:27:03.192Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819, upload-time = "2026-04-02T09:27:04.454Z" },
+    { url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915, upload-time = "2026-04-02T09:27:05.971Z" },
+    { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" },
 ]
 
 [[package]]
 name = "click"
-version = "8.3.1"
+version = "8.4.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/23/e4/796662cd90cf80e3a363c99db2b88e0e394b988a575f60a17e16440cd011/click-8.4.0.tar.gz", hash = "sha256:638f1338fe1235c8f4e008e4a8a254fb5c5fbdcbb40ece3c9142ebb78e792973", size = 350843, upload-time = "2026-05-17T00:47:58.425Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/ae/8e92f8058baf87f6c7d86ee7e457668690195cc77efedb8d3797a06e3940/click-8.4.0-py3-none-any.whl", hash = "sha256:40c50b7c6c6adac2823d411041ec84f3f103f1b280d5e9ce0d7f998995832f81", size = 116147, upload-time = "2026-05-17T00:47:56.842Z" },
 ]
 
 [[package]]
@@ -305,7 +261,7 @@ name = "contourpy"
 version = "1.3.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" }
 wheels = [
@@ -317,6 +273,9 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9", size = 362859, upload-time = "2025-07-26T12:01:46.519Z" },
     { url = "https://files.pythonhosted.org/packages/33/71/e2a7945b7de4e58af42d708a219f3b2f4cff7386e6b6ab0a0fa0033c49a9/contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659", size = 1332062, upload-time = "2025-07-26T12:01:48.964Z" },
     { url = "https://files.pythonhosted.org/packages/12/fc/4e87ac754220ccc0e807284f88e943d6d43b43843614f0a8afa469801db0/contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7", size = 1403932, upload-time = "2025-07-26T12:01:51.979Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/2e/adc197a37443f934594112222ac1aa7dc9a98faf9c3842884df9a9d8751d/contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d", size = 185024, upload-time = "2025-07-26T12:01:53.245Z" },
+    { url = "https://files.pythonhosted.org/packages/18/0b/0098c214843213759692cc638fce7de5c289200a830e5035d1791d7a2338/contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263", size = 226578, upload-time = "2025-07-26T12:01:54.422Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/9a/2f6024a0c5995243cd63afdeb3651c984f0d2bc727fd98066d40e141ad73/contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9", size = 193524, upload-time = "2025-07-26T12:01:55.73Z" },
     { url = "https://files.pythonhosted.org/packages/c0/b3/f8a1a86bd3298513f500e5b1f5fd92b69896449f6cab6a146a5d52715479/contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d", size = 306730, upload-time = "2025-07-26T12:01:57.051Z" },
     { url = "https://files.pythonhosted.org/packages/3f/11/4780db94ae62fc0c2053909b65dc3246bd7cecfc4f8a20d957ad43aa4ad8/contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216", size = 287897, upload-time = "2025-07-26T12:01:58.663Z" },
     { url = "https://files.pythonhosted.org/packages/ae/15/e59f5f3ffdd6f3d4daa3e47114c53daabcb18574a26c21f03dc9e4e42ff0/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae", size = 326751, upload-time = "2025-07-26T12:02:00.343Z" },
@@ -325,133 +284,48 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9f/52/5b00ea89525f8f143651f9f03a0df371d3cbd2fccd21ca9b768c7a6500c2/contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b", size = 352548, upload-time = "2025-07-26T12:02:05.165Z" },
     { url = "https://files.pythonhosted.org/packages/32/1d/a209ec1a3a3452d490f6b14dd92e72280c99ae3d1e73da74f8277d4ee08f/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a", size = 1322297, upload-time = "2025-07-26T12:02:07.379Z" },
     { url = "https://files.pythonhosted.org/packages/bc/9e/46f0e8ebdd884ca0e8877e46a3f4e633f6c9c8c4f3f6e72be3fe075994aa/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e", size = 1391023, upload-time = "2025-07-26T12:02:10.171Z" },
-    { url = "https://files.pythonhosted.org/packages/72/8b/4546f3ab60f78c514ffb7d01a0bd743f90de36f0019d1be84d0a708a580a/contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a", size = 292189, upload-time = "2025-07-26T12:02:16.095Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/e1/3542a9cb596cadd76fcef413f19c79216e002623158befe6daa03dbfa88c/contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77", size = 273251, upload-time = "2025-07-26T12:02:17.524Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/71/f93e1e9471d189f79d0ce2497007731c1e6bf9ef6d1d61b911430c3db4e5/contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5", size = 335810, upload-time = "2025-07-26T12:02:18.9Z" },
-    { url = "https://files.pythonhosted.org/packages/91/f9/e35f4c1c93f9275d4e38681a80506b5510e9327350c51f8d4a5a724d178c/contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4", size = 382871, upload-time = "2025-07-26T12:02:20.418Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/71/47b512f936f66a0a900d81c396a7e60d73419868fba959c61efed7a8ab46/contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36", size = 386264, upload-time = "2025-07-26T12:02:21.916Z" },
-    { url = "https://files.pythonhosted.org/packages/04/5f/9ff93450ba96b09c7c2b3f81c94de31c89f92292f1380261bd7195bea4ea/contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3", size = 363819, upload-time = "2025-07-26T12:02:23.759Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/a6/0b185d4cc480ee494945cde102cb0149ae830b5fa17bf855b95f2e70ad13/contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b", size = 1333650, upload-time = "2025-07-26T12:02:26.181Z" },
-    { url = "https://files.pythonhosted.org/packages/43/d7/afdc95580ca56f30fbcd3060250f66cedbde69b4547028863abd8aa3b47e/contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36", size = 1404833, upload-time = "2025-07-26T12:02:28.782Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/42/38c159a7d0f2b7b9c04c64ab317042bb6952b713ba875c1681529a2932fe/contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772", size = 306769, upload-time = "2025-07-26T12:02:34.2Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/6c/26a8205f24bca10974e77460de68d3d7c63e282e23782f1239f226fcae6f/contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77", size = 287892, upload-time = "2025-07-26T12:02:35.807Z" },
-    { url = "https://files.pythonhosted.org/packages/66/06/8a475c8ab718ebfd7925661747dbb3c3ee9c82ac834ccb3570be49d129f4/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13", size = 326748, upload-time = "2025-07-26T12:02:37.193Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/a3/c5ca9f010a44c223f098fccd8b158bb1cb287378a31ac141f04730dc49be/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe", size = 375554, upload-time = "2025-07-26T12:02:38.894Z" },
-    { url = "https://files.pythonhosted.org/packages/80/5b/68bd33ae63fac658a4145088c1e894405e07584a316738710b636c6d0333/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f", size = 388118, upload-time = "2025-07-26T12:02:40.642Z" },
-    { url = "https://files.pythonhosted.org/packages/40/52/4c285a6435940ae25d7410a6c36bda5145839bc3f0beb20c707cda18b9d2/contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0", size = 352555, upload-time = "2025-07-26T12:02:42.25Z" },
-    { url = "https://files.pythonhosted.org/packages/24/ee/3e81e1dd174f5c7fefe50e85d0892de05ca4e26ef1c9a59c2a57e43b865a/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4", size = 1322295, upload-time = "2025-07-26T12:02:44.668Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/b2/6d913d4d04e14379de429057cd169e5e00f6c2af3bb13e1710bcbdb5da12/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f", size = 1391027, upload-time = "2025-07-26T12:02:47.09Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/70/f308384a3ae9cd2209e0849f33c913f658d3326900d0ff5d378d6a1422d2/contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3", size = 196157, upload-time = "2025-07-26T12:02:11.488Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/dd/880f890a6663b84d9e34a6f88cded89d78f0091e0045a284427cb6b18521/contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8", size = 240570, upload-time = "2025-07-26T12:02:12.754Z" },
+    { url = "https://files.pythonhosted.org/packages/80/99/2adc7d8ffead633234817ef8e9a87115c8a11927a94478f6bb3d3f4d4f7d/contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301", size = 199713, upload-time = "2025-07-26T12:02:14.4Z" },
 ]
 
 [[package]]
 name = "cryptography"
-version = "46.0.3"
+version = "48.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "cffi", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'linux'" },
+    { name = "cffi", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'linux') or (platform_python_implementation == 'PyPy' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_python_implementation == 'PyPy' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_python_implementation == 'PyPy' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_python_implementation == 'PyPy' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_python_implementation == 'PyPy' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_python_implementation == 'PyPy' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9f/a9/db8f313fdcd85d767d4973515e1db101f9c71f95fced83233de224673757/cryptography-48.0.0.tar.gz", hash = "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920", size = 832984, upload-time = "2026-05-04T22:59:38.133Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" },
-    { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" },
-    { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" },
-    { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = "2025-10-15T23:17:04.078Z" },
-    { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" },
-    { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" },
-    { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" },
-    { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728, upload-time = "2025-10-15T23:17:21.527Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" },
-    { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" },
-    { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" },
-    { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" },
-    { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" },
-    { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" },
-    { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" },
-    { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" },
-    { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" },
-    { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" },
-    { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" },
-]
-
-[[package]]
-name = "cuda-bindings"
-version = "13.2.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/df/93/eef988860a3ca985f82c4f3174fc0cdd94e07331ba9a92e8e064c260337f/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6629ca2df6f795b784752409bcaedbd22a7a651b74b56a165ebc0c9dcbd504d0", size = 5614610, upload-time = "2026-03-11T00:12:50.337Z" },
-    { url = "https://files.pythonhosted.org/packages/18/23/6db3aba46864aee357ab2415135b3fe3da7e9f1fa0221fa2a86a5968099c/cuda_bindings-13.2.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7dca0da053d3b4cc4869eff49c61c03f3c5dbaa0bcd712317a358d5b8f3f385d", size = 6149914, upload-time = "2026-03-11T00:12:52.374Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/87/87a014f045b77c6de5c8527b0757fe644417b184e5367db977236a141602/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6464b30f46692d6c7f65d4a0e0450d81dd29de3afc1bb515653973d01c2cd6e", size = 5685673, upload-time = "2026-03-11T00:12:56.371Z" },
-    { url = "https://files.pythonhosted.org/packages/ee/5e/c0fe77a73aaefd3fff25ffaccaac69c5a63eafdf8b9a4c476626ef0ac703/cuda_bindings-13.2.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4af9f3e1be603fa12d5ad6cfca7844c9d230befa9792b5abdf7dd79979c3626", size = 6191386, upload-time = "2026-03-11T00:12:58.965Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/58/ed2c3b39c8dd5f96aa7a4abef0d47a73932c7a988e30f5fa428f00ed0da1/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df850a1ff8ce1b3385257b08e47b70e959932f5f432d0a4e46a355962b4e4771", size = 5507469, upload-time = "2026-03-11T00:13:04.063Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/01/0c941b112ceeb21439b05895eace78ca1aa2eaaf695c8521a068fd9b4c00/cuda_bindings-13.2.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8a16384c6494e5485f39314b0b4afb04bee48d49edb16d5d8593fd35bbd231b", size = 6059693, upload-time = "2026-03-11T00:13:06.003Z" },
-]
-
-[[package]]
-name = "cuda-pathfinder"
-version = "1.5.0"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/93/66/0c02bd330e7d976f83fa68583d6198d76f23581bcbb5c0e98a6148f326e5/cuda_pathfinder-1.5.0-py3-none-any.whl", hash = "sha256:498f90a9e9de36044a7924742aecce11c50c49f735f1bc53e05aa46de9ea4110", size = 49739, upload-time = "2026-03-24T21:14:30.869Z" },
-]
-
-[[package]]
-name = "cuda-toolkit"
-version = "13.0.2"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/57/b2/453099f5f3b698d7d0eab38916aac44c7f76229f451709e2eb9db6615dcd/cuda_toolkit-13.0.2-py2.py3-none-any.whl", hash = "sha256:b198824cf2f54003f50d64ada3a0f184b42ca0846c1c94192fa269ecd97a66eb", size = 2364, upload-time = "2025-12-19T23:24:07.328Z" },
-]
-
-[package.optional-dependencies]
-cublas = [
-    { name = "nvidia-cublas", marker = "sys_platform == 'linux'" },
-]
-cudart = [
-    { name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux'" },
-]
-cufft = [
-    { name = "nvidia-cufft", marker = "sys_platform == 'linux'" },
-]
-cufile = [
-    { name = "nvidia-cufile", marker = "sys_platform == 'linux'" },
-]
-cupti = [
-    { name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux'" },
-]
-curand = [
-    { name = "nvidia-curand", marker = "sys_platform == 'linux'" },
-]
-cusolver = [
-    { name = "nvidia-cusolver", marker = "sys_platform == 'linux'" },
-]
-cusparse = [
-    { name = "nvidia-cusparse", marker = "sys_platform == 'linux'" },
-]
-nvjitlink = [
-    { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" },
-]
-nvrtc = [
-    { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux'" },
-]
-nvtx = [
-    { name = "nvidia-nvtx", marker = "sys_platform == 'linux'" },
+    { url = "https://files.pythonhosted.org/packages/df/3d/01f6dd9190170a5a241e0e98c2d04be3664a9e6f5b9b872cde63aff1c3dd/cryptography-48.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6", size = 8001587, upload-time = "2026-05-04T22:57:36.803Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/6e/e90527eef33f309beb811cf7c982c3aeffcce8e3edb178baa4ca3ae4a6fa/cryptography-48.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c", size = 4690433, upload-time = "2026-05-04T22:57:40.373Z" },
+    { url = "https://files.pythonhosted.org/packages/90/04/673510ed51ddff56575f306cf1617d80411ee76831ccd3097599140efdfe/cryptography-48.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3", size = 4710620, upload-time = "2026-05-04T22:57:42.935Z" },
+    { url = "https://files.pythonhosted.org/packages/14/d5/e9c4ef932c8d800490c34d8bd589d64a31d5890e27ec9e9ad532be893294/cryptography-48.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5", size = 4696283, upload-time = "2026-05-04T22:57:45.294Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/29/174b9dfb60b12d59ecfc6cfa04bc88c21b42a54f01b8aae09bb6e51e4c7f/cryptography-48.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c", size = 5296573, upload-time = "2026-05-04T22:57:47.933Z" },
+    { url = "https://files.pythonhosted.org/packages/95/38/0d29a6fd7d0d1373f0c0c88a04ba20e359b257753ac497564cd660fc1d55/cryptography-48.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f", size = 4743677, upload-time = "2026-05-04T22:57:50.067Z" },
+    { url = "https://files.pythonhosted.org/packages/30/be/eef653013d5c63b6a490529e0316f9ac14a37602965d4903efed1399f32b/cryptography-48.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25", size = 4330808, upload-time = "2026-05-04T22:57:52.301Z" },
+    { url = "https://files.pythonhosted.org/packages/84/9e/500463e87abb7a0a0f9f256ec21123ecde0a7b5541a15e840ea54551fd81/cryptography-48.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602", size = 4695941, upload-time = "2026-05-04T22:57:54.603Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/dc/7303087450c2ec9e7fbb750e17c2abfbc658f23cbd0e54009509b7cc4091/cryptography-48.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c", size = 5252579, upload-time = "2026-05-04T22:57:57.207Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/c0/7101d3b7215edcdc90c45da544961fd8ed2d6448f77577460fa75a8443f7/cryptography-48.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5", size = 4743326, upload-time = "2026-05-04T22:57:59.535Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/d8/5b833bad13016f562ab9d063d68199a4bd121d18458e439515601d3357ec/cryptography-48.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321", size = 4826672, upload-time = "2026-05-04T22:58:01.996Z" },
+    { url = "https://files.pythonhosted.org/packages/98/e1/7074eb8bf3c135558c73fc2bcf0f5633f912e6fb87e868a55c454080ef09/cryptography-48.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74", size = 4972574, upload-time = "2026-05-04T22:58:03.968Z" },
+    { url = "https://files.pythonhosted.org/packages/04/70/e5a1b41d325f797f39427aa44ef8baf0be500065ab6d8e10369d850d4a4f/cryptography-48.0.0-cp311-abi3-win32.whl", hash = "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4", size = 3294868, upload-time = "2026-05-04T22:58:06.467Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/ac/8ac51b4a5fc5932eb7ee5c517ba7dc8cd834f0048962b6b352f00f41ebf9/cryptography-48.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7", size = 3817107, upload-time = "2026-05-04T22:58:08.845Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/63/61d4a4e1c6b6bab6ce1e213cd36a24c415d90e76d78c5eb8577c5541d2e8/cryptography-48.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86", size = 7983482, upload-time = "2026-05-04T22:58:43.769Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/ac/f5b5995b87770c693e2596559ffafe195b4033a57f14a82268a2842953f3/cryptography-48.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e", size = 4683266, upload-time = "2026-05-04T22:58:46.064Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/c6/8b14f67e18338fbc4adb76f66c001f5c3610b3e2d1837f268f47a347dbbb/cryptography-48.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f", size = 4696228, upload-time = "2026-05-04T22:58:48.22Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/73/f808fbae9514bd91b47875b003f13e284c8c6bdfd904b7944e803937eec1/cryptography-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7", size = 4689097, upload-time = "2026-05-04T22:58:50.9Z" },
+    { url = "https://files.pythonhosted.org/packages/93/01/d86632d7d28db8ae83221995752eeb6639ffb374c2d22955648cf8d52797/cryptography-48.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832", size = 5283582, upload-time = "2026-05-04T22:58:53.017Z" },
+    { url = "https://files.pythonhosted.org/packages/02/e1/50edc7a50334807cc4791fc4a0ce7468b4a1416d9138eab358bfc9a3d70b/cryptography-48.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c", size = 4730479, upload-time = "2026-05-04T22:58:55.611Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/af/99a582b1b1641ff5911ac559beb45097cf79efd4ead4657f578ef1af2d47/cryptography-48.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a", size = 4326481, upload-time = "2026-05-04T22:58:57.607Z" },
+    { url = "https://files.pythonhosted.org/packages/90/ee/89aa26a06ef0a7d7611788ffd571a7c50e368cc6a4d5eef8b4884e866edb/cryptography-48.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a", size = 4688713, upload-time = "2026-05-04T22:59:00.077Z" },
+    { url = "https://files.pythonhosted.org/packages/70/ba/bcb1b0bb7a33d4c7c0c4d4c7874b4a62ae4f56113a5f4baefa362dfb1f0f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a", size = 5238165, upload-time = "2026-05-04T22:59:02.317Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/70/ca4003b1ce5ca3dc3186ada51908c8a9b9ff7d5cab83cc0d43ee14ec144f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239", size = 4729947, upload-time = "2026-05-04T22:59:05.255Z" },
+    { url = "https://files.pythonhosted.org/packages/44/a0/4ec7cf774207905aef1a8d11c3750d5a1db805eb380ee4e16df317870128/cryptography-48.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c", size = 4822059, upload-time = "2026-05-04T22:59:07.802Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/75/a2e55f99c16fcac7b5d6c1eb19ad8e00799854d6be5ca845f9259eae1681/cryptography-48.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4", size = 4960575, upload-time = "2026-05-04T22:59:09.851Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/23/6e6f32143ab5d8b36ca848a502c4bcd477ae75b9e1677e3530d669062578/cryptography-48.0.0-cp39-abi3-win32.whl", hash = "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd", size = 3279117, upload-time = "2026-05-04T22:59:12.019Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/9a/0fea98a70cf1749d41d738836f6349d97945f7c89433a259a6c2642eefeb/cryptography-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8", size = 3792100, upload-time = "2026-05-04T22:59:14.884Z" },
 ]
 
 [[package]]
@@ -465,123 +339,156 @@ wheels = [
 
 [[package]]
 name = "dataproperty"
-version = "1.1.0"
+version = "1.1.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typepy", extra = ["datetime"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typepy", extra = ["datetime"], marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/0b/81/8c8b64ae873cb9014815214c07b63b12e3b18835780fb342223cfe3fe7d8/dataproperty-1.1.0.tar.gz", hash = "sha256:b038437a4097d1a1c497695c3586ea34bea67fdd35372b9a50f30bf044d77d04", size = 42574, upload-time = "2024-12-31T14:37:26.033Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/6f/a801320bb388d965be9c370ec753cc33120e6cbe0069fa05644f05821975/dataproperty-1.1.1.tar.gz", hash = "sha256:a83af82a234edda5378a36fb092bc90dd554646c5e58202a310acf468ae81bc8", size = 42954, upload-time = "2026-05-09T10:33:42.212Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/21/c2/e12e95e289e6081a40454199ab213139ef16a528c7c86432de545b05a23a/DataProperty-1.1.0-py3-none-any.whl", hash = "sha256:c61fcb2e2deca35e6d1eb1f251a7f22f0dcde63e80e61f0cc18c19f42abfd25b", size = 27581, upload-time = "2024-12-31T14:37:22.657Z" },
+    { url = "https://files.pythonhosted.org/packages/03/41/eab7fe313820578b341a2a1d6aeeedd2c38ec1e3f3d51e57e2735b5beac0/dataproperty-1.1.1-py3-none-any.whl", hash = "sha256:cf026aa002dbd6c57c619ec6741ffd61ae7bf2f20481951d8af2dff44480340e", size = 27691, upload-time = "2026-05-09T10:33:40.468Z" },
 ]
 
 [[package]]
 name = "datasets"
-version = "4.6.1"
+version = "4.8.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fsspec", extra = ["http"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyarrow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "fsspec", extra = ["http"], marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyarrow", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d7/94/eb81c6fe32e9b6ef92223141b5a553aeff2e9456968424a8533cbe88f476/datasets-4.6.1.tar.gz", hash = "sha256:140ce500bc41939ff6ce995702d66b1f4b2ee7f117bb9b07512fab6804d4070a", size = 593865, upload-time = "2026-02-27T23:26:49.482Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/34/14cd8e76f907f7d4dca2334cfeec9f81d30fd15c25a015f99aaea694eaed/datasets-4.8.5.tar.gz", hash = "sha256:0f0c1c3d56ffff2c93b2f4c63c95bac94f3d7e8621aea2a2a576275233bba772", size = 605649, upload-time = "2026-04-27T15:43:57.384Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/37/f0/99fe6eb530c7ee9ee1faee48059eb8a6437f80c893a496b98a78864e0fc6/datasets-4.6.1-py3-none-any.whl", hash = "sha256:f53228e6dadc9f837037b1bf3051d7d8c054abbb3eb29f1f022926e08090e0da", size = 520667, upload-time = "2026-02-27T23:26:46.855Z" },
+    { url = "https://files.pythonhosted.org/packages/65/99/00f3196036501b53032c4b1ab8337a0b978dee832ed276dae3815df4e8b5/datasets-4.8.5-py3-none-any.whl", hash = "sha256:5079900781719c0e063a8efdd2cd95a31ad0c63209178669cd23cf1b926149ff", size = 528973, upload-time = "2026-04-27T15:43:53.702Z" },
 ]
 
 [[package]]
 name = "dill"
-version = "0.4.0"
+version = "0.4.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/81/e1/56027a71e31b02ddc53c7d65b01e68edf64dea2932122fe7746a516f75d5/dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa", size = 187315, upload-time = "2026-01-19T02:36:56.85Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/77/dc8c558f7593132cf8fefec57c4f60c83b16941c574ac5f619abb3ae7933/dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d", size = 120019, upload-time = "2026-01-19T02:36:55.663Z" },
 ]
 
 [[package]]
 name = "docutils"
-version = "0.22.4"
+version = "0.23rc1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/77/2d/ca6465f7020ec0c852b556fe74c299708f64d3f2705f0561098a3cb31ec5/docutils-0.23rc1.tar.gz", hash = "sha256:675672f4f98087eaaf4ecddcd7d739e4f057e2edadddeaa962c5bf15b3eeb2e9", size = 2303934, upload-time = "2026-05-09T10:02:52.356Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" },
-]
-
-[[package]]
-name = "evaluate"
-version = "0.4.6"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fsspec", extra = ["http"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/ad/d0/0c17a8e6e8dc7245f22dea860557c32bae50fc4d287ae030cb0e8ab8720f/evaluate-0.4.6.tar.gz", hash = "sha256:e07036ca12b3c24331f83ab787f21cc2dbf3631813a1631e63e40897c69a3f21", size = 65716, upload-time = "2025-09-18T13:06:30.581Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/3e/af/3e990d8d4002bbc9342adb4facd59506e653da93b2417de0fa6027cb86b1/evaluate-0.4.6-py3-none-any.whl", hash = "sha256:bca85bc294f338377b7ac2f861e21c308b11b2a285f510d7d5394d5df437db29", size = 84069, upload-time = "2025-09-18T13:06:29.265Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/bc/72b8b3b011abfa69d50a7c022d39df218eddc25cce2b3f00e0ca6437502f/docutils-0.23rc1-py3-none-any.whl", hash = "sha256:2a67e56e135ab80f9250de3f1a90d4d5738a85882a55037fb7b4ea90786234be", size = 634765, upload-time = "2026-05-09T10:02:43.727Z" },
 ]
 
 [[package]]
 name = "exo"
-version = "0.3.69"
+version = "0.3.70"
 source = { editable = "." }
 dependencies = [
-    { name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "exo-pyo3-bindings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fastapi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "exo-pyo3-bindings", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "fastapi", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "python-multipart", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "rustworkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tomlkit", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+
+[package.optional-dependencies]
+build = [
+    { name = "nanobind", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+mlx = [
+    { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-vlm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "torch", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "torchaudio", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "torchvision", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+mlx-cpu = [
     { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cpu"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.31.2.dev20260406+90dd61a5", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#90dd61a5f0837f9bbbab4fd3fbfedba1ca5d33e7" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "(sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra != 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra != 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-cpu", marker = "sys_platform == 'linux'" },
     { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "mlx-vlm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "python-multipart", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "rustworkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tomlkit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "torch", marker = "sys_platform == 'darwin'" },
+    { name = "torchaudio", marker = "sys_platform == 'darwin'" },
+    { name = "torchvision", marker = "sys_platform == 'darwin'" },
+]
+mlx-cuda12 = [
+    { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "(sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-cuda-12", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-cuda-12", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra != 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx-vlm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "nvidia-ml-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "torch", marker = "sys_platform == 'darwin'" },
+    { name = "torchaudio", marker = "sys_platform == 'darwin'" },
+    { name = "torchvision", marker = "sys_platform == 'darwin'" },
+]
+mlx-cuda13 = [
+    { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "(sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-cuda-13", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-cuda-13", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx-vlm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "nvidia-ml-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "torch", marker = "sys_platform == 'darwin'" },
+    { name = "torchaudio", marker = "sys_platform == 'darwin'" },
+    { name = "torchvision", marker = "sys_platform == 'darwin'" },
+]
+mlx-none = [
+    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 
 [package.dev-dependencies]
 dev = [
-    { name = "basedpyright", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyinstaller", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pytest-asyncio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pytest-env", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "ruff", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "basedpyright", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "playwright", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyinstaller", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pytest-asyncio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pytest-env", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "ruff", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [package.metadata]
@@ -589,6 +496,10 @@ requires-dist = [
     { name = "aiofiles", specifier = ">=24.1.0" },
     { name = "aiohttp", specifier = ">=3.12.14" },
     { name = "anyio", specifier = "==4.11.0" },
+    { name = "anyio", marker = "extra == 'mlx-none'" },
+    { name = "exo", extras = ["mlx"], marker = "extra == 'mlx-cpu'" },
+    { name = "exo", extras = ["mlx"], marker = "extra == 'mlx-cuda12'" },
+    { name = "exo", extras = ["mlx"], marker = "extra == 'mlx-cuda13'" },
     { name = "exo-pyo3-bindings", editable = "rust/exo_pyo3_bindings" },
     { name = "fastapi", specifier = ">=0.116.1" },
     { name = "filelock", specifier = ">=3.18.0" },
@@ -596,12 +507,22 @@ requires-dist = [
     { name = "huggingface-hub", specifier = ">=1.8.0" },
     { name = "hypercorn", specifier = ">=0.18.0" },
     { name = "loguru", specifier = ">=0.7.3" },
-    { name = "mflux", specifier = "==0.17.2" },
-    { name = "mlx", marker = "sys_platform == 'darwin'", git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks" },
-    { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.6" },
-    { name = "mlx-lm", git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak" },
-    { name = "mlx-vlm", specifier = ">=0.3.11" },
+    { name = "mflux", marker = "extra == 'mlx'", git = "https://github.com/evanev7/mflux?branch=exo2" },
+    { name = "mlx", marker = "sys_platform == 'darwin' and extra == 'mlx'", git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks" },
+    { name = "mlx", marker = "sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'mlx'", specifier = "==0.32.0" },
+    { name = "mlx", marker = "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'mlx'", url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" },
+    { name = "mlx", marker = "platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'mlx'", url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" },
+    { name = "mlx-cpu", marker = "sys_platform == 'linux' and extra == 'mlx-cpu'", specifier = "==0.31.2" },
+    { name = "mlx-cuda-12", marker = "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'mlx-cuda12'", url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_aarch64.whl" },
+    { name = "mlx-cuda-12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'mlx-cuda12'", url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_x86_64.whl" },
+    { name = "mlx-cuda-13", marker = "platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'mlx-cuda13'", url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_aarch64.whl" },
+    { name = "mlx-cuda-13", marker = "platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'mlx-cuda13'", url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_x86_64.whl" },
+    { name = "mlx-lm", marker = "extra == 'mlx'", git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Fdeepseek-v4" },
+    { name = "mlx-vlm", marker = "extra == 'mlx'", specifier = ">=0.3.11" },
     { name = "msgspec", specifier = ">=0.19.0" },
+    { name = "nanobind", marker = "extra == 'build'" },
+    { name = "nvidia-ml-py", marker = "extra == 'mlx-cuda12'", specifier = ">=13.595.45" },
+    { name = "nvidia-ml-py", marker = "extra == 'mlx-cuda13'", specifier = ">=13.595.45" },
     { name = "openai-harmony", specifier = ">=0.0.8" },
     { name = "psutil", specifier = ">=7.0.0" },
     { name = "pydantic", specifier = ">=2.11.7" },
@@ -609,14 +530,31 @@ requires-dist = [
     { name = "rustworkx", specifier = ">=0.17.1" },
     { name = "tiktoken", specifier = ">=0.12.0" },
     { name = "tomlkit", specifier = ">=0.14.0" },
-    { name = "transformers", specifier = ">=5.0.0,<5.4.0" },
+    { name = "torch", marker = "sys_platform == 'darwin' and extra == 'mlx'", specifier = "==2.10.0" },
+    { name = "torch", marker = "sys_platform == 'linux' and extra == 'mlx' and extra != 'mlx-cpu' and extra != 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==2.10.0" },
+    { name = "torch", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cpu' and extra != 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==2.10.0", index = "https://download.pytorch.org/whl/cpu" },
+    { name = "torch", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==2.10.0", index = "https://download.pytorch.org/whl/cu128" },
+    { name = "torch", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cuda13'", specifier = "==2.10.0", index = "https://download.pytorch.org/whl/cu130" },
+    { name = "torchaudio", marker = "sys_platform == 'darwin' and extra == 'mlx'", specifier = "==2.10.0" },
+    { name = "torchaudio", marker = "sys_platform == 'linux' and extra == 'mlx' and extra != 'mlx-cpu' and extra != 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==2.10.0" },
+    { name = "torchaudio", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cpu' and extra != 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==2.10.0", index = "https://download.pytorch.org/whl/cpu" },
+    { name = "torchaudio", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==2.10.0", index = "https://download.pytorch.org/whl/cu128" },
+    { name = "torchaudio", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cuda13'", specifier = "==2.10.0", index = "https://download.pytorch.org/whl/cu130" },
+    { name = "torchvision", marker = "sys_platform == 'darwin' and extra == 'mlx'", specifier = "==0.25.0" },
+    { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'mlx' and extra != 'mlx-cpu' and extra != 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==0.25.0" },
+    { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cpu' and extra != 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==0.25.0", index = "https://download.pytorch.org/whl/cpu" },
+    { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cuda12' and extra != 'mlx-cuda13'", specifier = "==0.25.0", index = "https://download.pytorch.org/whl/cu128" },
+    { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'mlx' and extra == 'mlx-cuda13'", specifier = "==0.25.0", index = "https://download.pytorch.org/whl/cu130" },
+    { name = "transformers", specifier = ">=5.6.2" },
     { name = "types-aiofiles", specifier = ">=24.1.0.20250708" },
     { name = "zstandard", specifier = ">=0.23.0" },
 ]
+provides-extras = ["build", "mlx-none", "mlx", "mlx-cpu", "mlx-cuda12", "mlx-cuda13"]
 
 [package.metadata.requires-dev]
 dev = [
     { name = "basedpyright", specifier = ">=1.29.0" },
+    { name = "playwright", specifier = ">=1.52.0" },
     { name = "pyinstaller", specifier = ">=6.17.0" },
     { name = "pytest", specifier = ">=8.4.0" },
     { name = "pytest-asyncio", specifier = ">=1.0.0" },
@@ -629,18 +567,18 @@ name = "exo-bench"
 version = "0.1.0"
 source = { editable = "bench" }
 dependencies = [
-    { name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "human-eval", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "lm-eval", extra = ["api", "math"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "math-verify", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "human-eval", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "lm-eval", extra = ["api", "math"], marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "math-verify", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [package.metadata]
@@ -661,14 +599,14 @@ requires-dist = [
 
 [[package]]
 name = "exo-pyo3-bindings"
-version = "0.2.1"
+version = "0.2.2"
 source = { editable = "rust/exo_pyo3_bindings" }
 
 [package.dev-dependencies]
 dev = [
-    { name = "exo-pyo3-bindings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pytest-asyncio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "exo-pyo3-bindings", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pytest-asyncio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [package.metadata]
@@ -680,28 +618,40 @@ dev = [
     { name = "pytest-asyncio", specifier = ">=1.0.0" },
 ]
 
+[[package]]
+name = "exo-tools"
+version = "0.1.0"
+source = { editable = "tools" }
+dependencies = [
+    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+
+[package.metadata]
+requires-dist = [{ name = "loguru", specifier = ">=0.7.3" }]
+
 [[package]]
 name = "fastapi"
-version = "0.128.0"
+version = "0.136.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "annotated-doc", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "starlette", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "annotated-doc", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "starlette", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typing-inspection", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5d/45/c130091c2dfa061bbfe3150f2a5091ef1adf149f2a8d2ae769ecaf6e99a2/fastapi-0.136.1.tar.gz", hash = "sha256:7af665ad7acfa0a3baf8983d393b6b471b9da10ede59c60045f49fbc89a0fa7f", size = 397448, upload-time = "2026-04-23T16:49:44.046Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/ff/2e4eca3ade2c22fe1dea7043b8ee9dabe47753349eb1b56a202de8af6349/fastapi-0.136.1-py3-none-any.whl", hash = "sha256:a6e9d7eeada96c93a4d69cb03836b44fa34e2854accb7244a1ece36cd4781c3f", size = 117683, upload-time = "2026-04-23T16:49:42.437Z" },
 ]
 
 [[package]]
 name = "filelock"
-version = "3.20.3"
+version = "3.29.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" },
+    { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" },
 ]
 
 [[package]]
@@ -709,7 +659,7 @@ name = "fire"
 version = "0.7.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "termcolor", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "termcolor", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c0/00/f8d10588d2019d6d6452653def1ee807353b21983db48550318424b5ff18/fire-0.7.1.tar.gz", hash = "sha256:3b208f05c736de98fb343310d090dcc4d8c78b2a89ea4f32b837c586270a9cbf", size = 88720, upload-time = "2025-08-16T20:20:24.175Z" }
 wheels = [
@@ -718,29 +668,19 @@ wheels = [
 
 [[package]]
 name = "fonttools"
-version = "4.61.1"
+version = "4.63.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ec/ca/cf17b88a8df95691275a3d77dc0a5ad9907f328ae53acbe6795da1b2f5ed/fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69", size = 3565756, upload-time = "2025-12-12T17:31:24.246Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/84/69/c97f2c18e0db87d2c7b15da1974dace76ae938f1cfa22e2727a648b7ed43/fonttools-4.63.0.tar.gz", hash = "sha256:caeb583deeb5168e694b65cda8b4ee62abedfa66cf88488734466f2366b9c4e0", size = 3597189, upload-time = "2026-05-14T12:04:30.958Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4b/cf/00ba28b0990982530addb8dc3e9e6f2fa9cb5c20df2abdda7baa755e8fe1/fonttools-4.61.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c56c488ab471628ff3bfa80964372fc13504ece601e0d97a78ee74126b2045c", size = 2846454, upload-time = "2025-12-12T17:30:24.938Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/ca/468c9a8446a2103ae645d14fee3f610567b7042aba85031c1c65e3ef7471/fonttools-4.61.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc492779501fa723b04d0ab1f5be046797fee17d27700476edc7ee9ae535a61e", size = 2398191, upload-time = "2025-12-12T17:30:27.343Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:64102ca87e84261419c3747a0d20f396eb024bdbeb04c2bfb37e2891f5fadcb5", size = 4928410, upload-time = "2025-12-12T17:30:29.771Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/8d/6fb3494dfe61a46258cd93d979cf4725ded4eb46c2a4ca35e4490d84daea/fonttools-4.61.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c1b526c8d3f615a7b1867f38a9410849c8f4aef078535742198e942fba0e9bd", size = 4984460, upload-time = "2025-12-12T17:30:32.073Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/f1/a47f1d30b3dc00d75e7af762652d4cbc3dff5c2697a0dbd5203c81afd9c3/fonttools-4.61.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:41ed4b5ec103bd306bb68f81dc166e77409e5209443e5773cb4ed837bcc9b0d3", size = 4925800, upload-time = "2025-12-12T17:30:34.339Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/01/e6ae64a0981076e8a66906fab01539799546181e32a37a0257b77e4aa88b/fonttools-4.61.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b501c862d4901792adaec7c25b1ecc749e2662543f68bb194c42ba18d6eec98d", size = 5067859, upload-time = "2025-12-12T17:30:36.593Z" },
-    { url = "https://files.pythonhosted.org/packages/32/8f/4e7bf82c0cbb738d3c2206c920ca34ca74ef9dabde779030145d28665104/fonttools-4.61.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fff4f534200a04b4a36e7ae3cb74493afe807b517a09e99cb4faa89a34ed6ecd", size = 2846094, upload-time = "2025-12-12T17:30:43.511Z" },
-    { url = "https://files.pythonhosted.org/packages/71/09/d44e45d0a4f3a651f23a1e9d42de43bc643cce2971b19e784cc67d823676/fonttools-4.61.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d9203500f7c63545b4ce3799319fe4d9feb1a1b89b28d3cb5abd11b9dd64147e", size = 2396589, upload-time = "2025-12-12T17:30:45.681Z" },
-    { url = "https://files.pythonhosted.org/packages/89/18/58c64cafcf8eb677a99ef593121f719e6dcbdb7d1c594ae5a10d4997ca8a/fonttools-4.61.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa646ecec9528bef693415c79a86e733c70a4965dd938e9a226b0fc64c9d2e6c", size = 4877892, upload-time = "2025-12-12T17:30:47.709Z" },
-    { url = "https://files.pythonhosted.org/packages/8a/ec/9e6b38c7ba1e09eb51db849d5450f4c05b7e78481f662c3b79dbde6f3d04/fonttools-4.61.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f35ad7805edba3aac1a3710d104592df59f4b957e30108ae0ba6c10b11dd75", size = 4972884, upload-time = "2025-12-12T17:30:49.656Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/87/b5339da8e0256734ba0dbbf5b6cdebb1dd79b01dc8c270989b7bcd465541/fonttools-4.61.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b931ae8f62db78861b0ff1ac017851764602288575d65b8e8ff1963fed419063", size = 4924405, upload-time = "2025-12-12T17:30:51.735Z" },
-    { url = "https://files.pythonhosted.org/packages/0b/47/e3409f1e1e69c073a3a6fd8cb886eb18c0bae0ee13db2c8d5e7f8495e8b7/fonttools-4.61.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b148b56f5de675ee16d45e769e69f87623a4944f7443850bf9a9376e628a89d2", size = 5035553, upload-time = "2025-12-12T17:30:54.823Z" },
-    { url = "https://files.pythonhosted.org/packages/39/5c/908ad78e46c61c3e3ed70c3b58ff82ab48437faf84ec84f109592cabbd9f/fonttools-4.61.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:e76ce097e3c57c4bcb67c5aa24a0ecdbd9f74ea9219997a707a4061fbe2707aa", size = 2929571, upload-time = "2025-12-12T17:31:02.574Z" },
-    { url = "https://files.pythonhosted.org/packages/bd/41/975804132c6dea64cdbfbaa59f3518a21c137a10cccf962805b301ac6ab2/fonttools-4.61.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9cfef3ab326780c04d6646f68d4b4742aae222e8b8ea1d627c74e38afcbc9d91", size = 2435317, upload-time = "2025-12-12T17:31:04.974Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/5a/aef2a0a8daf1ebaae4cfd83f84186d4a72ee08fd6a8451289fcd03ffa8a4/fonttools-4.61.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a75c301f96db737e1c5ed5fd7d77d9c34466de16095a266509e13da09751bd19", size = 4882124, upload-time = "2025-12-12T17:31:07.456Z" },
-    { url = "https://files.pythonhosted.org/packages/80/33/d6db3485b645b81cea538c9d1c9219d5805f0877fda18777add4671c5240/fonttools-4.61.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91669ccac46bbc1d09e9273546181919064e8df73488ea087dcac3e2968df9ba", size = 5100391, upload-time = "2025-12-12T17:31:09.732Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/d6/675ba631454043c75fcf76f0ca5463eac8eb0666ea1d7badae5fea001155/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c33ab3ca9d3ccd581d58e989d67554e42d8d4ded94ab3ade3508455fe70e65f7", size = 4978800, upload-time = "2025-12-12T17:31:11.681Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/33/d3ec753d547a8d2bdaedd390d4a814e8d5b45a093d558f025c6b990b554c/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:664c5a68ec406f6b1547946683008576ef8b38275608e1cee6c061828171c118", size = 5006426, upload-time = "2025-12-12T17:31:13.764Z" },
-    { url = "https://files.pythonhosted.org/packages/c7/4e/ce75a57ff3aebf6fc1f4e9d508b8e5810618a33d900ad6c19eb30b290b97/fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371", size = 1148996, upload-time = "2025-12-12T17:31:21.03Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/8d/d8fec3dcde2963f8c908fb315e5ff2cd0ac34f82394bbbf73a2aa5145ce3/fonttools-4.63.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:cd7e9857e5e63738b9d9fd707bc1f59c8b09e5177726d23664db393c59bb08bd", size = 2876062, upload-time = "2026-05-14T12:03:32.554Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/71/d935dc54e4ff121bfdd11e08702db63a7e6f25af21d8a3d7b7212df53641/fonttools-4.63.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c2a2a42198b696a6f48fad91709afb55176e66a5e566131219dba372fb7f8c59", size = 2424594, upload-time = "2026-05-14T12:03:34.86Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/40/e76320afa1df918e146155ef239b1719ee266092e96f5423bfd075affba1/fonttools-4.63.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e874792a8212b44583ea02189d9e693906b2f78b261f372f95d6c563210ac1d", size = 5024840, upload-time = "2026-05-14T12:03:36.745Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/36/0b805d8c485f872f65a509cbe3b58a5d0d17bee855333b54a150c79d3061/fonttools-4.63.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:22135da48a348785c5e2d5d2d9d6bec5ed44adacbaeb9db12d9493bf6c6bfa68", size = 4975801, upload-time = "2026-05-14T12:03:38.833Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/26/2cee03d0aa083ab022da5c07aff9ed3f689da1defb81ad6917c9627896da/fonttools-4.63.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ccf41f2efdf56994d22d73bef4ced1052161958169428d06ba9724ea9e9a64be", size = 4965009, upload-time = "2026-05-14T12:03:41.494Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/48/cc4b66d9058c0d0982c833fad10127c4b0e9324606aafa41382295ca4102/fonttools-4.63.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9ced0bd02ac751dd6319b0da88aaef24414e3b0dbc32bb4f24944821a3741a27", size = 5105892, upload-time = "2026-05-14T12:03:43.525Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/1f/a98a30a814b9ddef3a2e706025f90b9e0bc94890e6cb15254bc86547d11a/fonttools-4.63.0-cp313-cp313-win32.whl", hash = "sha256:85be818f5506e8a7753153def2c9550178f0ecae6a47b5e0e8dbb23f7cc90380", size = 2291313, upload-time = "2026-05-14T12:03:45.594Z" },
+    { url = "https://files.pythonhosted.org/packages/92/46/5177b01f3b4abfdd4409f31cca4ab279c9343a26efbe9ec78c97fc612e02/fonttools-4.63.0-cp313-cp313-win_amd64.whl", hash = "sha256:ba04cb5891d4c0c21b6da95eda8d7b090021508a294fff33464fc7d241e0856b", size = 2342299, upload-time = "2026-05-14T12:03:47.414Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/47/c99d5268f354002ce80f8d029cd9d7d872969da1de8b93d32de4dc56d6f4/fonttools-4.63.0-py3-none-any.whl", hash = "sha256:445af2eab030a16b9171ea8bdda7ebf7d96bda2df88ee182a464252f6e05e20d", size = 1164562, upload-time = "2026-05-14T12:04:29.092Z" },
 ]
 
 [[package]]
@@ -762,6 +702,9 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7a/58/afd56de246cf11780a40a2c28dc7cbabbf06337cc8ddb1c780a2d97e88d8/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1", size = 237763, upload-time = "2025-10-06T05:36:41.355Z" },
     { url = "https://files.pythonhosted.org/packages/cb/36/cdfaf6ed42e2644740d4a10452d8e97fa1c062e2a8006e4b09f1b5fd7d63/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8", size = 240110, upload-time = "2025-10-06T05:36:42.716Z" },
     { url = "https://files.pythonhosted.org/packages/03/a8/9ea226fbefad669f11b52e864c55f0bd57d3c8d7eb07e9f2e9a0b39502e1/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed", size = 233717, upload-time = "2025-10-06T05:36:44.251Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/0b/1b5531611e83ba7d13ccc9988967ea1b51186af64c42b7a7af465dcc9568/frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496", size = 39628, upload-time = "2025-10-06T05:36:45.423Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/cf/174c91dbc9cc49bc7b7aab74d8b734e974d1faa8f191c74af9b7e80848e6/frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231", size = 43882, upload-time = "2025-10-06T05:36:46.796Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/17/502cd212cbfa96eb1388614fe39a3fc9ab87dbbe042b66f97acb57474834/frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62", size = 39676, upload-time = "2025-10-06T05:36:47.8Z" },
     { url = "https://files.pythonhosted.org/packages/d2/5c/3bbfaa920dfab09e76946a5d2833a7cbdf7b9b4a91c714666ac4855b88b4/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94", size = 89235, upload-time = "2025-10-06T05:36:48.78Z" },
     { url = "https://files.pythonhosted.org/packages/d2/d6/f03961ef72166cec1687e84e8925838442b615bd0b8854b54923ce5b7b8a/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c", size = 50742, upload-time = "2025-10-06T05:36:49.837Z" },
     { url = "https://files.pythonhosted.org/packages/1e/bb/a6d12b7ba4c3337667d0e421f7181c82dda448ce4e7ad7ecd249a16fa806/frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52", size = 51725, upload-time = "2025-10-06T05:36:50.851Z" },
@@ -775,47 +718,42 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9f/d0/2366d3c4ecdc2fd391e0afa6e11500bfba0ea772764d631bbf82f0136c9d/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e", size = 289901, upload-time = "2025-10-06T05:37:00.811Z" },
     { url = "https://files.pythonhosted.org/packages/b8/94/daff920e82c1b70e3618a2ac39fbc01ae3e2ff6124e80739ce5d71c9b920/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0", size = 289395, upload-time = "2025-10-06T05:37:02.115Z" },
     { url = "https://files.pythonhosted.org/packages/e3/20/bba307ab4235a09fdcd3cc5508dbabd17c4634a1af4b96e0f69bfe551ebd/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41", size = 283659, upload-time = "2025-10-06T05:37:03.711Z" },
-    { url = "https://files.pythonhosted.org/packages/f1/c8/85da824b7e7b9b6e7f7705b2ecaf9591ba6f79c1177f324c2735e41d36a2/frozenlist-1.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0", size = 86127, upload-time = "2025-10-06T05:37:08.438Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/e8/a1185e236ec66c20afd72399522f142c3724c785789255202d27ae992818/frozenlist-1.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f", size = 49698, upload-time = "2025-10-06T05:37:09.48Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/93/72b1736d68f03fda5fdf0f2180fb6caaae3894f1b854d006ac61ecc727ee/frozenlist-1.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c", size = 49749, upload-time = "2025-10-06T05:37:10.569Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/b2/fabede9fafd976b991e9f1b9c8c873ed86f202889b864756f240ce6dd855/frozenlist-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2", size = 231298, upload-time = "2025-10-06T05:37:11.993Z" },
-    { url = "https://files.pythonhosted.org/packages/3a/3b/d9b1e0b0eed36e70477ffb8360c49c85c8ca8ef9700a4e6711f39a6e8b45/frozenlist-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8", size = 232015, upload-time = "2025-10-06T05:37:13.194Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/94/be719d2766c1138148564a3960fc2c06eb688da592bdc25adcf856101be7/frozenlist-1.8.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686", size = 225038, upload-time = "2025-10-06T05:37:14.577Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/09/6712b6c5465f083f52f50cf74167b92d4ea2f50e46a9eea0523d658454ae/frozenlist-1.8.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e", size = 240130, upload-time = "2025-10-06T05:37:15.781Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/d4/cd065cdcf21550b54f3ce6a22e143ac9e4836ca42a0de1022da8498eac89/frozenlist-1.8.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a", size = 242845, upload-time = "2025-10-06T05:37:17.037Z" },
-    { url = "https://files.pythonhosted.org/packages/62/c3/f57a5c8c70cd1ead3d5d5f776f89d33110b1addae0ab010ad774d9a44fb9/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128", size = 229131, upload-time = "2025-10-06T05:37:18.221Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/52/232476fe9cb64f0742f3fde2b7d26c1dac18b6d62071c74d4ded55e0ef94/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f", size = 240542, upload-time = "2025-10-06T05:37:19.771Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/85/07bf3f5d0fb5414aee5f47d33c6f5c77bfe49aac680bfece33d4fdf6a246/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7", size = 237308, upload-time = "2025-10-06T05:37:20.969Z" },
-    { url = "https://files.pythonhosted.org/packages/11/99/ae3a33d5befd41ac0ca2cc7fd3aa707c9c324de2e89db0e0f45db9a64c26/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30", size = 238210, upload-time = "2025-10-06T05:37:22.252Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/60/b1d2da22f4970e7a155f0adde9b1435712ece01b3cd45ba63702aea33938/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7", size = 231972, upload-time = "2025-10-06T05:37:23.5Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/c7/43200656ecc4e02d3f8bc248df68256cd9572b3f0017f0a0c4e93440ae23/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d", size = 89238, upload-time = "2025-10-06T05:37:29.373Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/29/55c5f0689b9c0fb765055629f472c0de484dcaf0acee2f7707266ae3583c/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed", size = 50738, upload-time = "2025-10-06T05:37:30.792Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/7d/b7282a445956506fa11da8c2db7d276adcbf2b17d8bb8407a47685263f90/frozenlist-1.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930", size = 51739, upload-time = "2025-10-06T05:37:32.127Z" },
-    { url = "https://files.pythonhosted.org/packages/62/1c/3d8622e60d0b767a5510d1d3cf21065b9db874696a51ea6d7a43180a259c/frozenlist-1.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c", size = 284186, upload-time = "2025-10-06T05:37:33.21Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/14/aa36d5f85a89679a85a1d44cd7a6657e0b1c75f61e7cad987b203d2daca8/frozenlist-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24", size = 292196, upload-time = "2025-10-06T05:37:36.107Z" },
-    { url = "https://files.pythonhosted.org/packages/05/23/6bde59eb55abd407d34f77d39a5126fb7b4f109a3f611d3929f14b700c66/frozenlist-1.8.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37", size = 273830, upload-time = "2025-10-06T05:37:37.663Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/3f/22cff331bfad7a8afa616289000ba793347fcd7bc275f3b28ecea2a27909/frozenlist-1.8.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a", size = 294289, upload-time = "2025-10-06T05:37:39.261Z" },
-    { url = "https://files.pythonhosted.org/packages/a4/89/5b057c799de4838b6c69aa82b79705f2027615e01be996d2486a69ca99c4/frozenlist-1.8.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2", size = 300318, upload-time = "2025-10-06T05:37:43.213Z" },
-    { url = "https://files.pythonhosted.org/packages/30/de/2c22ab3eb2a8af6d69dc799e48455813bab3690c760de58e1bf43b36da3e/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef", size = 282814, upload-time = "2025-10-06T05:37:45.337Z" },
-    { url = "https://files.pythonhosted.org/packages/59/f7/970141a6a8dbd7f556d94977858cfb36fa9b66e0892c6dd780d2219d8cd8/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe", size = 291762, upload-time = "2025-10-06T05:37:46.657Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/15/ca1adae83a719f82df9116d66f5bb28bb95557b3951903d39135620ef157/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8", size = 289470, upload-time = "2025-10-06T05:37:47.946Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/83/dca6dc53bf657d371fbc88ddeb21b79891e747189c5de990b9dfff2ccba1/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a", size = 289042, upload-time = "2025-10-06T05:37:49.499Z" },
-    { url = "https://files.pythonhosted.org/packages/96/52/abddd34ca99be142f354398700536c5bd315880ed0a213812bc491cff5e4/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e", size = 283148, upload-time = "2025-10-06T05:37:50.745Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/00/04ca1c3a7a124b6de4f8a9a17cc2fcad138b4608e7a3fc5877804b8715d7/frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = "sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b", size = 43492, upload-time = "2025-10-06T05:37:04.915Z" },
+    { url = "https://files.pythonhosted.org/packages/59/5e/c69f733a86a94ab10f68e496dc6b7e8bc078ebb415281d5698313e3af3a1/frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888", size = 48034, upload-time = "2025-10-06T05:37:06.343Z" },
+    { url = "https://files.pythonhosted.org/packages/16/6c/be9d79775d8abe79b05fa6d23da99ad6e7763a1d080fbae7290b286093fd/frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042", size = 41749, upload-time = "2025-10-06T05:37:07.431Z" },
     { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" },
 ]
 
 [[package]]
 name = "fsspec"
-version = "2026.1.0"
+version = "2026.2.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d5/7d/5df2650c57d47c57232af5ef4b4fdbff182070421e405e0d62c6cdbfaa87/fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b", size = 310496, upload-time = "2026-01-09T15:21:35.562Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/51/7c/f60c259dcbf4f0c47cc4ddb8f7720d2dcdc8888c8e5ad84c73ea4531cc5b/fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff", size = 313441, upload-time = "2026-02-05T21:50:53.743Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/01/c9/97cc5aae1648dcb851958a3ddf73ccd7dbe5650d95203ecb4d7720b4cdbf/fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc", size = 201838, upload-time = "2026-01-09T15:21:34.041Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" },
 ]
 
 [package.optional-dependencies]
 http = [
-    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+
+[[package]]
+name = "greenlet"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3c/3f/dbf99fb14bfeb88c28f16729215478c0e265cacd6dc22270c8f31bb6892f/greenlet-3.5.0.tar.gz", hash = "sha256:d419647372241bc68e957bf38d5c1f98852155e4146bd1e4121adea81f4f01e4", size = 196995, upload-time = "2026-04-27T13:37:15.544Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/58/fc576f99037ce19c5aa16628e4c3226b6d1419f72a62c79f5f40576e6eb3/greenlet-3.5.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5a5ed18de6a0f6cc7087f1563f6bd93fc7df1c19165ca01e9bde5a5dc281d106", size = 285066, upload-time = "2026-04-27T12:23:05.033Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/ba/b28ddbe6bfad6a8ac196ef0e8cff37bc65b79735995b9e410923fffeeb70/greenlet-3.5.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a717fbc46d8a354fa675f7c1e813485b6ba3885f9bef0cd56e5ba27d758ff5b", size = 604414, upload-time = "2026-04-27T12:52:42.358Z" },
+    { url = "https://files.pythonhosted.org/packages/09/06/4b69f8f0b67603a8be2790e55107a190b376f2627fe0eaf5695d85ffb3cd/greenlet-3.5.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ddc090c5c1792b10246a78e8c2163ebbe04cf877f9d785c230a7b27b39ad038e", size = 617349, upload-time = "2026-04-27T12:59:43.32Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/15/a643b4ecd09969e30b8a150d5919960caae0abe4f5af75ab040b1ab85e78/greenlet-3.5.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4964101b8585c144cbda5532b1aa644255126c08a265dae90c16e7a0e63aaa9d", size = 623234, upload-time = "2026-04-27T13:02:40.611Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/17/a3918541fd0ddefe024a69de6d16aa7b46d36ac19562adaa63c7fa180eff/greenlet-3.5.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2094acd54b272cb6eae8c03dd87b3fa1820a4cef18d6889c378d503500a1dc13", size = 613927, upload-time = "2026-04-27T12:25:30.28Z" },
+    { url = "https://files.pythonhosted.org/packages/77/18/3b13d5ef1275b0ffaf933b05efa21408ac4ca95823c7411d79682e4fdcff/greenlet-3.5.0-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:7022615368890680e67b9965d33f5773aade330d5343bbe25560135aaa849eae", size = 425243, upload-time = "2026-04-27T13:05:15.689Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/e1/bd0af6213c7dd33175d8a462d4c1fe1175124ebed4855bc1475a5b5242c2/greenlet-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5e05ba267789ea87b5a155cf0e810b1ab88bf18e9e8740813945ceb8ee4350ba", size = 1570893, upload-time = "2026-04-27T12:53:29.483Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/2a/0789702f864f5382cb476b93d7a9c823c10472658102ccd65f415747d2e2/greenlet-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0ecec963079cd58cbd14723582384f11f166fd58883c15dcbfb342e0bc9b5846", size = 1636060, upload-time = "2026-04-27T12:25:28.845Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/8f/22bf9df92bbff0eb07842b60f7e63bf7675a9742df628437a9f02d09137f/greenlet-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:728d9667d8f2f586644b748dbd9bb67e50d6a9381767d1357714ea6825bb3bf5", size = 238740, upload-time = "2026-04-27T12:24:01.341Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/b7/9c5c3d653bd4ff614277c049ac676422e2c557db47b4fe43e6313fc005dc/greenlet-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:47422135b1d308c14b2c6e758beedb1acd33bb91679f5670edf77bf46244722b", size = 235525, upload-time = "2026-04-27T12:23:12.308Z" },
 ]
 
 [[package]]
@@ -832,8 +770,8 @@ name = "h2"
 version = "4.3.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "hpack", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "hyperframe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "hpack", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "hyperframe", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" }
 wheels = [
@@ -868,32 +806,32 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/82/1a/9c748befbe3decf7cb415e34f8a0c3789a0a9c55910dea73d581e48c0ce5/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5", size = 3390096, upload-time = "2025-01-07T10:04:59.98Z" },
     { url = "https://files.pythonhosted.org/packages/72/85/4c03da147b6b4b7cb12e074d3d44eee28604a387ed0eaf7eaaead5069c57/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d", size = 3664743, upload-time = "2025-01-07T10:05:05.416Z" },
     { url = "https://files.pythonhosted.org/packages/e7/6e/e597b04f753f1b09e6893075d53a82a30c13855cbaa791402695b01e369f/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746", size = 3695243, upload-time = "2025-01-07T10:05:11.411Z" },
+    { url = "https://files.pythonhosted.org/packages/09/89/d4e234727a26b2546c8fb70a276cd924260d60135f2165bf8b9ed67bb9a4/hf_transfer-0.1.9-cp38-abi3-win32.whl", hash = "sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e", size = 1086605, upload-time = "2025-01-07T10:05:18.873Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240, upload-time = "2025-01-07T10:05:14.324Z" },
 ]
 
 [[package]]
 name = "hf-xet"
-version = "1.4.2"
+version = "1.5.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/09/08/23c84a26716382c89151b5b447b4beb19e3345f3a93d3b73009a71a57ad3/hf_xet-1.4.2.tar.gz", hash = "sha256:b7457b6b482d9e0743bd116363239b1fa904a5e65deede350fbc0c4ea67c71ea", size = 672357, upload-time = "2026-03-13T06:58:51.077Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/74/d8/5c06fc76461418326a7decf8367480c35be11a41fd938633929c60a9ec6b/hf_xet-1.5.0.tar.gz", hash = "sha256:e0fb0a34d9f406eed88233e829a67ec016bec5af19e480eac65a233ea289a948", size = 837196, upload-time = "2026-05-06T06:18:15.583Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/18/06/e8cf74c3c48e5485c7acc5a990d0d8516cdfb5fdf80f799174f1287cc1b5/hf_xet-1.4.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ac8202ae1e664b2c15cdfc7298cbb25e80301ae596d602ef7870099a126fcad4", size = 3796125, upload-time = "2026-03-13T06:58:33.177Z" },
-    { url = "https://files.pythonhosted.org/packages/66/d4/b73ebab01cbf60777323b7de9ef05550790451eb5172a220d6b9845385ec/hf_xet-1.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6d2f8ee39fa9fba9af929f8c0d0482f8ee6e209179ad14a909b6ad78ffcb7c81", size = 3555985, upload-time = "2026-03-13T06:58:31.797Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/e7/ded6d1bd041c3f2bca9e913a0091adfe32371988e047dd3a68a2463c15a2/hf_xet-1.4.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4642a6cf249c09da8c1f87fe50b24b2a3450b235bf8adb55700b52f0ea6e2eb6", size = 4212085, upload-time = "2026-03-13T06:58:24.323Z" },
-    { url = "https://files.pythonhosted.org/packages/97/c1/a0a44d1f98934f7bdf17f7a915b934f9fca44bb826628c553589900f6df8/hf_xet-1.4.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:769431385e746c92dc05492dde6f687d304584b89c33d79def8367ace06cb555", size = 3988266, upload-time = "2026-03-13T06:58:22.887Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/82/be713b439060e7d1f1d93543c8053d4ef2fe7e6922c5b31642eaa26f3c4b/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c9dd1c1bc4cc56168f81939b0e05b4c36dd2d28c13dc1364b17af89aa0082496", size = 4188513, upload-time = "2026-03-13T06:58:40.858Z" },
-    { url = "https://files.pythonhosted.org/packages/21/a6/cbd4188b22abd80ebd0edbb2b3e87f2633e958983519980815fb8314eae5/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fca58a2ae4e6f6755cc971ac6fcdf777ea9284d7e540e350bb000813b9a3008d", size = 4428287, upload-time = "2026-03-13T06:58:42.601Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/0f/fcd2504015eab26358d8f0f232a1aed6b8d363a011adef83fe130bff88f7/hf_xet-1.4.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:949dcf88b484bb9d9276ca83f6599e4aa03d493c08fc168c124ad10b2e6f75d7", size = 3796493, upload-time = "2026-03-13T06:58:39.267Z" },
-    { url = "https://files.pythonhosted.org/packages/82/56/19c25105ff81731ca6d55a188b5de2aa99d7a2644c7aa9de1810d5d3b726/hf_xet-1.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:41659966020d59eb9559c57de2cde8128b706a26a64c60f0531fa2318f409418", size = 3555797, upload-time = "2026-03-13T06:58:37.546Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/e3/8933c073186849b5e06762aa89847991d913d10a95d1603eb7f2c3834086/hf_xet-1.4.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c588e21d80010119458dd5d02a69093f0d115d84e3467efe71ffb2c67c19146", size = 4212127, upload-time = "2026-03-13T06:58:30.539Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/01/f89ebba4e369b4ed699dcb60d3152753870996f41c6d22d3d7cac01310e1/hf_xet-1.4.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a296744d771a8621ad1d50c098d7ab975d599800dae6d48528ba3944e5001ba0", size = 3987788, upload-time = "2026-03-13T06:58:29.139Z" },
-    { url = "https://files.pythonhosted.org/packages/84/4d/8a53e5ffbc2cc33bbf755382ac1552c6d9af13f623ed125fe67cc3e6772f/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f563f7efe49588b7d0629d18d36f46d1658fe7e08dce3fa3d6526e1c98315e2d", size = 4188315, upload-time = "2026-03-13T06:58:48.017Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/b8/b7a1c1b5592254bd67050632ebbc1b42cc48588bf4757cb03c2ef87e704a/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5b2e0132c56d7ee1bf55bdb638c4b62e7106f6ac74f0b786fed499d5548c5570", size = 4428306, upload-time = "2026-03-13T06:58:49.502Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/86/b40b83a2ff03ef05c4478d2672b1fc2b9683ff870e2b25f4f3af240f2e7b/hf_xet-1.4.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71f02d6e4cdd07f344f6844845d78518cc7186bd2bc52d37c3b73dc26a3b0bc5", size = 3800339, upload-time = "2026-03-13T06:58:36.245Z" },
-    { url = "https://files.pythonhosted.org/packages/64/2e/af4475c32b4378b0e92a587adb1aa3ec53e3450fd3e5fe0372a874531c00/hf_xet-1.4.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9b38d876e94d4bdcf650778d6ebbaa791dd28de08db9736c43faff06ede1b5a", size = 3559664, upload-time = "2026-03-13T06:58:34.787Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/4c/781267da3188db679e601de18112021a5cb16506fe86b246e22c5401a9c4/hf_xet-1.4.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:77e8c180b7ef12d8a96739a4e1e558847002afe9ea63b6f6358b2271a8bdda1c", size = 4217422, upload-time = "2026-03-13T06:58:27.472Z" },
-    { url = "https://files.pythonhosted.org/packages/68/47/d6cf4a39ecf6c7705f887a46f6ef5c8455b44ad9eb0d391aa7e8a2ff7fea/hf_xet-1.4.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c3b3c6a882016b94b6c210957502ff7877802d0dbda8ad142c8595db8b944271", size = 3992847, upload-time = "2026-03-13T06:58:25.989Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/ef/e80815061abff54697239803948abc665c6b1d237102c174f4f7a9a5ffc5/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d9a634cc929cfbaf2e1a50c0e532ae8c78fa98618426769480c58501e8c8ac2", size = 4193843, upload-time = "2026-03-13T06:58:44.59Z" },
-    { url = "https://files.pythonhosted.org/packages/54/75/07f6aa680575d9646c4167db6407c41340cbe2357f5654c4e72a1b01ca14/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b0932eb8b10317ea78b7da6bab172b17be03bbcd7809383d8d5abd6a2233e04", size = 4432751, upload-time = "2026-03-13T06:58:46.533Z" },
+    { url = "https://files.pythonhosted.org/packages/68/9b/6912c99070915a4f28119e3c5b52a9abd1eec0ad5cb293b8c967a0c6f5a2/hf_xet-1.5.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7d70fe2ce97b9db73b9c9b9c81fe3693640aec83416a966c446afea54acfae3c", size = 4023383, upload-time = "2026-05-06T06:17:53.947Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/6d/9563cfde59b5d8128a9c7ec972a087f4c782e4f7bac5a85234edfd5d5e49/hf_xet-1.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:73a0dae8c71de3b0633a45c73f4a4a5ed09e94b43441d82981a781d4f12baa42", size = 3792751, upload-time = "2026-05-06T06:17:51.791Z" },
+    { url = "https://files.pythonhosted.org/packages/07/a5/ed5a0cf35b49a0571af5a8f53416dad1877a718c021c9937c3a53cb45781/hf_xet-1.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a60290ec57e9b71767fba7c3645ddafdd0759974b540441510c629c6db6db24a", size = 4456058, upload-time = "2026-05-06T06:17:40.735Z" },
+    { url = "https://files.pythonhosted.org/packages/60/fb/3ae8bf2a7a37a4197d0195d7247fd25b3952e15cb8a599e285dfaa6f52b3/hf_xet-1.5.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e5de0f6deada0dada870bb376a11bcd1f08abf3a968a6d118f33e72d1b1eb480", size = 4250783, upload-time = "2026-05-06T06:17:38.412Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/9b/8bae40d4d91525085137196e84eb0ed49cf65b5e96e5c3ecdadd8bd0fac2/hf_xet-1.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c799d49f1a5544a0ef7591c0ee75e0d6b93d6f56dc7a4979f59f7518d2872216", size = 4445594, upload-time = "2026-05-06T06:18:04.219Z" },
+    { url = "https://files.pythonhosted.org/packages/13/59/c74efbbd4e8728172b2cc72a2bc014d2947a4b7bdced932fbd3f5da1a4e5/hf_xet-1.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2baea1b0b989e5c152fe81425f7745ddc8901280ba3d97c98d8cdece7b706c60", size = 4663995, upload-time = "2026-05-06T06:18:06.1Z" },
+    { url = "https://files.pythonhosted.org/packages/73/32/8e1e0410af64cda9b139d1dcebdc993a8ff9c8c7c0e2696ae356d75ccc0d/hf_xet-1.5.0-cp313-cp313t-win_amd64.whl", hash = "sha256:526345b3ed45f374f6317349df489167606736c876241ba984105afe7fd4839d", size = 3966608, upload-time = "2026-05-06T06:18:19.74Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/34/a8febc8f4edbea8b3e21b02ebc8b628679b84ba7e45cde624a7736b51500/hf_xet-1.5.0-cp313-cp313t-win_arm64.whl", hash = "sha256:786d28e2eb8315d5035544b9d137b4a842d600c434bb91bf7d0d953cce906ad4", size = 3796946, upload-time = "2026-05-06T06:18:17.568Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/fb/69ff198a82cae7eb1a69fb84d93b3a3e4816564d76817fe541ddc96874eb/hf_xet-1.5.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:dad0dc84e941b8ba3c860659fe1fdc35c049d47cce293f003287757e971a8f56", size = 4030814, upload-time = "2026-05-06T06:17:57.933Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/ff/edcc2b40162bef3ff78e14ab637e5f3b89243d6aee72f5949d3bb6a5af83/hf_xet-1.5.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:fd6e5a9b0fdac4ed03ed45ef79254a655b1aaab514a02202617fbf643f5fdf7a", size = 3798444, upload-time = "2026-05-06T06:17:55.79Z" },
+    { url = "https://files.pythonhosted.org/packages/49/4d/103f76b04310e5e57656696cc184690d20c466af0bca3ca88f8c8ea5d4f3/hf_xet-1.5.0-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3531b1823a0e6d77d80f9ed15ca0e00f0d115094f8ac033d5cae88f4564cc949", size = 4465986, upload-time = "2026-05-06T06:17:44.886Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/a2/546f47f464737b3edbab6f8ddb57f2599b93d2cbb66f06abb475ccb48651/hf_xet-1.5.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9a0ee58cd18d5ea799f7ed11290bbccbe56bdd8b1d97ca74b9cc49a3945d7a3b", size = 4259865, upload-time = "2026-05-06T06:17:42.639Z" },
+    { url = "https://files.pythonhosted.org/packages/95/7f/1be593c1f28613be2e196473481cd81bfc5910795e30a34e8f744f6cac4f/hf_xet-1.5.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e60df5a42e9bed8628b6416af2cba4cba57ae9f02de226a06b020d98e1aab18", size = 4459835, upload-time = "2026-05-06T06:18:08.026Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/b2/703569fc881f3284487e68cda7b42179978480da3c438042a6bbbb4a671c/hf_xet-1.5.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4b35549ce62601b84da4ff9b24d970032ace3d4430f52d91bcbb26c901d6c690", size = 4672414, upload-time = "2026-05-06T06:18:09.864Z" },
+    { url = "https://files.pythonhosted.org/packages/af/37/1b6def445c567286b50aa3b33828158e135b1be44938dde59f11382a500c/hf_xet-1.5.0-cp37-abi3-win_amd64.whl", hash = "sha256:2806c7c17b4d23f8d88f7c4814f838c3b6150773fe339c20af23e1cfaf2797e4", size = 3977238, upload-time = "2026-05-06T06:18:23.621Z" },
+    { url = "https://files.pythonhosted.org/packages/62/94/3b66b148778ee100dcfd69c2ca22b57b41b44d3063ceec934f209e9184ce/hf_xet-1.5.0-cp37-abi3-win_arm64.whl", hash = "sha256:b6c9df403040248c76d808d3e047d64db2d923bae593eb244c41e425cf6cd7be", size = 3806916, upload-time = "2026-05-06T06:18:21.7Z" },
 ]
 
 [[package]]
@@ -910,8 +848,8 @@ name = "httpcore"
 version = "1.0.9"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
 wheels = [
@@ -923,10 +861,10 @@ name = "httpx"
 version = "0.28.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "httpcore", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "httpcore", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
 wheels = [
@@ -935,22 +873,22 @@ wheels = [
 
 [[package]]
 name = "huggingface-hub"
-version = "1.8.0"
+version = "1.15.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fsspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "hf-xet", marker = "(platform_machine == 'AMD64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'darwin') or (platform_machine == 'amd64' and sys_platform == 'darwin') or (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'amd64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typer", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "fsspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "hf-xet", marker = "(platform_machine == 'AMD64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'darwin') or (platform_machine == 'amd64' and sys_platform == 'darwin') or (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'amd64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'darwin' and extra == 
'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typer", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8e/2a/a847fd02261cd051da218baf99f90ee7c7040c109a01833db4f838f25256/huggingface_hub-1.8.0.tar.gz", hash = "sha256:c5627b2fd521e00caf8eff4ac965ba988ea75167fad7ee72e17f9b7183ec63f3", size = 735839, upload-time = "2026-03-25T16:01:28.152Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/bb/b6/e22bd20a25299c34b8c5922c1545a6320825b13906eb0f7298edfd034a0b/huggingface_hub-1.15.0.tar.gz", hash = "sha256:28abfdddda3927fd4de6a63cf26ab012498a2c24dae52baf150c5c6edf98a1d5", size = 784100, upload-time = "2026-05-15T11:42:52.149Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a9/ae/8a3a16ea4d202cb641b51d2681bdd3d482c1c592d7570b3fa264730829ce/huggingface_hub-1.8.0-py3-none-any.whl", hash = "sha256:d3eb5047bd4e33c987429de6020d4810d38a5bef95b3b40df9b17346b7f353f2", size = 625208, upload-time = "2026-03-25T16:01:26.603Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/11/0b64cc9024329b76d7547c19a67604a61d21d3ba678a69d1b220c29d5112/huggingface_hub-1.15.0-py3-none-any.whl", hash = "sha256:a4a59af04cbc41a3fe3fec429b171ef994ef8c971eda10136746f408dd4e3744", size = 663602, upload-time = "2026-05-15T11:42:50.487Z" },
 ]
 
 [[package]]
@@ -958,9 +896,9 @@ name = "human-eval"
 version = "1.0.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "fire", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "fire", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/0a/4b/44832f60f8fb14257019084412229a2ed48e34acd0d9d3f8198196d02dd7/human-eval-1.0.3.tar.gz", hash = "sha256:03ee63d3fbbb6fecbc5dfae58381fc655004ad86caafeb141ca2169c63648766", size = 54651, upload-time = "2023-07-24T18:45:58.99Z" }
 wheels = [
@@ -972,10 +910,10 @@ name = "hypercorn"
 version = "0.18.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "h2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "priority", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "wsproto", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "h2", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "priority", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "wsproto", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/44/01/39f41a014b83dd5c795217362f2ca9071cf243e6a75bdcd6cd5b944658cc/hypercorn-0.18.0.tar.gz", hash = "sha256:d63267548939c46b0247dc8e5b45a9947590e35e64ee73a23c074aa3cf88e9da", size = 68420, upload-time = "2025-11-08T13:54:04.78Z" }
 wheels = [
@@ -993,23 +931,23 @@ wheels = [
 
 [[package]]
 name = "id"
-version = "1.5.0"
+version = "1.6.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/22/11/102da08f88412d875fa2f1a9a469ff7ad4c874b0ca6fed0048fe385bdb3d/id-1.5.0.tar.gz", hash = "sha256:292cb8a49eacbbdbce97244f47a97b4c62540169c976552e497fd57df0734c1d", size = 15237, upload-time = "2024-12-04T19:53:05.575Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/04/c2156091427636080787aac190019dc64096e56a23b7364d3c1764ee3a06/id-1.6.1.tar.gz", hash = "sha256:d0732d624fb46fd4e7bc4e5152f00214450953b9e772c182c1c22964def1a069", size = 18088, upload-time = "2026-02-04T16:19:41.26Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9f/cb/18326d2d89ad3b0dd143da971e77afd1e6ca6674f1b1c3df4b6bec6279fc/id-1.5.0-py3-none-any.whl", hash = "sha256:f1434e1cef91f2cbb8a4ec64663d5a23b9ed43ef44c4c957d02583d61714c658", size = 13611, upload-time = "2024-12-04T19:53:03.02Z" },
+    { url = "https://files.pythonhosted.org/packages/42/77/de194443bf38daed9452139e960c632b0ef9f9a5dd9ce605fdf18ca9f1b1/id-1.6.1-py3-none-any.whl", hash = "sha256:f5ec41ed2629a508f5d0988eda142e190c9c6da971100612c4de9ad9f9b237ca", size = 14689, upload-time = "2026-02-04T16:19:40.051Z" },
 ]
 
 [[package]]
 name = "idna"
-version = "3.11"
+version = "3.15"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = "2026-05-12T22:45:55.733Z" },
 ]
 
 [[package]]
@@ -1026,7 +964,7 @@ name = "jaraco-classes"
 version = "3.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "more-itertools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "more-itertools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" }
 wheels = [
@@ -1035,23 +973,23 @@ wheels = [
 
 [[package]]
 name = "jaraco-context"
-version = "6.1.0"
+version = "6.1.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/cb/9c/a788f5bb29c61e456b8ee52ce76dbdd32fd72cd73dd67bc95f42c7a8d13c/jaraco_context-6.1.0.tar.gz", hash = "sha256:129a341b0a85a7db7879e22acd66902fda67882db771754574338898b2d5d86f", size = 15850, upload-time = "2026-01-13T02:53:53.847Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/50/4763cd07e722bb6285316d390a164bc7e479db9d90daa769f22578f698b4/jaraco_context-6.1.2.tar.gz", hash = "sha256:f1a6c9d391e661cc5b8d39861ff077a7dc24dc23833ccee564b234b81c82dfe3", size = 16801, upload-time = "2026-03-20T22:13:33.922Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8d/48/aa685dbf1024c7bd82bede569e3a85f82c32fd3d79ba5fea578f0159571a/jaraco_context-6.1.0-py3-none-any.whl", hash = "sha256:a43b5ed85815223d0d3cfdb6d7ca0d2bc8946f28f30b6f3216bda070f68badda", size = 7065, upload-time = "2026-01-13T02:53:53.031Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/58/bc8954bda5fcda97bd7c19be11b85f91973d67a706ed4a3aec33e7de22db/jaraco_context-6.1.2-py3-none-any.whl", hash = "sha256:bf8150b79a2d5d91ae48629d8b427a8f7ba0e1097dd6202a9059f29a36379535", size = 7871, upload-time = "2026-03-20T22:13:32.808Z" },
 ]
 
 [[package]]
 name = "jaraco-functools"
-version = "4.4.0"
+version = "4.5.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "more-itertools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "more-itertools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/0f/27/056e0638a86749374d6f57d0b0db39f29509cce9313cf91bdc0ac4d91084/jaraco_functools-4.4.0.tar.gz", hash = "sha256:da21933b0417b89515562656547a77b4931f98176eb173644c0d35032a33d6bb", size = 19943, upload-time = "2025-12-21T09:29:43.6Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/36/cf/ea4ef2920830dea3f5ab2ea4da6fb67724e6dca80ee2553788c3607243d0/jaraco_functools-4.5.0.tar.gz", hash = "sha256:3bb5665ea4a020cf78a7040e89154c77edadb3ca74f366479669c5999aa70b03", size = 20272, upload-time = "2026-05-15T21:34:10.025Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fd/c4/813bb09f0985cb21e959f21f2464169eca882656849adf727ac7bb7e1767/jaraco_functools-4.4.0-py3-none-any.whl", hash = "sha256:9eec1e36f45c818d9bf307c8948eb03b2b56cd44087b3cdc989abca1f20b9176", size = 10481, upload-time = "2025-12-21T09:29:42.27Z" },
+    { url = "https://files.pythonhosted.org/packages/96/9a/982e48afcffcd727a9144506720ffd4224b6b7e355c98641866f38b7c043/jaraco_functools-4.5.0-py3-none-any.whl", hash = "sha256:79ce39246eddbde4b3a03b77ea5f0f7878dc669b166a66cf3fa8e266aa3fa2f4", size = 10594, upload-time = "2026-05-15T21:34:08.595Z" },
 ]
 
 [[package]]
@@ -1068,7 +1006,7 @@ name = "jinja2"
 version = "3.1.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "markupsafe", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "markupsafe", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" }
 wheels = [
@@ -1084,28 +1022,16 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
 ]
 
-[[package]]
-name = "jsonlines"
-version = "4.0.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "attrs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/35/87/bcda8e46c88d0e34cad2f09ee2d0c7f5957bccdb9791b0b934ec84d84be4/jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74", size = 11359, upload-time = "2023-09-01T12:34:44.187Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload-time = "2023-09-01T12:34:42.563Z" },
-]
-
 [[package]]
 name = "keyring"
 version = "25.7.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "jaraco-classes", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "jaraco-context", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "jaraco-functools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "jeepney", marker = "sys_platform == 'linux'" },
-    { name = "secretstorage", marker = "sys_platform == 'linux'" },
+    { name = "jaraco-classes", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "jaraco-context", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "jaraco-functools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "jeepney", marker = "sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "secretstorage", marker = "sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" }
 wheels = [
@@ -1114,54 +1040,39 @@ wheels = [
 
 [[package]]
 name = "kiwisolver"
-version = "1.4.10rc0"
+version = "1.5.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/bf/de/354c903d772c1cc0a9310344e077b31c6c893cc5a664019b907a04997099/kiwisolver-1.4.10rc0.tar.gz", hash = "sha256:d321718aaa2583577be9836e8cc0ed9fd0863e57a85b1b73b328aac063bc9903", size = 97614, upload-time = "2025-08-10T20:22:27.702Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d0/67/9c61eccb13f0bdca9307614e782fec49ffdde0f7a2314935d489fa93cd9c/kiwisolver-1.5.0.tar.gz", hash = "sha256:d4193f3d9dc3f6f79aaed0e5637f45d98850ebf01f7ca20e69457f3e8946b66a", size = 103482, upload-time = "2026-03-09T13:15:53.382Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/45/7f/fa24b3666fab8c2956ce7d7d4e05ba16db6f6d2d47119c2d91d1c6a7acc9/kiwisolver-1.4.10rc0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7804171d944890ebe6e549f75af5d0f6247f612b6e4477364e8af6bea0bdc46c", size = 123747, upload-time = "2025-08-10T20:21:02.996Z" },
-    { url = "https://files.pythonhosted.org/packages/03/28/15292f93eae55cf5e6fe92a6d1afb5b945a368098d6207aca1cbd96fb715/kiwisolver-1.4.10rc0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c99e1074a531560410aaf1c207de83d483c0b663017a9ddcec15aceae60a8df", size = 66527, upload-time = "2025-08-10T20:21:04.089Z" },
-    { url = "https://files.pythonhosted.org/packages/be/a2/e40c005bcd90254cb6cbba49cf044a450bdfc7eb9c9770f29166db18ce4e/kiwisolver-1.4.10rc0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:019791b16388c20ec7c1dcd6cb76e2eb493d8b199e0fc443ee97c457b763607b", size = 65013, upload-time = "2025-08-10T20:21:05.157Z" },
-    { url = "https://files.pythonhosted.org/packages/25/41/12101024a85b6052119b1af613fb6c7f588b32d0025592a399decfda893a/kiwisolver-1.4.10rc0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d6389700c5c3568a8bc1dd8766e74f8ee5819dccf795a85a090c2553592fd0e", size = 1474690, upload-time = "2025-08-10T20:21:06.304Z" },
-    { url = "https://files.pythonhosted.org/packages/13/da/0c9638f35488cf6fa4e8b7d5ff958770e1d7eadb1c7d17800d00f2746963/kiwisolver-1.4.10rc0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:04bc9f5acf650e30dd332989272e660e8e78f97f240a3c7765d6e15ee4db9146", size = 1276603, upload-time = "2025-08-10T20:21:08.163Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/36/448c98d01e90cc176b97848356f73f55a42eb846d612d913e695fbfc239c/kiwisolver-1.4.10rc0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7efcc55e35be59b0ddb21a8d22a8aaa8a0494d44da4776e158889dbd9abbe989", size = 1294550, upload-time = "2025-08-10T20:21:09.874Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/02/b51c4d88db1ec21b42d508b2bfc61071192ab57e79eb9efc5096f564a6e4/kiwisolver-1.4.10rc0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:07ba167734ff2616e6853c639d8919b91a8595d675fb940b90feed1e513dc141", size = 1343764, upload-time = "2025-08-10T20:21:11.238Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/5b/1367d1a0ec9cef06021e7367802d89457be48d4e8442800d91564e6dad2a/kiwisolver-1.4.10rc0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:453953bce6a745c7b84ea9e9f600802a5f5cbf4acf60efaa7832dd20acc40772", size = 2224958, upload-time = "2025-08-10T20:21:12.86Z" },
-    { url = "https://files.pythonhosted.org/packages/59/a3/cdc5fef9b8110d60e9185104067ef8a6b7c56b9315475cb73e5c10953633/kiwisolver-1.4.10rc0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3dde1fe2838d9ef93f0c66a564c9b369652127190b8da1e6378075d7a0176281", size = 2321418, upload-time = "2025-08-10T20:21:14.451Z" },
-    { url = "https://files.pythonhosted.org/packages/16/b8/12c5187d08c79c053ba9bb0622720322991edfd3fd14e9ef3d2a2cfd4036/kiwisolver-1.4.10rc0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:319c1c56b4497fe729c5c9c2a319957b8bf70b5bd036f478c20b8dccb906f8ad", size = 2488384, upload-time = "2025-08-10T20:21:16.233Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/3e/4f6800de4b1ca9c0f011ffd46f4871cbf3b10b2d02a38a4c37c1445fe88e/kiwisolver-1.4.10rc0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:244946ee11b873e9ae4f01d8bc8cfe44d6c7369421e1980b3220b27e5dccae79", size = 2292042, upload-time = "2025-08-10T20:21:17.945Z" },
-    { url = "https://files.pythonhosted.org/packages/11/f2/2b3ec9b63e57f948a0bf1867e7e5b6a1aca12623335a6a7bdbccd72fa49d/kiwisolver-1.4.10rc0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f0ec8b92ac6bee771883865afd9a8725fef2ad420f77b88c91313ff1d417b5f7", size = 126584, upload-time = "2025-08-10T20:21:21.345Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/e3/c6647c859796dfb6b60b5c2b6216877831adec5558e21bc9bd061d8b2e08/kiwisolver-1.4.10rc0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0315b7f45a244696093b53308d2546879341b3e85d4bf4a66e21d35e076aa7eb", size = 67962, upload-time = "2025-08-10T20:21:22.449Z" },
-    { url = "https://files.pythonhosted.org/packages/21/8a/85ef96d5f220887b60fee183a4ac977fab7189404b625382c6aeae297eb6/kiwisolver-1.4.10rc0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:65ff3f2320ced57b1d020a9c31ccdfa9eb8b58e2b40be1e47feafc8785c16a1a", size = 66478, upload-time = "2025-08-10T20:21:23.471Z" },
-    { url = "https://files.pythonhosted.org/packages/85/6c/ab252887a1b6af045959fe589e0cf3019b23ad6f8923b900ab0cc472284f/kiwisolver-1.4.10rc0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a91fdb10abc117f4df88ac1036c7b220be19bfb3b25d116ef07538087920fed0", size = 1582201, upload-time = "2025-08-10T20:21:24.634Z" },
-    { url = "https://files.pythonhosted.org/packages/57/1a/1fcbaad9a2d6965acdbc903d2fed2bf335e746ebd6295f495435ea0583ec/kiwisolver-1.4.10rc0-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f10d577d557c9cc0f84794a52957782fd3b65da3ddf8f010dc880f5124f13356", size = 1389458, upload-time = "2025-08-10T20:21:26.015Z" },
-    { url = "https://files.pythonhosted.org/packages/17/39/2905d2c97253d7336ef13f581ca05c0f15b3ccde1309221abe21b027f12e/kiwisolver-1.4.10rc0-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dffb2678f68aa3aaa79cefa229981ac1f6b2ab1317b40b662c1059009fb3df70", size = 1401841, upload-time = "2025-08-10T20:21:27.39Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/a6/ecb4a9079292dd8e9771adfc1116ff56362ed89a8906d048e4918e8b21bc/kiwisolver-1.4.10rc0-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:74c4bba7e213c4fb94a7cc23e4ae67755d7c188a214302f8da75d9117c158459", size = 1453704, upload-time = "2025-08-10T20:21:29.112Z" },
-    { url = "https://files.pythonhosted.org/packages/18/da/ced52538144643fb6ac68c8f548d3ef7505c2a08bd183ad4629e1ec70cb7/kiwisolver-1.4.10rc0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cfcd7f1c72c170db55719c0899cb10ddc6584491f27dc1b0d8925e6bbcceca13", size = 2330856, upload-time = "2025-08-10T20:21:30.567Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/bf/b91302b110eb3adabaa429d9597bb98dba4e43c39570a75c59460883ece5/kiwisolver-1.4.10rc0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59bb9e7089552273187c8e7b7af62543d3198684231f26d5da60b7bc31a73395", size = 2420031, upload-time = "2025-08-10T20:21:32.181Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/3a/8bc22b09b485775a4fda94a37fd1d6d0c8db2640481a2941277ce0c0fd81/kiwisolver-1.4.10rc0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:dcdbe9d777d2a55749db7ff810ba58f530c06f52e612e4e407fc19457709b148", size = 2594729, upload-time = "2025-08-10T20:21:33.959Z" },
-    { url = "https://files.pythonhosted.org/packages/47/12/597a6c2f00a09ca83e7c0a567b756ac6ad7896428ea4677128cf9ee7e9b2/kiwisolver-1.4.10rc0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9b485e2e377a594dbcf131e8c90f2561d10b4e654025c0760a8bbd2e23427748", size = 2391799, upload-time = "2025-08-10T20:21:36.063Z" },
-    { url = "https://files.pythonhosted.org/packages/11/b2/cda7e698c85ad65b00cdadfcc5f0c48e88afb4cded5d401a59e7571aa838/kiwisolver-1.4.10rc0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:93b6286826dffd9eb20e2e25dc47b42830d3f48f3835e20299711f30c4200677", size = 123863, upload-time = "2025-08-10T20:21:38.543Z" },
-    { url = "https://files.pythonhosted.org/packages/81/a3/df94ae199ac43ff99f2fd3ffad50a4fea1a1ba57aa5b9e00066b16eb0fb0/kiwisolver-1.4.10rc0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c7364af8780fcffbb0bc88a96fde73d08890b75b7359014cdf52f73f5305346f", size = 66663, upload-time = "2025-08-10T20:21:39.612Z" },
-    { url = "https://files.pythonhosted.org/packages/89/d0/954830c6f28f77f5457bb3591f825e3b602ff8fee07959c147c801aa7bd1/kiwisolver-1.4.10rc0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d5c052c819163b4022c2c2e0fcb06471672df1de9deac45f14c7d4246ae680ea", size = 65011, upload-time = "2025-08-10T20:21:40.69Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/a0/ed7cdc111881fb9093e667bbb0d164f4c060acbea6505f188213e262a315/kiwisolver-1.4.10rc0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:614636cbf8a6ee1b19c645780d633f63c3f82861c13c18848ea5805d560519d1", size = 1472481, upload-time = "2025-08-10T20:21:42.144Z" },
-    { url = "https://files.pythonhosted.org/packages/0b/fe/9b7331a8f63c1001c90b0da1b58d4eff6b577576958862c518e5e6be67a2/kiwisolver-1.4.10rc0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dbbeebea5c25e114f3a77f3949c857ac9865f18efdb794976c23f78dbb14fa6a", size = 1281319, upload-time = "2025-08-10T20:21:43.694Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/89/590743079cf1e8b48d8760c275a82dcf175fbbd2d8f02b356a98c89866ea/kiwisolver-1.4.10rc0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b64ceacb59d97820ef86d8ca29cd0b861806850a88d5d39171cc4d08a4822ea8", size = 1298654, upload-time = "2025-08-10T20:21:45.173Z" },
-    { url = "https://files.pythonhosted.org/packages/57/c4/23da0e3af18c87c0505e332c2e9b56312eb46c8ea2692d49ae6b756add9d/kiwisolver-1.4.10rc0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1157c3cdf59068792409cab46a346520ab0c31545f709b2ed91a740ae6639951", size = 1345677, upload-time = "2025-08-10T20:21:46.621Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/23/1dee49cabb73e2b95fd4154155b029909158e8a97206ced1f164d435fb29/kiwisolver-1.4.10rc0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e4ac9b148e0a44f45321524096c45df725fe8e54ad105204474b065e724fa3b9", size = 2230237, upload-time = "2025-08-10T20:21:48.471Z" },
-    { url = "https://files.pythonhosted.org/packages/4d/9c/8efc8ccba0f34324abc8d3758d622558b765be6c2a719c8cc527a48204de/kiwisolver-1.4.10rc0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c01a868cd5f4860f89d6e23c6dde1c9b730b31b838e33c25b7f5edc568736715", size = 2326035, upload-time = "2025-08-10T20:21:49.875Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/c2/ee8823d8b9b73abd6ff93e3df25b3814c063a7702c166c1dae1bed725c87/kiwisolver-1.4.10rc0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:6a945c1d7dc67fae25929ce22a67c83d009944be5f3a22d6ca3914867af998ac", size = 2491519, upload-time = "2025-08-10T20:21:51.397Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/ec/ccf064bedb2c7afe74b226bd15a2389766564a1300b1718cb06db065580a/kiwisolver-1.4.10rc0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:58bd1cb70d28234875a31a3a4e6c76690ac1bf9e06d660ca110e8fb0f2180824", size = 2294697, upload-time = "2025-08-10T20:21:52.889Z" },
-    { url = "https://files.pythonhosted.org/packages/00/63/d8c79c487ef7ddcdf1c905dc9f018184d1afdd142a284f092b572849b9d7/kiwisolver-1.4.10rc0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:9968c28fff4893d8ecc1c0006a033348735d9add1c2761b7069451378ef5a366", size = 126594, upload-time = "2025-08-10T20:21:56.335Z" },
-    { url = "https://files.pythonhosted.org/packages/97/a0/08cf87f47916d81a3fed94949ca2a91d904876fe8affc0ac59953bbfd57a/kiwisolver-1.4.10rc0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:fb61ab937f2f3ffcbfd5c0ea954426515e31d4e9069aca3b67df80608b351bac", size = 67963, upload-time = "2025-08-10T20:21:58.218Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/9f/76f48a32800e0659a4fd2e36139b97edcdd20c6c96d2f4f2eca421db3804/kiwisolver-1.4.10rc0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:95aff11941e39ff83a8c40b102b4cbd6ce6c877de606b8844e68fb870780ef47", size = 66478, upload-time = "2025-08-10T20:21:59.647Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/9f/1d09b1f2f86bdf45a6f2f13ab692cb23dab58b5b7b96acb8886624378a02/kiwisolver-1.4.10rc0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0b034dc34bcd2ea55eae3481a9282df17a8941f2b55be5f32a93596b85da8161", size = 1582270, upload-time = "2025-08-10T20:22:00.918Z" },
-    { url = "https://files.pythonhosted.org/packages/7e/cb/c1790b2446465974b5e203ecaf4d77f29fa753f0a03ff3beffac31064305/kiwisolver-1.4.10rc0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6893c0dbf86a3bac9359363bc4b371b90e2bb2fc7645104f350ab5b84cd7f1b6", size = 1390172, upload-time = "2025-08-10T20:22:02.335Z" },
-    { url = "https://files.pythonhosted.org/packages/61/c1/3dedb5fb484c874333f65fe2418d95352ae52c90d7b765e72f03e038fda3/kiwisolver-1.4.10rc0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6d3550d0c99ff6e1f1046c87a6ea845aa03cbea964cc87aefaba8ccbbacb0a76", size = 1402672, upload-time = "2025-08-10T20:22:03.79Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/c8/b5edcddefc81674c0de9d5f45be49ee4ebb65593d39e07ab41b353d9cbce/kiwisolver-1.4.10rc0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:858c66d68285227c6bd350e4947a00424c48cc366334485b50377beaecd16140", size = 1454047, upload-time = "2025-08-10T20:22:05.311Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/0a/4ca0c782b074613315ed6194a72bd6731403a46409d5c43867add4072318/kiwisolver-1.4.10rc0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:dba0e50a89f753cb97940918d2d3c01a09d1fe5c5c5f39bab9f730191fce22f9", size = 2331652, upload-time = "2025-08-10T20:22:07.119Z" },
-    { url = "https://files.pythonhosted.org/packages/58/b5/8df22261f42502e507c7c55812c72c0525e7787481f878706eea7560d75e/kiwisolver-1.4.10rc0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:c3113b5955f88028954c7af68f3f1025c1046cb1106f3d806fc9c376d37c12a7", size = 2422113, upload-time = "2025-08-10T20:22:08.95Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/9d/5425178710964dc6167867fde113e1d402c0744433815de191deff90dd03/kiwisolver-1.4.10rc0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4608d88b4f0210d7ad28a64fd8a291747eb665efbf10e6850051c2fa8c7af91b", size = 2594963, upload-time = "2025-08-10T20:22:10.513Z" },
-    { url = "https://files.pythonhosted.org/packages/64/ad/53bd6b22fa1917746096b6240dd0c546020e358506e8503dce57f3cdcd9a/kiwisolver-1.4.10rc0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:acc08f93b36220a6baa7df3428cb8847b27717db9be4295c0b1571d040c77327", size = 2391902, upload-time = "2025-08-10T20:22:12.421Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/69/024d6711d5ba575aa65d5538042e99964104e97fa153a9f10bc369182bc2/kiwisolver-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:fd40bb9cd0891c4c3cb1ddf83f8bbfa15731a248fdc8162669405451e2724b09", size = 123166, upload-time = "2026-03-09T13:13:48.032Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/48/adbb40df306f587054a348831220812b9b1d787aff714cfbc8556e38fccd/kiwisolver-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c0e1403fd7c26d77c1f03e096dc58a5c726503fa0db0456678b8668f76f521e3", size = 66395, upload-time = "2026-03-09T13:13:49.365Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/3a/d0a972b34e1c63e2409413104216cd1caa02c5a37cb668d1687d466c1c45/kiwisolver-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dda366d548e89a90d88a86c692377d18d8bd64b39c1fb2b92cb31370e2896bbd", size = 64065, upload-time = "2026-03-09T13:13:50.562Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/0a/7b98e1e119878a27ba8618ca1e18b14f992ff1eda40f47bccccf4de44121/kiwisolver-1.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:332b4f0145c30b5f5ad9374881133e5aa64320428a57c2c2b61e9d891a51c2f3", size = 1477903, upload-time = "2026-03-09T13:13:52.084Z" },
+    { url = "https://files.pythonhosted.org/packages/18/d8/55638d89ffd27799d5cc3d8aa28e12f4ce7a64d67b285114dbedc8ea4136/kiwisolver-1.5.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c50b89ffd3e1a911c69a1dd3de7173c0cd10b130f56222e57898683841e4f96", size = 1278751, upload-time = "2026-03-09T13:13:54.673Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/97/b4c8d0d18421ecceba20ad8701358453b88e32414e6f6950b5a4bad54e65/kiwisolver-1.5.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4db576bb8c3ef9365f8b40fe0f671644de6736ae2c27a2c62d7d8a1b4329f099", size = 1296793, upload-time = "2026-03-09T13:13:56.287Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/10/f862f94b6389d8957448ec9df59450b81bec4abb318805375c401a1e6892/kiwisolver-1.5.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0b85aad90cea8ac6797a53b5d5f2e967334fa4d1149f031c4537569972596cb8", size = 1346041, upload-time = "2026-03-09T13:13:58.269Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/6a/f1650af35821eaf09de398ec0bc2aefc8f211f0cda50204c9f1673741ba9/kiwisolver-1.5.0-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:d36ca54cb4c6c4686f7cbb7b817f66f5911c12ddb519450bbe86707155028f87", size = 987292, upload-time = "2026-03-09T13:13:59.871Z" },
+    { url = "https://files.pythonhosted.org/packages/de/19/d7fb82984b9238115fe629c915007be608ebd23dc8629703d917dbfaffd4/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:38f4a703656f493b0ad185211ccfca7f0386120f022066b018eb5296d8613e23", size = 2227865, upload-time = "2026-03-09T13:14:01.401Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/b9/46b7f386589fd222dac9e9de9c956ce5bcefe2ee73b4e79891381dda8654/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ac2360e93cb41be81121755c6462cff3beaa9967188c866e5fce5cf13170859", size = 2324369, upload-time = "2026-03-09T13:14:02.972Z" },
+    { url = "https://files.pythonhosted.org/packages/92/8b/95e237cf3d9c642960153c769ddcbe278f182c8affb20cecc1cc983e7cc5/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c95cab08d1965db3d84a121f1c7ce7479bdd4072c9b3dafd8fecce48a2e6b902", size = 1977989, upload-time = "2026-03-09T13:14:04.503Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/95/980c9df53501892784997820136c01f62bc1865e31b82b9560f980c0e649/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fc20894c3d21194d8041a28b65622d5b86db786da6e3cfe73f0c762951a61167", size = 2491645, upload-time = "2026-03-09T13:14:06.106Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/32/900647fd0840abebe1561792c6b31e6a7c0e278fc3973d30572a965ca14c/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7a32f72973f0f950c1920475d5c5ea3d971b81b6f0ec53b8d0a956cc965f22e0", size = 2295237, upload-time = "2026-03-09T13:14:08.891Z" },
+    { url = "https://files.pythonhosted.org/packages/be/8a/be60e3bbcf513cc5a50f4a3e88e1dcecebb79c1ad607a7222877becaa101/kiwisolver-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bf3acf1419fa93064a4c2189ac0b58e3be7872bf6ee6177b0d4c63dc4cea276", size = 73573, upload-time = "2026-03-09T13:14:12.327Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/d2/64be2e429eb4fca7f7e1c52a91b12663aeaf25de3895e5cca0f47ef2a8d0/kiwisolver-1.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:fa8eb9ecdb7efb0b226acec134e0d709e87a909fa4971a54c0c4f6e88635484c", size = 64998, upload-time = "2026-03-09T13:14:13.469Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/69/ce68dd0c85755ae2de490bf015b62f2cea5f6b14ff00a463f9d0774449ff/kiwisolver-1.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:db485b3847d182b908b483b2ed133c66d88d49cacf98fd278fadafe11b4478d1", size = 125700, upload-time = "2026-03-09T13:14:14.636Z" },
+    { url = "https://files.pythonhosted.org/packages/74/aa/937aac021cf9d4349990d47eb319309a51355ed1dbdc9c077cdc9224cb11/kiwisolver-1.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:be12f931839a3bdfe28b584db0e640a65a8bcbc24560ae3fdb025a449b3d754e", size = 67537, upload-time = "2026-03-09T13:14:15.808Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/20/3a87fbece2c40ad0f6f0aefa93542559159c5f99831d596050e8afae7a9f/kiwisolver-1.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:16b85d37c2cbb3253226d26e64663f755d88a03439a9c47df6246b35defbdfb7", size = 65514, upload-time = "2026-03-09T13:14:18.035Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/7f/f943879cda9007c45e1f7dba216d705c3a18d6b35830e488b6c6a4e7cdf0/kiwisolver-1.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4432b835675f0ea7414aab3d37d119f7226d24869b7a829caeab49ebda407b0c", size = 1584848, upload-time = "2026-03-09T13:14:19.745Z" },
+    { url = "https://files.pythonhosted.org/packages/37/f8/4d4f85cc1870c127c88d950913370dd76138482161cd07eabbc450deff01/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b0feb50971481a2cc44d94e88bdb02cdd497618252ae226b8eb1201b957e368", size = 1391542, upload-time = "2026-03-09T13:14:21.54Z" },
+    { url = "https://files.pythonhosted.org/packages/04/0b/65dd2916c84d252b244bd405303220f729e7c17c9d7d33dca6feeff9ffc4/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:56fa888f10d0f367155e76ce849fa1166fc9730d13bd2d65a2aa13b6f5424489", size = 1404447, upload-time = "2026-03-09T13:14:23.205Z" },
+    { url = "https://files.pythonhosted.org/packages/39/5c/2606a373247babce9b1d056c03a04b65f3cf5290a8eac5d7bdead0a17e21/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:940dda65d5e764406b9fb92761cbf462e4e63f712ab60ed98f70552e496f3bf1", size = 1455918, upload-time = "2026-03-09T13:14:24.74Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/d1/c6078b5756670658e9192a2ef11e939c92918833d2745f85cd14a6004bdf/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_39_riscv64.whl", hash = "sha256:89fc958c702ee9a745e4700378f5d23fddbc46ff89e8fdbf5395c24d5c1452a3", size = 1072856, upload-time = "2026-03-09T13:14:26.597Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/c8/7def6ddf16eb2b3741d8b172bdaa9af882b03c78e9b0772975408801fa63/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9027d773c4ff81487181a925945743413f6069634d0b122d0b37684ccf4f1e18", size = 2333580, upload-time = "2026-03-09T13:14:28.237Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/87/2ac1fce0eb1e616fcd3c35caa23e665e9b1948bb984f4764790924594128/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:5b233ea3e165e43e35dba1d2b8ecc21cf070b45b65ae17dd2747d2713d942021", size = 2423018, upload-time = "2026-03-09T13:14:30.018Z" },
+    { url = "https://files.pythonhosted.org/packages/67/13/c6700ccc6cc218716bfcda4935e4b2997039869b4ad8a94f364c5a3b8e63/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ce9bf03dad3b46408c08649c6fbd6ca28a9fce0eb32fdfffa6775a13103b5310", size = 2062804, upload-time = "2026-03-09T13:14:32.888Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/bd/877056304626943ff0f1f44c08f584300c199b887cb3176cd7e34f1515f1/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:fc4d3f1fb9ca0ae9f97b095963bc6326f1dbfd3779d6679a1e016b9baaa153d3", size = 2597482, upload-time = "2026-03-09T13:14:34.971Z" },
+    { url = "https://files.pythonhosted.org/packages/75/19/c60626c47bf0f8ac5dcf72c6c98e266d714f2fbbfd50cf6dab5ede3aaa50/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f443b4825c50a51ee68585522ab4a1d1257fac65896f282b4c6763337ac9f5d2", size = 2394328, upload-time = "2026-03-09T13:14:36.816Z" },
+    { url = "https://files.pythonhosted.org/packages/47/84/6a6d5e5bb8273756c27b7d810d47f7ef2f1f9b9fd23c9ee9a3f8c75c9cef/kiwisolver-1.5.0-cp313-cp313t-win_arm64.whl", hash = "sha256:893ff3a711d1b515ba9da14ee090519bad4610ed1962fbe298a434e8c5f8db53", size = 68410, upload-time = "2026-03-09T13:14:38.695Z" },
 ]
 
 [[package]]
@@ -1169,8 +1080,8 @@ name = "latex2sympy2-extended"
 version = "1.11.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "antlr4-python3-runtime", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "antlr4-python3-runtime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/30/75/456da2da05f6380ea96e6ea804ab2c03e41fc3ed80052307fe8efe6ea20e/latex2sympy2_extended-1.11.0.tar.gz", hash = "sha256:9695657c81b50abba2636638638618db59f4663ed2a4a12d62cef74a40e28fec", size = 207023, upload-time = "2026-01-10T01:43:21.319Z" }
 wheels = [
@@ -1179,47 +1090,61 @@ wheels = [
 
 [package.optional-dependencies]
 antlr4-11-0 = [
-    { name = "antlr4-python3-runtime", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "antlr4-python3-runtime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+
+[[package]]
+name = "llguidance"
+version = "1.7.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/74/2a/e889d6fdddda852171cf537486513d59fd8d9c38104323c1851a73675f1f/llguidance-1.7.5.tar.gz", hash = "sha256:afaa8f979708cd546c762f06a4fe4748e5ef7f06ed45875dabe7db8f07b73645", size = 1156674, upload-time = "2026-04-29T19:11:09.915Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d7/e7/5c019dcd5c0312bd7b2ddaa3563c630a87bc51bfa692aed60999d5ac2bc7/llguidance-1.7.5-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:dd805b8b0302edfa18c9b2b4b9ecf7f7b23f5bea42a44a91e7706238ffd21cef", size = 3225139, upload-time = "2026-04-29T19:10:56.95Z" },
+    { url = "https://files.pythonhosted.org/packages/32/93/ecbe86d090afe4de7ab74ddc93b03a6cef8b01c62e06fa87e462e2dc4ffc/llguidance-1.7.5-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:421ff50f59fbe21bc3cba509e02366312a0de050088d2754711d1f1edb5dfe2b", size = 3136321, upload-time = "2026-04-29T19:10:58.49Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/dc/97cff2071bd9f0659db30655cfeb10bceaed91f7dee3ecbe2c813bd43642/llguidance-1.7.5-cp39-abi3-manylinux_2_31_aarch64.whl", hash = "sha256:c1dfda8d8c47da5be5e47b30084eadb2ef331ab08dc6e3a114429511ab13ae05", size = 2901942, upload-time = "2026-04-29T19:10:59.958Z" },
+    { url = "https://files.pythonhosted.org/packages/27/c4/2b9b9d0de824a71627373b0ccdbcf61bd56133b52c3f5b988a803f55d2c0/llguidance-1.7.5-cp39-abi3-manylinux_2_31_x86_64.whl", hash = "sha256:1d02dbc64dc1afc2d2cb7e5e868886527f8c6f088062e87d81bbad6212e22500", size = 3073011, upload-time = "2026-04-29T19:11:01.949Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/c6/7cc11c2e68245cbabaf1a69a9e52a55f1216beebaeee5a8455b1d85d6d84/llguidance-1.7.5-cp39-abi3-manylinux_2_34_i686.whl", hash = "sha256:3e243bc1acf47d5200e78a082a61f4866a2a3faf59b1b2ed5748e42ecaf32397", size = 3317403, upload-time = "2026-04-29T19:11:03.607Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/78/9130ce2d49e33637de372c22620b00ae6b816c4636a3a0dee0d2390a649d/llguidance-1.7.5-cp39-abi3-manylinux_2_39_riscv64.whl", hash = "sha256:f1f9fb791a8def3de4feec9c40b5d4bd63b9f06e5315586a209d567467443293", size = 3604816, upload-time = "2026-04-29T19:11:05.244Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/98/067b0cac8143f00e72dfe81d463059fe4e3615182121df3be977443ea744/llguidance-1.7.5-cp39-abi3-win32.whl", hash = "sha256:01da2d40298c3487c188a0b3c5fba982afcccf8ed0ec523b05b6210ffa745036", size = 2612422, upload-time = "2026-04-29T19:11:06.932Z" },
+    { url = "https://files.pythonhosted.org/packages/96/10/81469d27185bec13720ff1eff4b07c3d157d5633d0b2522ffb11ae09e81b/llguidance-1.7.5-cp39-abi3-win_amd64.whl", hash = "sha256:502e55c4521dde4be352b34e508dc665ddea3ae1f73c55c869f2d8b63475e4e5", size = 2883265, upload-time = "2026-04-29T19:11:08.414Z" },
 ]
 
 [[package]]
 name = "lm-eval"
-version = "0.4.11"
+version = "0.5.0.dev1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "evaluate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "jsonlines", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "more-itertools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pytablewriter", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "rouge-score", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sacrebleu", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "scikit-learn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sqlitedict", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "word2number", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "more-itertools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pytablewriter", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "rouge-score", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sacrebleu", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "scikit-learn", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sqlitedict", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "word2number", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/18/40/ad60ae17f97902ba4ee67914a94c159552f8643fb4598d1d2430a571f0a2/lm_eval-0.4.11.tar.gz", hash = "sha256:a3891d6d0b4ad17892f2ca1046094554ce3ab6cb522759e4a453858e45649916", size = 3246509, upload-time = "2026-02-13T20:22:59.494Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/65/41/26141271560b0a41e2279fe2a2f29bf75e26cfb93849e3d5ad68c28ba66b/lm_eval-0.5.0.dev1.tar.gz", hash = "sha256:5b71c723db0377a3f7c1f932e94430f9d3508bc150eeffdfd236afc60b42c35c", size = 3367200, upload-time = "2026-05-11T14:27:27.223Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/df/b3/8c2923ad4c4d911307e20ab9c7d7869d3811b5a76fe6dbe53678aafbeb04/lm_eval-0.4.11-py3-none-any.whl", hash = "sha256:9c9945475a715558649a38ffcb90f46e7bd23a849524a5e838f249161b030517", size = 8744826, upload-time = "2026-02-13T20:22:56.317Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/dc/8dc6e399a1e97948064a571639ad5b3f2b7cae84f930dae081e49bcafd47/lm_eval-0.5.0.dev1-py3-none-any.whl", hash = "sha256:f855cd83238747d51abd8c9dcc5feb0496e91f627c40cfac93e21304c9c8f9d6", size = 8824039, upload-time = "2026-05-11T14:27:23.594Z" },
 ]
 
 [package.optional-dependencies]
 api = [
-    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tenacity", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tenacity", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 math = [
-    { name = "antlr4-python3-runtime", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "math-verify", extra = ["antlr4-11-0"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "antlr4-python3-runtime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "math-verify", extra = ["antlr4-11-0"], marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [[package]]
@@ -1233,55 +1158,27 @@ wheels = [
 
 [[package]]
 name = "lxml"
-version = "6.0.2"
+version = "6.1.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/3b/aab6728cae887456f409b4d75e8a01856e4f04bd510de38052a47768b680/lxml-6.1.1.tar.gz", hash = "sha256:ba96ae44888e0185281e937633a743ea90d5a196c6000f82565ebb0580012d40", size = 4197430, upload-time = "2026-05-18T19:19:06.424Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" },
-    { url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" },
-    { url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" },
-    { url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" },
-    { url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" },
-    { url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" },
-    { url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" },
-    { url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" },
-    { url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" },
-    { url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" },
-    { url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" },
-    { url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" },
-    { url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" },
-    { url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" },
-    { url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" },
-    { url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" },
-    { url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" },
-    { url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" },
-    { url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" },
-    { url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" },
-    { url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" },
-    { url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" },
-    { url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" },
-    { url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/eb/7e6f37c5584ccbb2ff267f56fd0339016938c1c8684cfefab9b33ffc2f36/lxml-6.1.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:68a9198d0fc122d14bb76837de9aa80cf84caed990b5b237f532ed87d3706736", size = 8559780, upload-time = "2026-05-18T19:17:57.661Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/36/587c2521cf23a2cd6c9c22108aa7528f683a1f195ed7ccd23a4b1786ad36/lxml-6.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7d47866cb32fb503450b6edc9df355d10dc49836af2e89901bd6ac6b0896d9d9", size = 4618006, upload-time = "2026-05-18T19:18:04.452Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/ca/ab7bfe2bf4c972af5e7878262845ead3a24a929a9b04bc11c7c1ece6c82a/lxml-6.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb7c9811bfaa8b1ed5ed319f5d370dfbcaa59d52ea64be2a5a85e18195930354", size = 4924139, upload-time = "2026-05-18T19:19:04.873Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/55/a0c72851dfee5ecc689f949723a73dea457758912542cb955b108eaf0d8f/lxml-6.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:762ff394d5bd56da0cf034a23dcce4e13923f15321a2adfa2ac00201dc6d3fca", size = 5082329, upload-time = "2026-05-18T19:19:09.728Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/b6/0608f7d61a3b96cc67e5648a3d906e31a5082093e10e7be65b3886289938/lxml-6.1.1-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a088f287f7d8275a33c07f2cac6c50b9319309a0200a39e7e75d80c707723099", size = 4993564, upload-time = "2026-05-18T19:19:13.608Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/66/ae227524b066d29d55bf0b453d93d2d793c40218657d643dcbbca13b8faf/lxml-6.1.1-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e902da4b04e6b52e5893900d4b8ab46068f75f3561f01bf1080957f9fd932ed6", size = 5613467, upload-time = "2026-05-18T19:19:16.228Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/76/dbe4a00b50385e40194231dcfe5a12c059de7cf90e89c83407d2b085b719/lxml-6.1.1-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1d4962d4c66bf830a7e59ed6cfc17d148149898a3aefa8ec6e59763e6e3ed085", size = 5228304, upload-time = "2026-05-18T19:19:19.354Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/01/00b1b8442ed2041793336868ba0b9ea4b13d7da7c085c6404c207a63bf79/lxml-6.1.1-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:581d4c8ae690a6609e64862dd6b7c2489635c2d13907fc2b20f2bc200ff1d21e", size = 5341607, upload-time = "2026-05-18T19:19:22.297Z" },
+    { url = "https://files.pythonhosted.org/packages/63/36/1ad29931e9a4638bb707869f01d423a6c815f82152138d1a40dfcfde2b95/lxml-6.1.1-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:876e1ff5930ed8bf295ec5ef9a8155e9b6b1876bbf1deed8b3a8069311875a8f", size = 4700168, upload-time = "2026-05-18T19:19:25.133Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/d1/a9536cecf9be18a0dc72d32bead283a2332d1ffebd2dd3ac70ce444686e5/lxml-6.1.1-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9eb9b5a968f6e0f6d640092a567e14529ff8cea2e29d00da6f78a79fa49f013c", size = 5232487, upload-time = "2026-05-18T19:19:28.603Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/77/b4fb1e03bf5d130e879214d3100092e386418807fb74dd0adc4b0a48f351/lxml-6.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:aa49e06d94aba782c6a02eecb7e507969e7e7a41b267f1b359bb35585f295d5b", size = 5044231, upload-time = "2026-05-18T19:18:42.246Z" },
+    { url = "https://files.pythonhosted.org/packages/26/4c/d00daeeb0a5530c4028a9232aa1b93db3ef4ed2158c116ea73c79a9765b3/lxml-6.1.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:70cdfd80589d59e43e18005dd7244e8895e93db8ab6a620b7e23df5445a4e3d2", size = 4769450, upload-time = "2026-05-18T19:18:48.013Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/6a/715a3a8d156ce42f29cf014706f5410c2ff3b02267774110fc23266409fe/lxml-6.1.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:aad9aa39483ed8ec44d6d2e59e5b98a0d80676ef0d92f44bfc374836111f62f5", size = 5635874, upload-time = "2026-05-18T19:18:51.914Z" },
+    { url = "https://files.pythonhosted.org/packages/45/37/0544bc21dde2a88f3a17b504e6fc79c0e01d25a33c2f6079724e9e72b9c7/lxml-6.1.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d49514be2f28d895c38cf9d2b72d7b9a07d00314519f456c0b50b53cfcf4c785", size = 5223987, upload-time = "2026-05-18T19:18:59.715Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/f8/f6a5e8185bcb28c2befae3d31f8e3df3b811cb0f47746517a81279fcafe1/lxml-6.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:47402e62c52ff5988c1e8c6c63177f5708bccf48e366dea4e3dcf1e645e04947", size = 5250276, upload-time = "2026-05-18T19:19:03.834Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/f2/1a2b9f1b7a49d45495369be7ef9ad05b262930f2eab3e3145706fca8083f/lxml-6.1.1-cp313-cp313-win32.whl", hash = "sha256:3483644525531e1d5762b0c44a8e18b6efba321b6dcf8a8952de10b037618bca", size = 3596903, upload-time = "2026-05-18T19:17:29.863Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/99/f4ffb024f238eec2131aaa09f3278fb6129cf892741bf68e1fc1afb8c100/lxml-6.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:a10bd2fd62e8ce916ececb342f348f190724a098c1faa056fdfb2a22ad5e8660", size = 3995869, upload-time = "2026-05-18T19:18:02.596Z" },
 ]
 
 [[package]]
@@ -1289,7 +1186,7 @@ name = "macholib"
 version = "1.16.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "altgraph", marker = "sys_platform == 'darwin'" },
+    { name = "altgraph", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/10/2f/97589876ea967487978071c9042518d28b958d87b17dceb7cdc1d881f963/macholib-1.16.4.tar.gz", hash = "sha256:f408c93ab2e995cd2c46e34fe328b130404be143469e41bc366c807448979362", size = 59427, upload-time = "2025-11-22T08:28:38.373Z" }
 wheels = [
@@ -1298,14 +1195,14 @@ wheels = [
 
 [[package]]
 name = "markdown-it-py"
-version = "4.0.0"
+version = "4.2.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "mdurl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mdurl", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/06/ff/7841249c247aa650a76b9ee4bbaeae59370dc8bfd2f6c01f3630c35eb134/markdown_it_py-4.2.0.tar.gz", hash = "sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49", size = 82454, upload-time = "2026-05-07T12:08:28.36Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687, upload-time = "2026-05-07T12:08:27.182Z" },
 ]
 
 [[package]]
@@ -1322,6 +1219,9 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" },
     { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" },
     { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" },
+    { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" },
+    { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" },
     { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" },
     { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" },
     { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" },
@@ -1330,22 +1230,9 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" },
     { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" },
     { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" },
-    { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" },
-    { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" },
-    { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" },
-    { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" },
-    { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" },
-    { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" },
-    { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" },
-    { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" },
-    { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" },
+    { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" },
 ]
 
 [[package]]
@@ -1353,7 +1240,7 @@ name = "math-verify"
 version = "0.9.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "latex2sympy2-extended", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "latex2sympy2-extended", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/4f/12/b8d13b581e110ac2f724a2351a8361a70fa36d057eb945d6379e8747c256/math_verify-0.9.0.tar.gz", hash = "sha256:45ac6c61344ba056b9e99a660a4bc8d044ed408f730aed68c60435aa5eec4645", size = 60329, upload-time = "2026-01-10T01:48:33.056Z" }
 wheels = [
@@ -1362,58 +1249,52 @@ wheels = [
 
 [package.optional-dependencies]
 antlr4-11-0 = [
-    { name = "latex2sympy2-extended", extra = ["antlr4-11-0"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "latex2sympy2-extended", extra = ["antlr4-11-0"], marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [[package]]
 name = "matplotlib"
-version = "3.10.8"
+version = "3.11.0rc2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "contourpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "cycler", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "kiwisolver", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyparsing", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "contourpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "cycler", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "kiwisolver", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyparsing", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8a/76/d3c6e3a13fe484ebe7718d14e269c9569c4eb0020a968a327acb3b9a8fe6/matplotlib-3.10.8.tar.gz", hash = "sha256:2299372c19d56bcd35cf05a2738308758d32b9eaed2371898d8f5bd33f084aa3", size = 34806269, upload-time = "2025-12-10T22:56:51.155Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/42/b4/41b4c812df4c89230465b71cc86217923f904349d803abf67119a471e0ad/matplotlib-3.11.0rc2.tar.gz", hash = "sha256:cba0e90ae7bade3cec236c1082ef1c622ddb46f0efb060149bc2f25566ce6e5d", size = 33206182, upload-time = "2026-05-13T00:32:15.03Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3d/b9/15fd5541ef4f5b9a17eefd379356cf12175fe577424e7b1d80676516031a/matplotlib-3.10.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3f2e409836d7f5ac2f1c013110a4d50b9f7edc26328c108915f9075d7d7a91b6", size = 8261076, upload-time = "2025-12-10T22:55:44.648Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/a0/2ba3473c1b66b9c74dc7107c67e9008cb1782edbe896d4c899d39ae9cf78/matplotlib-3.10.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56271f3dac49a88d7fca5060f004d9d22b865f743a12a23b1e937a0be4818ee1", size = 8148794, upload-time = "2025-12-10T22:55:46.252Z" },
-    { url = "https://files.pythonhosted.org/packages/75/97/a471f1c3eb1fd6f6c24a31a5858f443891d5127e63a7788678d14e249aea/matplotlib-3.10.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a0a7f52498f72f13d4a25ea70f35f4cb60642b466cbb0a9be951b5bc3f45a486", size = 8718474, upload-time = "2025-12-10T22:55:47.864Z" },
-    { url = "https://files.pythonhosted.org/packages/01/be/cd478f4b66f48256f42927d0acbcd63a26a893136456cd079c0cc24fbabf/matplotlib-3.10.8-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:646d95230efb9ca614a7a594d4fcacde0ac61d25e37dd51710b36477594963ce", size = 9549637, upload-time = "2025-12-10T22:55:50.048Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/7c/8dc289776eae5109e268c4fb92baf870678dc048a25d4ac903683b86d5bf/matplotlib-3.10.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f89c151aab2e2e23cb3fe0acad1e8b82841fd265379c4cecd0f3fcb34c15e0f6", size = 9613678, upload-time = "2025-12-10T22:55:52.21Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/27/51fe26e1062f298af5ef66343d8ef460e090a27fea73036c76c35821df04/matplotlib-3.10.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ad3d9833a64cf48cc4300f2b406c3d0f4f4724a91c0bd5640678a6ba7c102077", size = 8305679, upload-time = "2025-12-10T22:55:57.856Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/1e/4de865bc591ac8e3062e835f42dd7fe7a93168d519557837f0e37513f629/matplotlib-3.10.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:eb3823f11823deade26ce3b9f40dcb4a213da7a670013929f31d5f5ed1055b22", size = 8198336, upload-time = "2025-12-10T22:55:59.371Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/cb/2f7b6e75fb4dce87ef91f60cac4f6e34f4c145ab036a22318ec837971300/matplotlib-3.10.8-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d9050fee89a89ed57b4fb2c1bfac9a3d0c57a0d55aed95949eedbc42070fea39", size = 8731653, upload-time = "2025-12-10T22:56:01.032Z" },
-    { url = "https://files.pythonhosted.org/packages/46/b3/bd9c57d6ba670a37ab31fb87ec3e8691b947134b201f881665b28cc039ff/matplotlib-3.10.8-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b44d07310e404ba95f8c25aa5536f154c0a8ec473303535949e52eb71d0a1565", size = 9561356, upload-time = "2025-12-10T22:56:02.95Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/3d/8b94a481456dfc9dfe6e39e93b5ab376e50998cddfd23f4ae3b431708f16/matplotlib-3.10.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0a33deb84c15ede243aead39f77e990469fff93ad1521163305095b77b72ce4a", size = 9614000, upload-time = "2025-12-10T22:56:05.411Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/43/9c0ff7a2f11615e516c3b058e1e6e8f9614ddeca53faca06da267c48345d/matplotlib-3.10.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b53285e65d4fa4c86399979e956235deb900be5baa7fc1218ea67fbfaeaadd6f", size = 8262481, upload-time = "2025-12-10T22:56:10.885Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/ca/e8ae28649fcdf039fda5ef554b40a95f50592a3c47e6f7270c9561c12b07/matplotlib-3.10.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:32f8dce744be5569bebe789e46727946041199030db8aeb2954d26013a0eb26b", size = 8151473, upload-time = "2025-12-10T22:56:12.377Z" },
-    { url = "https://files.pythonhosted.org/packages/f1/6f/009d129ae70b75e88cbe7e503a12a4c0670e08ed748a902c2568909e9eb5/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cf267add95b1c88300d96ca837833d4112756045364f5c734a2276038dae27d", size = 9553896, upload-time = "2025-12-10T22:56:14.432Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/26/4221a741eb97967bc1fd5e4c52b9aa5a91b2f4ec05b59f6def4d820f9df9/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2cf5bd12cecf46908f286d7838b2abc6c91cda506c0445b8223a7c19a00df008", size = 9824193, upload-time = "2025-12-10T22:56:16.29Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/f3/3abf75f38605772cf48a9daf5821cd4f563472f38b4b828c6fba6fa6d06e/matplotlib-3.10.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:41703cc95688f2516b480f7f339d8851a6035f18e100ee6a32bc0b8536a12a9c", size = 9615444, upload-time = "2025-12-10T22:56:18.155Z" },
-    { url = "https://files.pythonhosted.org/packages/68/d9/b31116a3a855bd313c6fcdb7226926d59b041f26061c6c5b1be66a08c826/matplotlib-3.10.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b5a2b97dbdc7d4f353ebf343744f1d1f1cca8aa8bfddb4262fcf4306c3761d50", size = 8305785, upload-time = "2025-12-10T22:56:24.218Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/90/6effe8103f0272685767ba5f094f453784057072f49b393e3ea178fe70a5/matplotlib-3.10.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3f5c3e4da343bba819f0234186b9004faba952cc420fbc522dc4e103c1985908", size = 8198361, upload-time = "2025-12-10T22:56:26.787Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/65/a73188711bea603615fc0baecca1061429ac16940e2385433cc778a9d8e7/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f62550b9a30afde8c1c3ae450e5eb547d579dd69b25c2fc7a1c67f934c1717a", size = 9561357, upload-time = "2025-12-10T22:56:28.953Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/3d/b5c5d5d5be8ce63292567f0e2c43dde9953d3ed86ac2de0a72e93c8f07a1/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:495672de149445ec1b772ff2c9ede9b769e3cb4f0d0aa7fa730d7f59e2d4e1c1", size = 9823610, upload-time = "2025-12-10T22:56:31.455Z" },
-    { url = "https://files.pythonhosted.org/packages/4d/4b/e7beb6bbd49f6bae727a12b270a2654d13c397576d25bd6786e47033300f/matplotlib-3.10.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:595ba4d8fe983b88f0eec8c26a241e16d6376fe1979086232f481f8f3f67494c", size = 9614011, upload-time = "2025-12-10T22:56:33.85Z" },
+    { url = "https://files.pythonhosted.org/packages/93/4a/33e02cfbd085eb673fcd2a933dc1daeb6041812987eb9cfc0cfac0d88917/matplotlib-3.11.0rc2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:147ae4ee5580956b730f84d094ae39fa90bbbee5c1eeab7c01be1ea635d50896", size = 9443098, upload-time = "2026-05-13T00:30:48.742Z" },
+    { url = "https://files.pythonhosted.org/packages/26/2e/85195f18ed4bec9a5e488634fef3745a88511ee65920f4ecec44d4a10d76/matplotlib-3.11.0rc2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e0e48996a7d20816d8e82ccaa75a2b6adc27e29ee2472ff84716f032f947a97b", size = 9272592, upload-time = "2026-05-13T00:30:51.435Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/b6/c07e0336ad102216aeb67e22dd9d58250538cc224982cddc387c3c8b6887/matplotlib-3.11.0rc2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:62a673fc72594e056130b86cdff97f25dd9305253d75a6ad51c040452c029c94", size = 10028158, upload-time = "2026-05-13T00:30:54.033Z" },
+    { url = "https://files.pythonhosted.org/packages/09/7b/7e94cba929d53c09a902383193af88718221067e4abec317d32eb2c27b5c/matplotlib-3.11.0rc2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b00f8de6fce1a306c309821eaace05754f164025073405743cfb884158757f", size = 10836402, upload-time = "2026-05-13T00:30:56.743Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/67/bb690a7c6385b0cb8da8eb63d3aae5eb11f67c7ceb0c268c5478f230c095/matplotlib-3.11.0rc2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e91fbf11172ef6dcc0bea17e77477e04335052aca4ca8811a5efa29948eb510b", size = 10919536, upload-time = "2026-05-13T00:30:59.629Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/eb/89cc8360b798d7281d9e95c1df1ec8b85c3466f6cc56df5d9d0bfc189175/matplotlib-3.11.0rc2-cp313-cp313-win_amd64.whl", hash = "sha256:27a3f814c438c68e5b2f85c2a4cc36fe02b71580c53c3bf1ae57f5251816880a", size = 9202637, upload-time = "2026-05-13T00:31:02.63Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/7f/3f57f2d8505b2989b3a639980bd083225a05ccc933db7e495512755b3a45/matplotlib-3.11.0rc2-cp313-cp313-win_arm64.whl", hash = "sha256:f160621b5b8c3314e16859a479ef4fbf15337ce6fb6b1da755c46dc59cf9b116", size = 9000020, upload-time = "2026-05-13T00:31:05.26Z" },
+    { url = "https://files.pythonhosted.org/packages/60/38/8ee725bc97a91f7e66bebbb5648b9671c71d805b52af8cfead227d904efb/matplotlib-3.11.0rc2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:16e119206b1a57dca2b65f559d9749bcc94b9eaeff0342c37913ce83e9a3c3a1", size = 9492237, upload-time = "2026-05-13T00:31:08.147Z" },
+    { url = "https://files.pythonhosted.org/packages/52/90/ece5d6bfc2ed8d063a4e841a5769824a7f6d47a08356bbfaa12cccc24fdb/matplotlib-3.11.0rc2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9eda954c59cfa053877b005197d52a00a37e865234f61bf22a42775da682804e", size = 9325161, upload-time = "2026-05-13T00:31:10.699Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/1e/9cd19bbe70b8ec33ac4739f26a11335e2cac2cc853f9dcd6bd144f476b84/matplotlib-3.11.0rc2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d1ac284470c6b5e13bb4666b51386a25f7f6d88622096fbdb9812b2d73a62464", size = 10041518, upload-time = "2026-05-13T00:31:13.252Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/a1/d2d177bfab51fc4f0fee65ea2fae022a3cad2772365b578e9013909c7730/matplotlib-3.11.0rc2-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:14e826c06f3379f5ca3bb376363e5f66d404ba7e51639ca7ac0314d61cdc96bb", size = 10853839, upload-time = "2026-05-13T00:31:15.789Z" },
+    { url = "https://files.pythonhosted.org/packages/26/aa/ce91ee604e51c1ed1a262697a345b251d9a25c887b94c398a11962f06e92/matplotlib-3.11.0rc2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:883e3a0b4f280f42fbd9c9f5546a846878bad4aca25b61713c426bec909070d3", size = 10935026, upload-time = "2026-05-13T00:31:18.56Z" },
+    { url = "https://files.pythonhosted.org/packages/60/8a/2fb9c97ef5abf3ef8c2a1086ecb58e349db2a33867fe8a445fc301ac530e/matplotlib-3.11.0rc2-cp313-cp313t-win_amd64.whl", hash = "sha256:76280075b52cdd29adcb2736cc7d9097bb85334366419a54ed1303ca4d6fe457", size = 9271762, upload-time = "2026-05-13T00:31:21.735Z" },
+    { url = "https://files.pythonhosted.org/packages/30/93/e6d4a37828a7d33a2f6774967af5f851b2f00ae27ee88bd870d8e71e35dc/matplotlib-3.11.0rc2-cp313-cp313t-win_arm64.whl", hash = "sha256:f383cac8316cad8e62f87272d650da6a0908c4e4f0b1846410efd7d67a152d0e", size = 9051893, upload-time = "2026-05-13T00:31:24.522Z" },
 ]
 
 [[package]]
 name = "mbstrdecoder"
-version = "1.1.4"
+version = "1.1.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "chardet", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "chardet", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/31/ab/05ae008357c8bdb6245ebf8a101d99f26c096e0ea20800b318153da23796/mbstrdecoder-1.1.4.tar.gz", hash = "sha256:8105ef9cf6b7d7d69fe7fd6b68a2d8f281ca9b365d7a9b670be376b2e6c81b21", size = 14527, upload-time = "2025-01-18T10:07:31.089Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/4b/9c/dd6e38d747a62ead27f9abef32f4ca4311d4e40ac28e76bcc9ffb5dd0329/mbstrdecoder-1.1.5.tar.gz", hash = "sha256:8cbfba26938befd8a35e3cc06ca0632f61320b7b2be7df32550b895e1725b1ce", size = 14529, upload-time = "2026-05-05T04:17:58.23Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/30/ac/5ce64a1d4cce00390beab88622a290420401f1cabf05caf2fc0995157c21/mbstrdecoder-1.1.4-py3-none-any.whl", hash = "sha256:03dae4ec50ec0d2ff4743e63fdbd5e0022815857494d35224b60775d3d934a8c", size = 7933, upload-time = "2025-01-18T10:07:29.562Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/eb/711270faab7b7df702339a2c68b31fd3ed4fffc68b0e99e5bdf49b1e87e4/mbstrdecoder-1.1.5-py3-none-any.whl", hash = "sha256:4a50fe113d4abecfd86e8f716b2e413cce03d63af83ec3c7cdbe81dec0e519ed", size = 7966, upload-time = "2026-05-05T04:17:56.78Z" },
 ]
 
 [[package]]
@@ -1427,149 +1308,291 @@ wheels = [
 
 [[package]]
 name = "mflux"
-version = "0.17.2"
-source = { registry = "https://pypi.org/simple" }
+version = "0.17.5"
+source = { git = "https://github.com/evanev7/mflux?branch=exo2#0fdd4cca9468dd92d8c2511c88031e937655eb01" }
 dependencies = [
-    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "hf-transfer", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "matplotlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cuda13"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.31.2.dev20260406+90dd61a5", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#90dd61a5f0837f9bbbab4fd3fbfedba1ca5d33e7" }, marker = "sys_platform == 'darwin'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "piexif", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "toml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "torch", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "twine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/1b/75/65f791f54c9531b524813d18ff87a0d34866ef220a8fe2ad637437f3cb54/mflux-0.17.2.tar.gz", hash = "sha256:52dee2d27cf438a84648e5c1861b92ceb63a9ac06823d14452a78646a1d30ee7", size = 779264, upload-time = "2026-03-23T13:08:18.377Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/68/02/f94eca4e77b7d12685060461eb793cbc8c00e96cc7fe0ce376374201aed2/mflux-0.17.2-py3-none-any.whl", hash = "sha256:be1642b04847413c0a8ed1dae82ce1ca023e155b057d82a8301eca9c3fe08339", size = 1037451, upload-time = "2026-03-23T13:08:16.747Z" },
+    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "hf-transfer", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "matplotlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "piexif", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "toml", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "twine", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [[package]]
 name = "miniaudio"
-version = "1.61"
+version = "1.71"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "cffi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "cffi", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d8/d5/e5439dc08561f73656bfeb3340fc64ab63163e101426593d8fb9a025ff1e/miniaudio-1.71.tar.gz", hash = "sha256:ff51e2887bb673e2e757752b586b3dc924d59aa5fbcae9bbc45f4a111bd3262b", size = 1116480, upload-time = "2026-04-29T21:20:38.182Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3a/85/44545f767ec21142ffed5f9108406d11dc8a19aafed9bd57621a0892bb60/miniaudio-1.71-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:61b86f26d653040db32d9d15b05446321dd10e45beba25b44f841e26935213d5", size = 377184, upload-time = "2026-04-29T21:20:24.109Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/d1/071a560000c8ce903dc919968ecce40fbe7a73213ac399051b887184f8a3/miniaudio-1.71-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d9dc15eff711bcfc62a9d05e0c78e4bc34821a455595e049629f2fea7491a523", size = 351488, upload-time = "2026-04-29T21:20:25.183Z" },
+    { url = "https://files.pythonhosted.org/packages/46/24/5873a569451cae5686fb656ebd78ffe0b5eebe48ca21ef61e227d237d20a/miniaudio-1.71-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12bc33e7e61072b4b541c14e10ef76119d5643e6bbb98e2dec0c0738889438fb", size = 643552, upload-time = "2026-04-29T21:20:26.211Z" },
+    { url = "https://files.pythonhosted.org/packages/90/9b/25785525e6b5ff9afd7f4c4279215dc09c3317ea4d837275b1ae17912b36/miniaudio-1.71-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:70fa2ea5353e6919aca59b8c5768144af009d18c3bca251749d66fb497424563", size = 645171, upload-time = "2026-04-29T21:20:27.494Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/6d/cbfd55fdc40256231f7b0c861e2bf79cc289bfbbe5e15869317944e6d673/miniaudio-1.71-cp313-cp313-win32.whl", hash = "sha256:1bf93aeede652926f27f430f0fd69ef0cf8a949c07b537d6a2f295602c747037", size = 235088, upload-time = "2026-04-29T21:20:29.031Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/8d/d5059c04b247b1079c0e48914a9ec20352910a3b9373060fb258dfd194ab/miniaudio-1.71-cp313-cp313-win_amd64.whl", hash = "sha256:4c849ccb1349f7b3553a77a66fe7e972315185f5c4c44a0bbda7ebcdd224db37", size = 274247, upload-time = "2026-04-29T21:20:29.96Z" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/55/fa/96d4cc7ada283357117f7890418ac065a0a6d81ec59e681cd965a403aba3/miniaudio-1.61.tar.gz", hash = "sha256:e88e97837d031f0fb6982394218b6487de02eaa382ad273b8fca37791a2b4b15", size = 1103527, upload-time = "2024-07-24T18:13:10.037Z" }
 
 [[package]]
 name = "mlx"
-version = "0.30.6"
-source = { registry = "https://pypi.org/simple" }
+version = "0.32.0.dev20260519+e9835615"
+source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }
 resolution-markers = [
-    "python_full_version >= '3.14' and sys_platform == 'linux'",
-    "python_full_version < '3.14' and sys_platform == 'linux'",
+    "sys_platform == 'darwin'",
+]
+
+[[package]]
+name = "mlx"
+version = "0.32.0"
+source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }
+resolution-markers = [
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" },
-    { url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" },
-    { url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" },
+    { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:5ae89b605e3d7f8de493ea9ed0cc438d11b6909f991242646c9d7ed2b3c348c8" },
 ]
 
-[package.optional-dependencies]
-cpu = [
-    { name = "mlx-cpu", marker = "sys_platform == 'linux'" },
-]
-cuda13 = [
-    { name = "mlx-cuda-13", marker = "sys_platform == 'linux'" },
+[package.metadata]
+requires-dist = [
+    { name = "ml-dtypes", marker = "extra == 'dev'" },
+    { name = "mlx-cpu", marker = "sys_platform == 'linux' and extra == 'cpu'", specifier = "==0.32.0" },
+    { name = "mlx-cuda-12", marker = "sys_platform == 'linux' and extra == 'cuda'", specifier = "==0.32.0" },
+    { name = "mlx-cuda-12", marker = "sys_platform == 'linux' and extra == 'cuda12'", specifier = "==0.32.0" },
+    { name = "mlx-cuda-13", marker = "sys_platform == 'linux' and extra == 'cuda13'", specifier = "==0.32.0" },
+    { name = "mlx-metal", marker = "sys_platform == 'darwin'", specifier = "==0.32.0" },
+    { name = "numpy", marker = "extra == 'dev'", specifier = ">=2" },
+    { name = "pre-commit", marker = "extra == 'dev'" },
+    { name = "psutil", marker = "extra == 'dev'", specifier = ">=7.2" },
+    { name = "torch", marker = "extra == 'dev'", specifier = ">=2.9" },
+    { name = "typing-extensions", marker = "extra == 'dev'" },
 ]
+provides-extras = ["dev", "cuda", "cuda12", "cuda13", "cpu"]
 
 [[package]]
 name = "mlx"
-version = "0.31.2.dev20260406+90dd61a5"
-source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#90dd61a5f0837f9bbbab4fd3fbfedba1ca5d33e7" }
+version = "0.32.0"
+source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }
 resolution-markers = [
-    "python_full_version >= '3.14' and sys_platform == 'darwin'",
-    "python_full_version < '3.14' and sys_platform == 'darwin'",
+    "platform_machine != 'aarch64' and sys_platform == 'linux'",
+]
+wheels = [
+    { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:58a83a1919de342811a696ae5c800a37b61f6c615c429062b29fb002545c2930" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "ml-dtypes", marker = "extra == 'dev'" },
+    { name = "mlx-cpu", marker = "sys_platform == 'linux' and extra == 'cpu'", specifier = "==0.32.0" },
+    { name = "mlx-cuda-12", marker = "sys_platform == 'linux' and extra == 'cuda'", specifier = "==0.32.0" },
+    { name = "mlx-cuda-12", marker = "sys_platform == 'linux' and extra == 'cuda12'", specifier = "==0.32.0" },
+    { name = "mlx-cuda-13", marker = "sys_platform == 'linux' and extra == 'cuda13'", specifier = "==0.32.0" },
+    { name = "mlx-metal", marker = "sys_platform == 'darwin'", specifier = "==0.32.0" },
+    { name = "numpy", marker = "extra == 'dev'", specifier = ">=2" },
+    { name = "pre-commit", marker = "extra == 'dev'" },
+    { name = "psutil", marker = "extra == 'dev'", specifier = ">=7.2" },
+    { name = "torch", marker = "extra == 'dev'", specifier = ">=2.9" },
+    { name = "typing-extensions", marker = "extra == 'dev'" },
+]
+provides-extras = ["dev", "cuda", "cuda12", "cuda13", "cpu"]
+
+[[package]]
+name = "mlx-audio"
+version = "0.4.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "miniaudio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "scipy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sounddevice", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/95/db/a9f95e3794eca373d681220c8b9f8f84451a0d14959f85cc341ca592394c/mlx_audio-0.4.3.tar.gz", hash = "sha256:8e87badf56a0f73bf91e3797b1195c01440a181cf0b64a2a08dc1bda4b037f54", size = 1144947, upload-time = "2026-04-28T20:18:12.09Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/25/0a89073ed7b7cdf34299042bd03d867c12c0c8b43f597be61bea7f146793/mlx_audio-0.4.3-py3-none-any.whl", hash = "sha256:6b87bf42d79d9ceb6b9310a77656b9b76429c2d6ddd89f634b2786c58a2e4721", size = 1373582, upload-time = "2026-04-28T20:18:10.512Z" },
 ]
 
 [[package]]
 name = "mlx-cpu"
-version = "0.30.6"
+version = "0.31.2"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c1/fd/b52c37d8514c321675bd9c8dacf1a26f493bb1004adbb3986feda76f2776/mlx_cpu-0.30.6-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:d4ca39400e06a762ab8c43c1da7e4ada87d9880b864d10eff8023dc910c76985", size = 8689310, upload-time = "2026-02-06T01:48:34.598Z" },
-    { url = "https://files.pythonhosted.org/packages/67/a1/f097b2042c3373abeb0365b52a5f5fc9176fe5c14682483b53012f714697/mlx_cpu-0.30.6-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:573baa804539b64fc3e1740d7055e80e7b47eaa4139bbace69d6f9896d2081e4", size = 10262929, upload-time = "2026-02-06T01:48:37.157Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/a3/5e62e9d4ebec89f1805b10dad74863815335f3048d5afd4b2640d474d809/mlx_cpu-0.31.2-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:0bfd8292d1d88ff1e5ea0a3202510a2cb61e0f212a69a364cee50aea4a175000", size = 8678191, upload-time = "2026-04-22T01:12:15.982Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/5b/4a0cfe9f7e25bf4a9d7fee9d03be8f56d4aa5949deedda0d8e1437d8b6f2/mlx_cpu-0.31.2-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:32bd40b8e7351b64b15921db5c24305726f97673e2fc512b1d5dc6727bb2f414", size = 10252297, upload-time = "2026-04-22T01:12:18.478Z" },
+]
+
+[[package]]
+name = "mlx-cuda-12"
+version = "0.32.0"
+source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_aarch64.whl" }
+resolution-markers = [
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
+]
+dependencies = [
+    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:0b86333c10a5f48f7ee73eaefb5437f5fe8f2ebd2bab21d41b0e6e8b706637d9" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nvidia-cublas-cu12", specifier = "==12.9.*" },
+    { name = "nvidia-cuda-nvrtc-cu12", specifier = "==12.9.*" },
+    { name = "nvidia-cudnn-cu12", specifier = "==9.*" },
+    { name = "nvidia-nccl-cu12" },
+]
+
+[[package]]
+name = "mlx-cuda-12"
+version = "0.32.0"
+source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_x86_64.whl" }
+resolution-markers = [
+    "platform_machine != 'aarch64' and sys_platform == 'linux'",
+]
+dependencies = [
+    { name = "nvidia-cublas-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cudnn-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-nccl-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_12-0.32.0-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:3fd3d1491543e4ff4e6d6fbda77d2fca41316d32e4dadc36a10af75d43a2a51f" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nvidia-cublas-cu12", specifier = "==12.9.*" },
+    { name = "nvidia-cuda-nvrtc-cu12", specifier = "==12.9.*" },
+    { name = "nvidia-cudnn-cu12", specifier = "==9.*" },
+    { name = "nvidia-nccl-cu12" },
 ]
 
 [[package]]
 name = "mlx-cuda-13"
-version = "0.30.6"
-source = { registry = "https://pypi.org/simple" }
+version = "0.32.0"
+source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_aarch64.whl" }
+resolution-markers = [
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
+]
 dependencies = [
-    { name = "nvidia-cublas", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cudnn-cu13", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" },
+    { name = "nvidia-cublas", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-nvrtc", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cudnn-cu13", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-nccl-cu13", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/08/6b/27cd4abed63ca9e4086536acde4486ea7a762dda5b36bb867ed954efe93a/mlx_cuda_13-0.30.6-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:1a5e58eaf4b1a4eb6cb0111ecb4099f6f1380403599f8992ec93dfbad8d9c6f1", size = 66904875, upload-time = "2026-02-06T02:49:19.183Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/ae/8864834f1c4637bd2f5da7fd8a6d2fb554a0bad16fbe53bb5ecdfcf1611f/mlx_cuda_13-0.30.6-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:35792e799bbf1498f3e0792c5c34c64bd13075de6d34d88e223de4b1000bab82", size = 69745930, upload-time = "2026-02-06T02:49:22.844Z" },
+    { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:e035c6dccd9f6902a283a74ef07e6169421a575fc58c7125e9bc12dd349e1dde" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nvidia-cublas" },
+    { name = "nvidia-cuda-nvrtc" },
+    { name = "nvidia-cudnn-cu13", specifier = "==9.*" },
+    { name = "nvidia-nccl-cu13" },
+]
+
+[[package]]
+name = "mlx-cuda-13"
+version = "0.32.0"
+source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_x86_64.whl" }
+resolution-markers = [
+    "platform_machine != 'aarch64' and sys_platform == 'linux'",
+]
+dependencies = [
+    { name = "nvidia-cublas", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-nvrtc", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-cudnn-cu13", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+    { name = "nvidia-nccl-cu13", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx_cuda_13-0.32.0-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:d310054c29bbe2af8e7c1cd9ca25969f11cf7418c48e1ae19e2574f27420f54f" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nvidia-cublas" },
+    { name = "nvidia-cuda-nvrtc" },
+    { name = "nvidia-cudnn-cu13", specifier = "==9.*" },
+    { name = "nvidia-nccl-cu13" },
 ]
 
 [[package]]
 name = "mlx-lm"
-version = "0.31.2"
-source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#b3540361c2fac915dc0f61ae0ce0de1583bfaa90" }
+version = "0.31.3"
+source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Fdeepseek-v4#6a3df6cd6b00a347ee40f12d97a182aaf86ea599" }
 dependencies = [
-    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.31.2.dev20260406+90dd61a5", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#90dd61a5f0837f9bbbab4fd3fbfedba1ca5d33e7" }, marker = "sys_platform == 'darwin'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [[package]]
 name = "mlx-vlm"
-version = "0.4.4"
+version = "0.5.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fastapi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "miniaudio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.31.2.dev20260406+90dd61a5", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#90dd61a5f0837f9bbbab4fd3fbfedba1ca5d33e7" }, marker = "sys_platform == 'darwin'" },
-    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "uvicorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "fastapi", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "llguidance", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "miniaudio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0.dev20260519+e9835615", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#e9835615badaf4ad7c95301f2a9bd58d782b38db" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_aarch64.whl" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine != 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx", version = "0.32.0", source = { url = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv/releases/download/mlx_cuda/mlx-0.32.0-cp313-cp313-manylinux_2_35_x86_64.whl" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'aarch64' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-audio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "uvicorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/94/ec/108aec30efb159940ea29d133d5d8ec14840edbec914869b46eaafac5552/mlx_vlm-0.4.4.tar.gz", hash = "sha256:3197e277c1be9ed1712ea04624df029e486f7747ad93e40e7bd1c9c771f8b179", size = 836370, upload-time = "2026-04-04T15:19:01.087Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/80/a3/70dce014f6a72efd2cecc07b6a68fc11c0694fbe54ea553b2e00499c7b36/mlx_vlm-0.5.0.tar.gz", hash = "sha256:24563cd1b3a399fd941b2359100628306e2754db1b48780516d1283138258793", size = 1033154, upload-time = "2026-05-06T21:09:33.594Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d9/81/235518176c3c8230e5274e91346ecf940591f653e73b0daeb505fb37eea9/mlx_vlm-0.4.4-py3-none-any.whl", hash = "sha256:3ff86ea738ab1914dc1b07e4fa5d4cc34bec5909e540692cfad0af808af13c11", size = 1014936, upload-time = "2026-04-04T15:18:59.328Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/66/fb955ccc442aa556e5e9d8836fb9041a7aadff5a88fa80c285e53dc19bf5/mlx_vlm-0.5.0-py3-none-any.whl", hash = "sha256:3351d6ccf609cbf57a4c8cd8308e9a1ce469883d8679d9968c6c6f77af016419", size = 1218132, upload-time = "2026-05-06T21:09:32.071Z" },
 ]
 
 [[package]]
 name = "more-itertools"
-version = "10.8.0"
+version = "11.0.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/f7/139d22fef48ac78127d18e01d80cf1be40236ae489769d17f35c3d425293/more_itertools-11.0.2.tar.gz", hash = "sha256:392a9e1e362cbc106a2457d37cabf9b36e5e12efd4ebff1654630e76597df804", size = 144659, upload-time = "2026-04-09T15:01:33.297Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/98/6af411189d9413534c3eb691182bff1f5c6d44ed2f93f2edfe52a1bbceb8/more_itertools-11.0.2-py3-none-any.whl", hash = "sha256:6e35b35f818b01f691643c6c611bc0902f2e92b46c18fffa77ae1e7c46e912e4", size = 71939, upload-time = "2026-04-09T15:01:32.21Z" },
 ]
 
 [[package]]
@@ -1583,114 +1606,88 @@ wheels = [
 
 [[package]]
 name = "msgspec"
-version = "0.20.0"
+version = "0.21.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e3/60/f79b9b013a16fa3a58350c9295ddc6789f2e335f36ea61ed10a21b215364/msgspec-0.21.1.tar.gz", hash = "sha256:2313508e394b0d208f8f56892ca9b2799e2561329de9763b19619595a6c0f72c", size = 319193, upload-time = "2026-04-12T21:44:50.394Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8a/d1/b902d38b6e5ba3bdddbec469bba388d647f960aeed7b5b3623a8debe8a76/msgspec-0.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9c1ff8db03be7598b50dd4b4a478d6fe93faae3bd54f4f17aa004d0e46c14c46", size = 196463, upload-time = "2025-11-24T03:55:43.405Z" },
-    { url = "https://files.pythonhosted.org/packages/57/b6/eff0305961a1d9447ec2b02f8c73c8946f22564d302a504185b730c9a761/msgspec-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f6532369ece217fd37c5ebcfd7e981f2615628c21121b7b2df9d3adcf2fd69b8", size = 188650, upload-time = "2025-11-24T03:55:44.761Z" },
-    { url = "https://files.pythonhosted.org/packages/99/93/f2ec1ae1de51d3fdee998a1ede6b2c089453a2ee82b5c1b361ed9095064a/msgspec-0.20.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9a1697da2f85a751ac3cc6a97fceb8e937fc670947183fb2268edaf4016d1ee", size = 218834, upload-time = "2025-11-24T03:55:46.441Z" },
-    { url = "https://files.pythonhosted.org/packages/28/83/36557b04cfdc317ed8a525c4993b23e43a8fbcddaddd78619112ca07138c/msgspec-0.20.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7fac7e9c92eddcd24c19d9e5f6249760941485dff97802461ae7c995a2450111", size = 224917, upload-time = "2025-11-24T03:55:48.06Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/56/362037a1ed5be0b88aced59272442c4b40065c659700f4b195a7f4d0ac88/msgspec-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f953a66f2a3eb8d5ea64768445e2bb301d97609db052628c3e1bcb7d87192a9f", size = 222821, upload-time = "2025-11-24T03:55:49.388Z" },
-    { url = "https://files.pythonhosted.org/packages/92/75/fa2370ec341cedf663731ab7042e177b3742645c5dd4f64dc96bd9f18a6b/msgspec-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:247af0313ae64a066d3aea7ba98840f6681ccbf5c90ba9c7d17f3e39dbba679c", size = 227227, upload-time = "2025-11-24T03:55:51.125Z" },
-    { url = "https://files.pythonhosted.org/packages/bb/18/62dc13ab0260c7d741dda8dc7f481495b93ac9168cd887dda5929880eef8/msgspec-0.20.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:eead16538db1b3f7ec6e3ed1f6f7c5dec67e90f76e76b610e1ffb5671815633a", size = 196407, upload-time = "2025-11-24T03:55:55.001Z" },
-    { url = "https://files.pythonhosted.org/packages/dd/1d/b9949e4ad6953e9f9a142c7997b2f7390c81e03e93570c7c33caf65d27e1/msgspec-0.20.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:703c3bb47bf47801627fb1438f106adbfa2998fe586696d1324586a375fca238", size = 188889, upload-time = "2025-11-24T03:55:56.311Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/19/f8bb2dc0f1bfe46cc7d2b6b61c5e9b5a46c62298e8f4d03bbe499c926180/msgspec-0.20.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cdb227dc585fb109305cee0fd304c2896f02af93ecf50a9c84ee54ee67dbb42", size = 219691, upload-time = "2025-11-24T03:55:57.908Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/8e/6b17e43f6eb9369d9858ee32c97959fcd515628a1df376af96c11606cf70/msgspec-0.20.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27d35044dd8818ac1bd0fedb2feb4fbdff4e3508dd7c5d14316a12a2d96a0de0", size = 224918, upload-time = "2025-11-24T03:55:59.322Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/db/0e833a177db1a4484797adba7f429d4242585980b90882cc38709e1b62df/msgspec-0.20.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b4296393a29ee42dd25947981c65506fd4ad39beaf816f614146fa0c5a6c91ae", size = 223436, upload-time = "2025-11-24T03:56:00.716Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/30/d2ee787f4c918fd2b123441d49a7707ae9015e0e8e1ab51aa7967a97b90e/msgspec-0.20.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:205fbdadd0d8d861d71c8f3399fe1a82a2caf4467bc8ff9a626df34c12176980", size = 227190, upload-time = "2025-11-24T03:56:02.371Z" },
-    { url = "https://files.pythonhosted.org/packages/4c/06/3da3fc9aaa55618a8f43eb9052453cfe01f82930bca3af8cea63a89f3a11/msgspec-0.20.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f84703e0e6ef025663dd1de828ca028774797b8155e070e795c548f76dde65d5", size = 200389, upload-time = "2025-11-24T03:56:06.375Z" },
-    { url = "https://files.pythonhosted.org/packages/83/3b/cc4270a5ceab40dfe1d1745856951b0a24fd16ac8539a66ed3004a60c91e/msgspec-0.20.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7c83fc24dd09cf1275934ff300e3951b3adc5573f0657a643515cc16c7dee131", size = 193198, upload-time = "2025-11-24T03:56:07.742Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/ae/4c7905ac53830c8e3c06fdd60e3cdcfedc0bbc993872d1549b84ea21a1bd/msgspec-0.20.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f13ccb1c335a124e80c4562573b9b90f01ea9521a1a87f7576c2e281d547f56", size = 225973, upload-time = "2025-11-24T03:56:09.18Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/da/032abac1de4d0678d99eaeadb1323bd9d247f4711c012404ba77ed6f15ca/msgspec-0.20.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17c2b5ca19f19306fc83c96d85e606d2cc107e0caeea85066b5389f664e04846", size = 229509, upload-time = "2025-11-24T03:56:10.898Z" },
-    { url = "https://files.pythonhosted.org/packages/69/52/fdc7bdb7057a166f309e0b44929e584319e625aaba4771b60912a9321ccd/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d931709355edabf66c2dd1a756b2d658593e79882bc81aae5964969d5a291b63", size = 230434, upload-time = "2025-11-24T03:56:12.48Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/fe/1dfd5f512b26b53043884e4f34710c73e294e7cc54278c3fe28380e42c37/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:565f915d2e540e8a0c93a01ff67f50aebe1f7e22798c6a25873f9fda8d1325f8", size = 231758, upload-time = "2025-11-24T03:56:13.765Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/74/f11ede02839b19ff459f88e3145df5d711626ca84da4e23520cebf819367/msgspec-0.21.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:764173717a01743f007e9f74520ed281f24672c604514f7d76c1c3a10e8edb66", size = 196176, upload-time = "2026-04-12T21:44:17.613Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/40/4476c1bd341418a046c4955aff632ec769315d1e3cb94e6acf86d461f9ed/msgspec-0.21.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:344c7cd0eaed1fb81d7959f99100ef71ec9b536881a376f11b9a6c4803365697", size = 188524, upload-time = "2026-04-12T21:44:18.815Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/d9/9e9d7d7e5061b47540d03d640fab9b3965ba7ae49c1b2154861c8f007518/msgspec-0.21.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48943e278b3854c2f89f955ddc6f9f430d3f0784b16e47d10604ee0463cd21f5", size = 218880, upload-time = "2026-04-12T21:44:20.028Z" },
+    { url = "https://files.pythonhosted.org/packages/74/66/2bb344f34abb4b57e60c7c9c761994e0417b9718ec1460bf00c296f2a7ea/msgspec-0.21.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9aa659ebb0101b1cbc31461212b87e341d961f0ab0772aaf068a99e001ec4aa", size = 225050, upload-time = "2026-04-12T21:44:21.577Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/84/7c1e412f76092277bf760cef12b7979d03314d259ab5b5cafde5d0c1722d/msgspec-0.21.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7b27d1a8ead2b6f5b0c4f2d07b8be1ccfcc041c8a0e704781edebe3ae13c484", size = 222713, upload-time = "2026-04-12T21:44:22.83Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/27/0bba04b2b4ef05f3d068429410bc71d2cea925f1596a8f41152cccd5edb8/msgspec-0.21.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38fe93e86b61328fe544cb7fd871fad5a27c8734bfda90f65e5dbe288ae50f61", size = 227259, upload-time = "2026-04-12T21:44:24.11Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/2d/09574b0eea02fed2c2c1383dbaae2c7f79dc16dcd6487a886000afb5d7c4/msgspec-0.21.1-cp313-cp313-win_amd64.whl", hash = "sha256:8bc666331c35fcce05a7cd2d6221adbe0f6058f8e750711413d22793c080ac6a", size = 189857, upload-time = "2026-04-12T21:44:25.359Z" },
+    { url = "https://files.pythonhosted.org/packages/46/34/105b1576ad182879914f0c821f17ee1d13abb165cb060448f96fe2aff078/msgspec-0.21.1-cp313-cp313-win_arm64.whl", hash = "sha256:42bb1241e0750c1a4346f2aa84db26c5ffd99a4eb3a954927d9f149ff2f42898", size = 175403, upload-time = "2026-04-12T21:44:26.608Z" },
 ]
 
 [[package]]
 name = "multidict"
-version = "6.7.0"
+version = "6.7.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/80/1e/5492c365f222f907de1039b91f922b93fa4f764c713ee858d235495d8f50/multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5", size = 101834, upload-time = "2025-10-06T14:52:30.657Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d2/86/33272a544eeb36d66e4d9a920602d1a2f57d4ebea4ef3cdfe5a912574c95/multidict-6.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bee7c0588aa0076ce77c0ea5d19a68d76ad81fcd9fe8501003b9a24f9d4000f6", size = 76135, upload-time = "2025-10-06T14:49:54.26Z" },
-    { url = "https://files.pythonhosted.org/packages/91/1c/eb97db117a1ebe46d457a3d235a7b9d2e6dcab174f42d1b67663dd9e5371/multidict-6.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7ef6b61cad77091056ce0e7ce69814ef72afacb150b7ac6a3e9470def2198159", size = 45117, upload-time = "2025-10-06T14:49:55.82Z" },
-    { url = "https://files.pythonhosted.org/packages/f1/d8/6c3442322e41fb1dd4de8bd67bfd11cd72352ac131f6368315617de752f1/multidict-6.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c0359b1ec12b1d6849c59f9d319610b7f20ef990a6d454ab151aa0e3b9f78ca", size = 43472, upload-time = "2025-10-06T14:49:57.048Z" },
-    { url = "https://files.pythonhosted.org/packages/75/3f/e2639e80325af0b6c6febdf8e57cc07043ff15f57fa1ef808f4ccb5ac4cd/multidict-6.7.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cd240939f71c64bd658f186330603aac1a9a81bf6273f523fca63673cb7378a8", size = 249342, upload-time = "2025-10-06T14:49:58.368Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/cc/84e0585f805cbeaa9cbdaa95f9a3d6aed745b9d25700623ac89a6ecff400/multidict-6.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a60a4d75718a5efa473ebd5ab685786ba0c67b8381f781d1be14da49f1a2dc60", size = 257082, upload-time = "2025-10-06T14:49:59.89Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/9c/ac851c107c92289acbbf5cfb485694084690c1b17e555f44952c26ddc5bd/multidict-6.7.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53a42d364f323275126aff81fb67c5ca1b7a04fda0546245730a55c8c5f24bc4", size = 240704, upload-time = "2025-10-06T14:50:01.485Z" },
-    { url = "https://files.pythonhosted.org/packages/50/cc/5f93e99427248c09da95b62d64b25748a5f5c98c7c2ab09825a1d6af0e15/multidict-6.7.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3b29b980d0ddbecb736735ee5bef69bb2ddca56eff603c86f3f29a1128299b4f", size = 266355, upload-time = "2025-10-06T14:50:02.955Z" },
-    { url = "https://files.pythonhosted.org/packages/ec/0c/2ec1d883ceb79c6f7f6d7ad90c919c898f5d1c6ea96d322751420211e072/multidict-6.7.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f8a93b1c0ed2d04b97a5e9336fd2d33371b9a6e29ab7dd6503d63407c20ffbaf", size = 267259, upload-time = "2025-10-06T14:50:04.446Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/2d/f0b184fa88d6630aa267680bdb8623fb69cb0d024b8c6f0d23f9a0f406d3/multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ff96e8815eecacc6645da76c413eb3b3d34cfca256c70b16b286a687d013c32", size = 254903, upload-time = "2025-10-06T14:50:05.98Z" },
-    { url = "https://files.pythonhosted.org/packages/06/c9/11ea263ad0df7dfabcad404feb3c0dd40b131bc7f232d5537f2fb1356951/multidict-6.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7516c579652f6a6be0e266aec0acd0db80829ca305c3d771ed898538804c2036", size = 252365, upload-time = "2025-10-06T14:50:07.511Z" },
-    { url = "https://files.pythonhosted.org/packages/41/88/d714b86ee2c17d6e09850c70c9d310abac3d808ab49dfa16b43aba9d53fd/multidict-6.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:040f393368e63fb0f3330e70c26bfd336656bed925e5cbe17c9da839a6ab13ec", size = 250062, upload-time = "2025-10-06T14:50:09.074Z" },
-    { url = "https://files.pythonhosted.org/packages/15/fe/ad407bb9e818c2b31383f6131ca19ea7e35ce93cf1310fce69f12e89de75/multidict-6.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b3bc26a951007b1057a1c543af845f1c7e3e71cc240ed1ace7bf4484aa99196e", size = 249683, upload-time = "2025-10-06T14:50:10.714Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/a4/a89abdb0229e533fb925e7c6e5c40201c2873efebc9abaf14046a4536ee6/multidict-6.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7b022717c748dd1992a83e219587aabe45980d88969f01b316e78683e6285f64", size = 261254, upload-time = "2025-10-06T14:50:12.28Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/aa/0e2b27bd88b40a4fb8dc53dd74eecac70edaa4c1dd0707eb2164da3675b3/multidict-6.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9600082733859f00d79dee64effc7aef1beb26adb297416a4ad2116fd61374bd", size = 257967, upload-time = "2025-10-06T14:50:14.16Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/8e/0c67b7120d5d5f6d874ed85a085f9dc770a7f9d8813e80f44a9fec820bb7/multidict-6.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94218fcec4d72bc61df51c198d098ce2b378e0ccbac41ddbed5ef44092913288", size = 250085, upload-time = "2025-10-06T14:50:15.639Z" },
-    { url = "https://files.pythonhosted.org/packages/e8/68/7b3a5170a382a340147337b300b9eb25a9ddb573bcdfff19c0fa3f31ffba/multidict-6.7.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ad9ce259f50abd98a1ca0aa6e490b58c316a0fce0617f609723e40804add2c00", size = 83114, upload-time = "2025-10-06T14:50:21.223Z" },
-    { url = "https://files.pythonhosted.org/packages/55/5c/3fa2d07c84df4e302060f555bbf539310980362236ad49f50eeb0a1c1eb9/multidict-6.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07f5594ac6d084cbb5de2df218d78baf55ef150b91f0ff8a21cc7a2e3a5a58eb", size = 48442, upload-time = "2025-10-06T14:50:22.871Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/56/67212d33239797f9bd91962bb899d72bb0f4c35a8652dcdb8ed049bef878/multidict-6.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0591b48acf279821a579282444814a2d8d0af624ae0bc600aa4d1b920b6e924b", size = 46885, upload-time = "2025-10-06T14:50:24.258Z" },
-    { url = "https://files.pythonhosted.org/packages/46/d1/908f896224290350721597a61a69cd19b89ad8ee0ae1f38b3f5cd12ea2ac/multidict-6.7.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:749a72584761531d2b9467cfbdfd29487ee21124c304c4b6cb760d8777b27f9c", size = 242588, upload-time = "2025-10-06T14:50:25.716Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/67/8604288bbd68680eee0ab568fdcb56171d8b23a01bcd5cb0c8fedf6e5d99/multidict-6.7.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b4c3d199f953acd5b446bf7c0de1fe25d94e09e79086f8dc2f48a11a129cdf1", size = 249966, upload-time = "2025-10-06T14:50:28.192Z" },
-    { url = "https://files.pythonhosted.org/packages/20/33/9228d76339f1ba51e3efef7da3ebd91964d3006217aae13211653193c3ff/multidict-6.7.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9fb0211dfc3b51efea2f349ec92c114d7754dd62c01f81c3e32b765b70c45c9b", size = 228618, upload-time = "2025-10-06T14:50:29.82Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/2d/25d9b566d10cab1c42b3b9e5b11ef79c9111eaf4463b8c257a3bd89e0ead/multidict-6.7.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a027ec240fe73a8d6281872690b988eed307cd7d91b23998ff35ff577ca688b5", size = 257539, upload-time = "2025-10-06T14:50:31.731Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/b1/8d1a965e6637fc33de3c0d8f414485c2b7e4af00f42cab3d84e7b955c222/multidict-6.7.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1d964afecdf3a8288789df2f5751dc0a8261138c3768d9af117ed384e538fad", size = 256345, upload-time = "2025-10-06T14:50:33.26Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/0c/06b5a8adbdeedada6f4fb8d8f193d44a347223b11939b42953eeb6530b6b/multidict-6.7.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caf53b15b1b7df9fbd0709aa01409000a2b4dd03a5f6f5cc548183c7c8f8b63c", size = 247934, upload-time = "2025-10-06T14:50:34.808Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/31/b2491b5fe167ca044c6eb4b8f2c9f3b8a00b24c432c365358eadac5d7625/multidict-6.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:654030da3197d927f05a536a66186070e98765aa5142794c9904555d3a9d8fb5", size = 245243, upload-time = "2025-10-06T14:50:36.436Z" },
-    { url = "https://files.pythonhosted.org/packages/61/1a/982913957cb90406c8c94f53001abd9eafc271cb3e70ff6371590bec478e/multidict-6.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2090d3718829d1e484706a2f525e50c892237b2bf9b17a79b059cb98cddc2f10", size = 235878, upload-time = "2025-10-06T14:50:37.953Z" },
-    { url = "https://files.pythonhosted.org/packages/be/c0/21435d804c1a1cf7a2608593f4d19bca5bcbd7a81a70b253fdd1c12af9c0/multidict-6.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d2cfeec3f6f45651b3d408c4acec0ebf3daa9bc8a112a084206f5db5d05b754", size = 243452, upload-time = "2025-10-06T14:50:39.574Z" },
-    { url = "https://files.pythonhosted.org/packages/54/0a/4349d540d4a883863191be6eb9a928846d4ec0ea007d3dcd36323bb058ac/multidict-6.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:4ef089f985b8c194d341eb2c24ae6e7408c9a0e2e5658699c92f497437d88c3c", size = 252312, upload-time = "2025-10-06T14:50:41.612Z" },
-    { url = "https://files.pythonhosted.org/packages/26/64/d5416038dbda1488daf16b676e4dbfd9674dde10a0cc8f4fc2b502d8125d/multidict-6.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e93a0617cd16998784bf4414c7e40f17a35d2350e5c6f0bd900d3a8e02bd3762", size = 246935, upload-time = "2025-10-06T14:50:43.972Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/8c/8290c50d14e49f35e0bd4abc25e1bc7711149ca9588ab7d04f886cdf03d9/multidict-6.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0feece2ef8ebc42ed9e2e8c78fc4aa3cf455733b507c09ef7406364c94376c6", size = 243385, upload-time = "2025-10-06T14:50:45.648Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/b1/3da6934455dd4b261d4c72f897e3a5728eba81db59959f3a639245891baa/multidict-6.7.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3bab1e4aff7adaa34410f93b1f8e57c4b36b9af0426a76003f441ee1d3c7e842", size = 75128, upload-time = "2025-10-06T14:50:51.92Z" },
-    { url = "https://files.pythonhosted.org/packages/14/2c/f069cab5b51d175a1a2cb4ccdf7a2c2dabd58aa5bd933fa036a8d15e2404/multidict-6.7.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b8512bac933afc3e45fb2b18da8e59b78d4f408399a960339598374d4ae3b56b", size = 44410, upload-time = "2025-10-06T14:50:53.275Z" },
-    { url = "https://files.pythonhosted.org/packages/42/e2/64bb41266427af6642b6b128e8774ed84c11b80a90702c13ac0a86bb10cc/multidict-6.7.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:79dcf9e477bc65414ebfea98ffd013cb39552b5ecd62908752e0e413d6d06e38", size = 43205, upload-time = "2025-10-06T14:50:54.911Z" },
-    { url = "https://files.pythonhosted.org/packages/02/68/6b086fef8a3f1a8541b9236c594f0c9245617c29841f2e0395d979485cde/multidict-6.7.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:31bae522710064b5cbeddaf2e9f32b1abab70ac6ac91d42572502299e9953128", size = 245084, upload-time = "2025-10-06T14:50:56.369Z" },
-    { url = "https://files.pythonhosted.org/packages/15/ee/f524093232007cd7a75c1d132df70f235cfd590a7c9eaccd7ff422ef4ae8/multidict-6.7.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a0df7ff02397bb63e2fd22af2c87dfa39e8c7f12947bc524dbdc528282c7e34", size = 252667, upload-time = "2025-10-06T14:50:57.991Z" },
-    { url = "https://files.pythonhosted.org/packages/02/a5/eeb3f43ab45878f1895118c3ef157a480db58ede3f248e29b5354139c2c9/multidict-6.7.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7a0222514e8e4c514660e182d5156a415c13ef0aabbd71682fc714e327b95e99", size = 233590, upload-time = "2025-10-06T14:50:59.589Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/1e/76d02f8270b97269d7e3dbd45644b1785bda457b474315f8cf999525a193/multidict-6.7.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2397ab4daaf2698eb51a76721e98db21ce4f52339e535725de03ea962b5a3202", size = 264112, upload-time = "2025-10-06T14:51:01.183Z" },
-    { url = "https://files.pythonhosted.org/packages/76/0b/c28a70ecb58963847c2a8efe334904cd254812b10e535aefb3bcce513918/multidict-6.7.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8891681594162635948a636c9fe0ff21746aeb3dd5463f6e25d9bea3a8a39ca1", size = 261194, upload-time = "2025-10-06T14:51:02.794Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/63/2ab26e4209773223159b83aa32721b4021ffb08102f8ac7d689c943fded1/multidict-6.7.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18706cc31dbf402a7945916dd5cddf160251b6dab8a2c5f3d6d5a55949f676b3", size = 248510, upload-time = "2025-10-06T14:51:04.724Z" },
-    { url = "https://files.pythonhosted.org/packages/93/cd/06c1fa8282af1d1c46fd55c10a7930af652afdce43999501d4d68664170c/multidict-6.7.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f844a1bbf1d207dd311a56f383f7eda2d0e134921d45751842d8235e7778965d", size = 248395, upload-time = "2025-10-06T14:51:06.306Z" },
-    { url = "https://files.pythonhosted.org/packages/99/ac/82cb419dd6b04ccf9e7e61befc00c77614fc8134362488b553402ecd55ce/multidict-6.7.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:d4393e3581e84e5645506923816b9cc81f5609a778c7e7534054091acc64d1c6", size = 239520, upload-time = "2025-10-06T14:51:08.091Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/f3/a0f9bf09493421bd8716a362e0cd1d244f5a6550f5beffdd6b47e885b331/multidict-6.7.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:fbd18dc82d7bf274b37aa48d664534330af744e03bccf696d6f4c6042e7d19e7", size = 245479, upload-time = "2025-10-06T14:51:10.365Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/01/476d38fc73a212843f43c852b0eee266b6971f0e28329c2184a8df90c376/multidict-6.7.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b6234e14f9314731ec45c42fc4554b88133ad53a09092cc48a88e771c125dadb", size = 258903, upload-time = "2025-10-06T14:51:12.466Z" },
-    { url = "https://files.pythonhosted.org/packages/49/6d/23faeb0868adba613b817d0e69c5f15531b24d462af8012c4f6de4fa8dc3/multidict-6.7.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:08d4379f9744d8f78d98c8673c06e202ffa88296f009c71bbafe8a6bf847d01f", size = 252333, upload-time = "2025-10-06T14:51:14.48Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/cc/48d02ac22b30fa247f7dad82866e4b1015431092f4ba6ebc7e77596e0b18/multidict-6.7.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fe04da3f79387f450fd0061d4dd2e45a72749d31bf634aecc9e27f24fdc4b3f", size = 243411, upload-time = "2025-10-06T14:51:16.072Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/40/cd499bd0dbc5f1136726db3153042a735fffd0d77268e2ee20d5f33c010f/multidict-6.7.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:c1dcc7524066fa918c6a27d61444d4ee7900ec635779058571f70d042d86ed63", size = 82326, upload-time = "2025-10-06T14:51:21.588Z" },
-    { url = "https://files.pythonhosted.org/packages/13/8a/18e031eca251c8df76daf0288e6790561806e439f5ce99a170b4af30676b/multidict-6.7.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:27e0b36c2d388dc7b6ced3406671b401e84ad7eb0656b8f3a2f46ed0ce483718", size = 48065, upload-time = "2025-10-06T14:51:22.93Z" },
-    { url = "https://files.pythonhosted.org/packages/40/71/5e6701277470a87d234e433fb0a3a7deaf3bcd92566e421e7ae9776319de/multidict-6.7.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a7baa46a22e77f0988e3b23d4ede5513ebec1929e34ee9495be535662c0dfe2", size = 46475, upload-time = "2025-10-06T14:51:24.352Z" },
-    { url = "https://files.pythonhosted.org/packages/fe/6a/bab00cbab6d9cfb57afe1663318f72ec28289ea03fd4e8236bb78429893a/multidict-6.7.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7bf77f54997a9166a2f5675d1201520586439424c2511723a7312bdb4bcc034e", size = 239324, upload-time = "2025-10-06T14:51:25.822Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/5f/8de95f629fc22a7769ade8b41028e3e5a822c1f8904f618d175945a81ad3/multidict-6.7.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e011555abada53f1578d63389610ac8a5400fc70ce71156b0aa30d326f1a5064", size = 246877, upload-time = "2025-10-06T14:51:27.604Z" },
-    { url = "https://files.pythonhosted.org/packages/23/b4/38881a960458f25b89e9f4a4fdcb02ac101cfa710190db6e5528841e67de/multidict-6.7.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:28b37063541b897fd6a318007373930a75ca6d6ac7c940dbe14731ffdd8d498e", size = 225824, upload-time = "2025-10-06T14:51:29.664Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/39/6566210c83f8a261575f18e7144736059f0c460b362e96e9cf797a24b8e7/multidict-6.7.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05047ada7a2fde2631a0ed706f1fd68b169a681dfe5e4cf0f8e4cb6618bbc2cd", size = 253558, upload-time = "2025-10-06T14:51:31.684Z" },
-    { url = "https://files.pythonhosted.org/packages/00/a3/67f18315100f64c269f46e6c0319fa87ba68f0f64f2b8e7fd7c72b913a0b/multidict-6.7.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:716133f7d1d946a4e1b91b1756b23c088881e70ff180c24e864c26192ad7534a", size = 252339, upload-time = "2025-10-06T14:51:33.699Z" },
-    { url = "https://files.pythonhosted.org/packages/c8/2a/1cb77266afee2458d82f50da41beba02159b1d6b1f7973afc9a1cad1499b/multidict-6.7.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1bed1b467ef657f2a0ae62844a607909ef1c6889562de5e1d505f74457d0b96", size = 244895, upload-time = "2025-10-06T14:51:36.189Z" },
-    { url = "https://files.pythonhosted.org/packages/dd/72/09fa7dd487f119b2eb9524946ddd36e2067c08510576d43ff68469563b3b/multidict-6.7.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ca43bdfa5d37bd6aee89d85e1d0831fb86e25541be7e9d376ead1b28974f8e5e", size = 241862, upload-time = "2025-10-06T14:51:41.291Z" },
-    { url = "https://files.pythonhosted.org/packages/65/92/bc1f8bd0853d8669300f732c801974dfc3702c3eeadae2f60cef54dc69d7/multidict-6.7.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:44b546bd3eb645fd26fb949e43c02a25a2e632e2ca21a35e2e132c8105dc8599", size = 232376, upload-time = "2025-10-06T14:51:43.55Z" },
-    { url = "https://files.pythonhosted.org/packages/09/86/ac39399e5cb9d0c2ac8ef6e10a768e4d3bc933ac808d49c41f9dc23337eb/multidict-6.7.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a6ef16328011d3f468e7ebc326f24c1445f001ca1dec335b2f8e66bed3006394", size = 240272, upload-time = "2025-10-06T14:51:45.265Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/b6/fed5ac6b8563ec72df6cb1ea8dac6d17f0a4a1f65045f66b6d3bf1497c02/multidict-6.7.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5aa873cbc8e593d361ae65c68f85faadd755c3295ea2c12040ee146802f23b38", size = 248774, upload-time = "2025-10-06T14:51:46.836Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/8d/b954d8c0dc132b68f760aefd45870978deec6818897389dace00fcde32ff/multidict-6.7.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:3d7b6ccce016e29df4b7ca819659f516f0bc7a4b3efa3bb2012ba06431b044f9", size = 242731, upload-time = "2025-10-06T14:51:48.541Z" },
-    { url = "https://files.pythonhosted.org/packages/16/9d/a2dac7009125d3540c2f54e194829ea18ac53716c61b655d8ed300120b0f/multidict-6.7.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:171b73bd4ee683d307599b66793ac80981b06f069b62eea1c9e29c9241aa66b0", size = 240193, upload-time = "2025-10-06T14:51:50.355Z" },
-    { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/22/929c141d6c0dba87d3e1d38fbdf1ba8baba86b7776469f2bc2d3227a1e67/multidict-6.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2b41f5fed0ed563624f1c17630cb9941cf2309d4df00e494b551b5f3e3d67a23", size = 76174, upload-time = "2026-01-26T02:44:18.509Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/75/bc704ae15fee974f8fccd871305e254754167dce5f9e42d88a2def741a1d/multidict-6.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84e61e3af5463c19b67ced91f6c634effb89ef8bfc5ca0267f954451ed4bb6a2", size = 45116, upload-time = "2026-01-26T02:44:19.745Z" },
+    { url = "https://files.pythonhosted.org/packages/79/76/55cd7186f498ed080a18440c9013011eb548f77ae1b297206d030eb1180a/multidict-6.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:935434b9853c7c112eee7ac891bc4cb86455aa631269ae35442cb316790c1445", size = 43524, upload-time = "2026-01-26T02:44:21.571Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/3c/414842ef8d5a1628d68edee29ba0e5bcf235dbfb3ccd3ea303a7fe8c72ff/multidict-6.7.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432feb25a1cb67fe82a9680b4d65fb542e4635cb3166cd9c01560651ad60f177", size = 249368, upload-time = "2026-01-26T02:44:22.803Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/32/befed7f74c458b4a525e60519fe8d87eef72bb1e99924fa2b0f9d97a221e/multidict-6.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e82d14e3c948952a1a85503817e038cba5905a3352de76b9a465075d072fba23", size = 256952, upload-time = "2026-01-26T02:44:24.306Z" },
+    { url = "https://files.pythonhosted.org/packages/03/d6/c878a44ba877f366630c860fdf74bfb203c33778f12b6ac274936853c451/multidict-6.7.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4cfb48c6ea66c83bcaaf7e4dfa7ec1b6bbcf751b7db85a328902796dfde4c060", size = 240317, upload-time = "2026-01-26T02:44:25.772Z" },
+    { url = "https://files.pythonhosted.org/packages/68/49/57421b4d7ad2e9e60e25922b08ceb37e077b90444bde6ead629095327a6f/multidict-6.7.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1d540e51b7e8e170174555edecddbd5538105443754539193e3e1061864d444d", size = 267132, upload-time = "2026-01-26T02:44:27.648Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/fe/ec0edd52ddbcea2a2e89e174f0206444a61440b40f39704e64dc807a70bd/multidict-6.7.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:273d23f4b40f3dce4d6c8a821c741a86dec62cded82e1175ba3d99be128147ed", size = 268140, upload-time = "2026-01-26T02:44:29.588Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/73/6e1b01cbeb458807aa0831742232dbdd1fa92bfa33f52a3f176b4ff3dc11/multidict-6.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d624335fd4fa1c08a53f8b4be7676ebde19cd092b3895c421045ca87895b429", size = 254277, upload-time = "2026-01-26T02:44:30.902Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/b2/5fb8c124d7561a4974c342bc8c778b471ebbeb3cc17df696f034a7e9afe7/multidict-6.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:12fad252f8b267cc75b66e8fc51b3079604e8d43a75428ffe193cd9e2195dfd6", size = 252291, upload-time = "2026-01-26T02:44:32.31Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/96/51d4e4e06bcce92577fcd488e22600bd38e4fd59c20cb49434d054903bd2/multidict-6.7.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:03ede2a6ffbe8ef936b92cb4529f27f42be7f56afcdab5ab739cd5f27fb1cbf9", size = 250156, upload-time = "2026-01-26T02:44:33.734Z" },
+    { url = "https://files.pythonhosted.org/packages/db/6b/420e173eec5fba721a50e2a9f89eda89d9c98fded1124f8d5c675f7a0c0f/multidict-6.7.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:90efbcf47dbe33dcf643a1e400d67d59abeac5db07dc3f27d6bdeae497a2198c", size = 249742, upload-time = "2026-01-26T02:44:35.222Z" },
+    { url = "https://files.pythonhosted.org/packages/44/a3/ec5b5bd98f306bc2aa297b8c6f11a46714a56b1e6ef5ebda50a4f5d7c5fb/multidict-6.7.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c4b9bfc148f5a91be9244d6264c53035c8a0dcd2f51f1c3c6e30e30ebaa1c84", size = 262221, upload-time = "2026-01-26T02:44:36.604Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/f7/e8c0d0da0cd1e28d10e624604e1a36bcc3353aaebdfdc3a43c72bc683a12/multidict-6.7.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:401c5a650f3add2472d1d288c26deebc540f99e2fb83e9525007a74cd2116f1d", size = 258664, upload-time = "2026-01-26T02:44:38.008Z" },
+    { url = "https://files.pythonhosted.org/packages/52/da/151a44e8016dd33feed44f730bd856a66257c1ee7aed4f44b649fb7edeb3/multidict-6.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:97891f3b1b3ffbded884e2916cacf3c6fc87b66bb0dde46f7357404750559f33", size = 249490, upload-time = "2026-01-26T02:44:39.386Z" },
+    { url = "https://files.pythonhosted.org/packages/87/af/a3b86bf9630b732897f6fc3f4c4714b90aa4361983ccbdcd6c0339b21b0c/multidict-6.7.1-cp313-cp313-win32.whl", hash = "sha256:e1c5988359516095535c4301af38d8a8838534158f649c05dd1050222321bcb3", size = 41695, upload-time = "2026-01-26T02:44:41.318Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/35/e994121b0e90e46134673422dd564623f93304614f5d11886b1b3e06f503/multidict-6.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:960c83bf01a95b12b08fd54324a4eb1d5b52c88932b5cba5d6e712bb3ed12eb5", size = 45884, upload-time = "2026-01-26T02:44:42.488Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/61/42d3e5dbf661242a69c97ea363f2d7b46c567da8eadef8890022be6e2ab0/multidict-6.7.1-cp313-cp313-win_arm64.whl", hash = "sha256:563fe25c678aaba333d5399408f5ec3c383ca5b663e7f774dd179a520b8144df", size = 43122, upload-time = "2026-01-26T02:44:43.664Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/b3/e6b21c6c4f314bb956016b0b3ef2162590a529b84cb831c257519e7fde44/multidict-6.7.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c76c4bec1538375dad9d452d246ca5368ad6e1c9039dadcf007ae59c70619ea1", size = 83175, upload-time = "2026-01-26T02:44:44.894Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/76/23ecd2abfe0957b234f6c960f4ade497f55f2c16aeb684d4ecdbf1c95791/multidict-6.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:57b46b24b5d5ebcc978da4ec23a819a9402b4228b8a90d9c656422b4bdd8a963", size = 48460, upload-time = "2026-01-26T02:44:46.106Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/57/a0ed92b23f3a042c36bc4227b72b97eca803f5f1801c1ab77c8a212d455e/multidict-6.7.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e954b24433c768ce78ab7929e84ccf3422e46deb45a4dc9f93438f8217fa2d34", size = 46930, upload-time = "2026-01-26T02:44:47.278Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/66/02ec7ace29162e447f6382c495dc95826bf931d3818799bbef11e8f7df1a/multidict-6.7.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3bd231490fa7217cc832528e1cd8752a96f0125ddd2b5749390f7c3ec8721b65", size = 242582, upload-time = "2026-01-26T02:44:48.604Z" },
+    { url = "https://files.pythonhosted.org/packages/58/18/64f5a795e7677670e872673aca234162514696274597b3708b2c0d276cce/multidict-6.7.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:253282d70d67885a15c8a7716f3a73edf2d635793ceda8173b9ecc21f2fb8292", size = 250031, upload-time = "2026-01-26T02:44:50.544Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/ed/e192291dbbe51a8290c5686f482084d31bcd9d09af24f63358c3d42fd284/multidict-6.7.1-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0b4c48648d7649c9335cf1927a8b87fa692de3dcb15faa676c6a6f1f1aabda43", size = 228596, upload-time = "2026-01-26T02:44:51.951Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/7e/3562a15a60cf747397e7f2180b0a11dc0c38d9175a650e75fa1b4d325e15/multidict-6.7.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:98bc624954ec4d2c7cb074b8eefc2b5d0ce7d482e410df446414355d158fe4ca", size = 257492, upload-time = "2026-01-26T02:44:53.902Z" },
+    { url = "https://files.pythonhosted.org/packages/24/02/7d0f9eae92b5249bb50ac1595b295f10e263dd0078ebb55115c31e0eaccd/multidict-6.7.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1b99af4d9eec0b49927b4402bcbb58dea89d3e0db8806a4086117019939ad3dd", size = 255899, upload-time = "2026-01-26T02:44:55.316Z" },
+    { url = "https://files.pythonhosted.org/packages/00/e3/9b60ed9e23e64c73a5cde95269ef1330678e9c6e34dd4eb6b431b85b5a10/multidict-6.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aac4f16b472d5b7dc6f66a0d49dd57b0e0902090be16594dc9ebfd3d17c47e7", size = 247970, upload-time = "2026-01-26T02:44:56.783Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/06/538e58a63ed5cfb0bd4517e346b91da32fde409d839720f664e9a4ae4f9d/multidict-6.7.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:21f830fe223215dffd51f538e78c172ed7c7f60c9b96a2bf05c4848ad49921c3", size = 245060, upload-time = "2026-01-26T02:44:58.195Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/2f/d743a3045a97c895d401e9bd29aaa09b94f5cbdf1bd561609e5a6c431c70/multidict-6.7.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f5dd81c45b05518b9aa4da4aa74e1c93d715efa234fd3e8a179df611cc85e5f4", size = 235888, upload-time = "2026-01-26T02:44:59.57Z" },
+    { url = "https://files.pythonhosted.org/packages/38/83/5a325cac191ab28b63c52f14f1131f3b0a55ba3b9aa65a6d0bf2a9b921a0/multidict-6.7.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eb304767bca2bb92fb9c5bd33cedc95baee5bb5f6c88e63706533a1c06ad08c8", size = 243554, upload-time = "2026-01-26T02:45:01.054Z" },
+    { url = "https://files.pythonhosted.org/packages/20/1f/9d2327086bd15da2725ef6aae624208e2ef828ed99892b17f60c344e57ed/multidict-6.7.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:c9035dde0f916702850ef66460bc4239d89d08df4d02023a5926e7446724212c", size = 252341, upload-time = "2026-01-26T02:45:02.484Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/2c/2a1aa0280cf579d0f6eed8ee5211c4f1730bd7e06c636ba2ee6aafda302e/multidict-6.7.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:af959b9beeb66c822380f222f0e0a1889331597e81f1ded7f374f3ecb0fd6c52", size = 246391, upload-time = "2026-01-26T02:45:03.862Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/03/7ca022ffc36c5a3f6e03b179a5ceb829be9da5783e6fe395f347c0794680/multidict-6.7.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:41f2952231456154ee479651491e94118229844dd7226541788be783be2b5108", size = 243422, upload-time = "2026-01-26T02:45:05.296Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/1d/b31650eab6c5778aceed46ba735bd97f7c7d2f54b319fa916c0f96e7805b/multidict-6.7.1-cp313-cp313t-win32.whl", hash = "sha256:df9f19c28adcb40b6aae30bbaa1478c389efd50c28d541d76760199fc1037c32", size = 47770, upload-time = "2026-01-26T02:45:06.754Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/5b/2d2d1d522e51285bd61b1e20df8f47ae1a9d80839db0b24ea783b3832832/multidict-6.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d54ecf9f301853f2c5e802da559604b3e95bb7a3b01a9c295c6ee591b9882de8", size = 53109, upload-time = "2026-01-26T02:45:08.044Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/a3/cc409ba012c83ca024a308516703cf339bdc4b696195644a7215a5164a24/multidict-6.7.1-cp313-cp313t-win_arm64.whl", hash = "sha256:5a37ca18e360377cfda1d62f5f382ff41f2b8c4ccb329ed974cc2e1643440118", size = 45573, upload-time = "2026-01-26T02:45:09.349Z" },
+    { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" },
 ]
 
 [[package]]
 name = "multiprocess"
-version = "0.70.18"
+version = "0.70.19"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = "2026-01-19T06:47:39.744Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
-    { url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
-    { url = "https://files.pythonhosted.org/packages/ee/25/7d7e78e750bc1aecfaf0efbf826c69a791d2eeaf29cf20cba93ff4cced78/multiprocess-0.70.18-py313-none-any.whl", hash = "sha256:871743755f43ef57d7910a38433cfe41319e72be1bbd90b79c7a5ac523eb9334", size = 151917, upload-time = "2025-04-17T03:11:24.044Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" },
+    { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, upload-time = "2026-01-19T06:47:33.711Z" },
+    { url = "https://files.pythonhosted.org/packages/71/70/38998b950a97ea279e6bd657575d22d1a2047256caf707d9a10fbce4f065/multiprocess-0.70.19-py312-none-any.whl", hash = "sha256:3a56c0e85dd5025161bac5ce138dcac1e49174c7d8e74596537e729fd5c53c28", size = 150281, upload-time = "2026-01-19T06:47:35.037Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/74/d2c27e03cb84251dfe7249b8e82923643c6d48fa4883b9476b025e7dc7eb/multiprocess-0.70.19-py313-none-any.whl", hash = "sha256:8d5eb4ec5017ba2fab4e34a747c6d2c2b6fecfe9e7236e77988db91580ada952", size = 156414, upload-time = "2026-01-19T06:47:35.915Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" },
+]
+
+[[package]]
+name = "nanobind"
+version = "2.12.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c6/5c/3b69bc3933ad3c3668ba029ad410ba8ecfdc8ee7262ff1009f3304f3c562/nanobind-2.12.0.tar.gz", hash = "sha256:0ae77c1a88f27153fa57045ee00f7b0a7b06b1cd3df942e95a34b38c5d0a5bee", size = 1002704, upload-time = "2026-02-25T09:41:54.691Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/bf/1a54e3573736f3ad15fc599c5dde007937234652a1a7efd62573b4ce3a7e/nanobind-2.12.0-py3-none-any.whl", hash = "sha256:a10d3d88e691dcdf22696f9acd893fda3c5a05635763aea238829d274fcad480", size = 249512, upload-time = "2026-02-25T09:41:52.908Z" },
 ]
 
 [[package]]
@@ -1704,245 +1701,184 @@ wheels = [
 
 [[package]]
 name = "nh3"
-version = "0.3.2"
+version = "0.3.5"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ca/a5/34c26015d3a434409f4d2a1cd8821a06c05238703f49283ffeb937bef093/nh3-0.3.2.tar.gz", hash = "sha256:f394759a06df8b685a4ebfb1874fb67a9cbfd58c64fc5ed587a663c0e63ec376", size = 19288, upload-time = "2025-10-30T11:17:45.948Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9c/5f/1d19bdc7d27238e37f3672cdc02cb77c56a4a86d140cd4f4f23c90df6e16/nh3-0.3.5.tar.gz", hash = "sha256:45855e14ff056064fec77133bfcf7cd691838168e5e17bbef075394954dc9dc8", size = 20743, upload-time = "2026-04-25T10:44:16.066Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/5b/01/a1eda067c0ba823e5e2bb033864ae4854549e49fb6f3407d2da949106bfb/nh3-0.3.2-cp314-cp314t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d18957a90806d943d141cc5e4a0fefa1d77cf0d7a156878bf9a66eed52c9cc7d", size = 1419839, upload-time = "2025-10-30T11:17:09.956Z" },
-    { url = "https://files.pythonhosted.org/packages/30/57/07826ff65d59e7e9cc789ef1dc405f660cabd7458a1864ab58aefa17411b/nh3-0.3.2-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45c953e57028c31d473d6b648552d9cab1efe20a42ad139d78e11d8f42a36130", size = 791183, upload-time = "2025-10-30T11:17:11.99Z" },
-    { url = "https://files.pythonhosted.org/packages/af/2f/e8a86f861ad83f3bb5455f596d5c802e34fcdb8c53a489083a70fd301333/nh3-0.3.2-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2c9850041b77a9147d6bbd6dbbf13eeec7009eb60b44e83f07fcb2910075bf9b", size = 829127, upload-time = "2025-10-30T11:17:13.192Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/97/77aef4daf0479754e8e90c7f8f48f3b7b8725a3b8c0df45f2258017a6895/nh3-0.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:403c11563e50b915d0efdb622866d1d9e4506bce590ef7da57789bf71dd148b5", size = 997131, upload-time = "2025-10-30T11:17:14.677Z" },
-    { url = "https://files.pythonhosted.org/packages/41/ee/fd8140e4df9d52143e89951dd0d797f5546004c6043285289fbbe3112293/nh3-0.3.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:0dca4365db62b2d71ff1620ee4f800c4729849906c5dd504ee1a7b2389558e31", size = 1068783, upload-time = "2025-10-30T11:17:15.861Z" },
-    { url = "https://files.pythonhosted.org/packages/87/64/bdd9631779e2d588b08391f7555828f352e7f6427889daf2fa424bfc90c9/nh3-0.3.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0fe7ee035dd7b2290715baf29cb27167dddd2ff70ea7d052c958dbd80d323c99", size = 994732, upload-time = "2025-10-30T11:17:17.155Z" },
-    { url = "https://files.pythonhosted.org/packages/79/66/90190033654f1f28ca98e3d76b8be1194505583f9426b0dcde782a3970a2/nh3-0.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a40202fd58e49129764f025bbaae77028e420f1d5b3c8e6f6fd3a6490d513868", size = 975997, upload-time = "2025-10-30T11:17:18.77Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/3e/f5a5cc2885c24be13e9b937441bd16a012ac34a657fe05e58927e8af8b7a/nh3-0.3.2-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7064ccf5ace75825bd7bf57859daaaf16ed28660c1c6b306b649a9eda4b54b1e", size = 1431980, upload-time = "2025-10-30T11:17:25.457Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/f7/529a99324d7ef055de88b690858f4189379708abae92ace799365a797b7f/nh3-0.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8745454cdd28bbbc90861b80a0111a195b0e3961b9fa2e672be89eb199fa5d8", size = 820805, upload-time = "2025-10-30T11:17:26.98Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/62/19b7c50ccd1fa7d0764822d2cea8f2a320f2fd77474c7a1805cb22cf69b0/nh3-0.3.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72d67c25a84579f4a432c065e8b4274e53b7cf1df8f792cf846abfe2c3090866", size = 803527, upload-time = "2025-10-30T11:17:28.284Z" },
-    { url = "https://files.pythonhosted.org/packages/4a/ca/f022273bab5440abff6302731a49410c5ef66b1a9502ba3fbb2df998d9ff/nh3-0.3.2-cp38-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:13398e676a14d6233f372c75f52d5ae74f98210172991f7a3142a736bd92b131", size = 1051674, upload-time = "2025-10-30T11:17:29.909Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/f7/5728e3b32a11daf5bd21cf71d91c463f74305938bc3eb9e0ac1ce141646e/nh3-0.3.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03d617e5c8aa7331bd2659c654e021caf9bba704b109e7b2b28b039a00949fe5", size = 1004737, upload-time = "2025-10-30T11:17:31.205Z" },
-    { url = "https://files.pythonhosted.org/packages/53/7f/f17e0dba0a99cee29e6cee6d4d52340ef9cb1f8a06946d3a01eb7ec2fb01/nh3-0.3.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f55c4d2d5a207e74eefe4d828067bbb01300e06e2a7436142f915c5928de07", size = 911745, upload-time = "2025-10-30T11:17:32.945Z" },
-    { url = "https://files.pythonhosted.org/packages/42/0f/c76bf3dba22c73c38e9b1113b017cf163f7696f50e003404ec5ecdb1e8a6/nh3-0.3.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb18403f02b655a1bbe4e3a4696c2ae1d6ae8f5991f7cacb684b1ae27e6c9f7", size = 797184, upload-time = "2025-10-30T11:17:34.226Z" },
-    { url = "https://files.pythonhosted.org/packages/08/a1/73d8250f888fb0ddf1b119b139c382f8903d8bb0c5bd1f64afc7e38dad1d/nh3-0.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d66f41672eb4060cf87c037f760bdbc6847852ca9ef8e9c5a5da18f090abf87", size = 838556, upload-time = "2025-10-30T11:17:35.875Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/09/deb57f1fb656a7a5192497f4a287b0ade5a2ff6b5d5de4736d13ef6d2c1f/nh3-0.3.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f97f8b25cb2681d25e2338148159447e4d689aafdccfcf19e61ff7db3905768a", size = 1006695, upload-time = "2025-10-30T11:17:37.071Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/61/8f4d41c4ccdac30e4b1a4fa7be4b0f9914d8314a5058472f84c8e101a418/nh3-0.3.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:2ab70e8c6c7d2ce953d2a58102eefa90c2d0a5ed7aa40c7e29a487bc5e613131", size = 1075471, upload-time = "2025-10-30T11:17:38.225Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/c6/966aec0cb4705e69f6c3580422c239205d5d4d0e50fac380b21e87b6cf1b/nh3-0.3.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1710f3901cd6440ca92494ba2eb6dc260f829fa8d9196b659fa10de825610ce0", size = 1002439, upload-time = "2025-10-30T11:17:39.553Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/c8/97a2d5f7a314cce2c5c49f30c6f161b7f3617960ade4bfc2fd1ee092cb20/nh3-0.3.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:91e9b001101fb4500a2aafe3e7c92928d85242d38bf5ac0aba0b7480da0a4cd6", size = 987439, upload-time = "2025-10-30T11:17:40.81Z" },
+    { url = "https://files.pythonhosted.org/packages/85/30/d162e99746a2fb1d98bb0ef23af3e201b156cf09f7de867c7390c8fe1c06/nh3-0.3.5-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:3bb854485c9b33e5bb143ff3e49e577073bc6bc320f0ff8fc316dd89c0d3c101", size = 1442393, upload-time = "2026-04-25T10:43:53.556Z" },
+    { url = "https://files.pythonhosted.org/packages/25/8c/072120d506978ab053e1732d0efa7c86cb478fee0ee098fda0ac0d31cb34/nh3-0.3.5-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50d401ab2d8e86d59e2126e3ab2a2f45840c405842b626d9a51624b3a33b6878", size = 837722, upload-time = "2026-04-25T10:43:55.073Z" },
+    { url = "https://files.pythonhosted.org/packages/52/86/d4e06e28c5ad1c4b065f89737d02631bd49f1660b6ebcf17a87ffcd201da/nh3-0.3.5-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:acfd354e61accbe4c74f8017c6e397a776916dfe47c48643cf7fd84ade826f93", size = 822872, upload-time = "2026-04-25T10:43:56.581Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/62/50659255213f241ec5797ae7427464c969397373e83b3659372b341ae869/nh3-0.3.5-cp38-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:52d877980d7ca01dc3baf3936bf844828bc6f332962227a684ed79c18cce14c3", size = 1100031, upload-time = "2026-04-25T10:43:58.098Z" },
+    { url = "https://files.pythonhosted.org/packages/00/7a/a12ae77593b2fcf3be25df7bc1c01967d0de448bdb4b6c7ec80fe4f5a74f/nh3-0.3.5-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:207c01801d3e9bb8ec08f08689346bdd30ce15b8bf60013a925d08b5388962a4", size = 1057669, upload-time = "2026-04-25T10:43:59.328Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/71/5647dc04c0233192a3956fc91708822b21403a06508cacf78083c68e7bf0/nh3-0.3.5-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea232933394d1d58bf7c4bb348dc4660eae6604e1ae81cd2ba6d9ed80d390f3b", size = 914795, upload-time = "2026-04-25T10:44:00.52Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/0e/bf298920729f216adcb002acf7ea01b90842603d2e4e2ce9b900d9ee8fab/nh3-0.3.5-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe3a787dc76b50de6bee54ef242f26c41dfe47654428e3e94f0fae5bb6dd2cc1", size = 806976, upload-time = "2026-04-25T10:44:01.743Z" },
+    { url = "https://files.pythonhosted.org/packages/85/01/26761e1dc2b848e65a62c19e5d39ad446283287cd4afddc89f364ab86bc9/nh3-0.3.5-cp38-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:488928988caad25ba14b1eb5bc74e25e21f3b5e40341d956f3ce4a8bc19460dc", size = 834904, upload-time = "2026-04-25T10:44:03.454Z" },
+    { url = "https://files.pythonhosted.org/packages/33/53/0766113e679540ac1edc1b82b1295aecd321eeb75d6fead70109a838b6ee/nh3-0.3.5-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2c069570b06aa848457713ad7af4a9905691291548c4466a9ad78ee95808382b", size = 857159, upload-time = "2026-04-25T10:44:05.003Z" },
+    { url = "https://files.pythonhosted.org/packages/58/36/734d353dfaf292fed574b8b3092f0ef79dc6404f3879f7faaa61a4701fad/nh3-0.3.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eeedc90ed8c42c327e8e10e621ccfa314fc6cce35d5929f4297ff1cdb89667c4", size = 1018600, upload-time = "2026-04-25T10:44:06.18Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/aa/d9c59c1b49669fcb7bababa55df82385f029ad5c2651f583c3a1141cfdd1/nh3-0.3.5-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:de8e8621853b6470fe928c684ee0d3f39ea8086cebafe4c416486488dea7b68d", size = 1103530, upload-time = "2026-04-25T10:44:07.68Z" },
+    { url = "https://files.pythonhosted.org/packages/90/b0/cdd210bfb8d9d43fb02fc3c868336b9955934d8e15e66eb1d15a147b8af0/nh3-0.3.5-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:6ea58cc44d274c643b83547ca9654a0b1a817609b160601356f76a2b744c49ad", size = 1061754, upload-time = "2026-04-25T10:44:09.362Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/cb/7a39e72e668c8445bdd95e494b3e21cfdddc68329be8ea3522c8befb46c4/nh3-0.3.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e49c9b564e6bcb03ecd2f057213df9a0de15a95812ac9db9600b590db23d3ae9", size = 1040938, upload-time = "2026-04-25T10:44:10.775Z" },
+    { url = "https://files.pythonhosted.org/packages/af/4c/fc2f9ed208a3801a319f59b5fea03cdc20cf3bd8af14be930d3a8de01224/nh3-0.3.5-cp38-abi3-win32.whl", hash = "sha256:559e4c73b689e9a7aa97ac9760b1bc488038d7c1a575aa4ab5a0e19ee9630c0f", size = 611445, upload-time = "2026-04-25T10:44:12.317Z" },
+    { url = "https://files.pythonhosted.org/packages/db/1a/e4c9b5e2ae13e6092c9ec16d8ca30646cb01fcdea245f36c5b08fd21fbd5/nh3-0.3.5-cp38-abi3-win_amd64.whl", hash = "sha256:45e6a65dc88a300a2e3502cb9c8e6d1d6b831d6fba7470643333609c6aab1f30", size = 626502, upload-time = "2026-04-25T10:44:13.682Z" },
+    { url = "https://files.pythonhosted.org/packages/80/7c/19cd0671d1ba2762fb388fc149697d20d0568ccfeef833b11280a619e526/nh3-0.3.5-cp38-abi3-win_arm64.whl", hash = "sha256:8f85285700a18e9f3fc5bff41fe573fa84f81542ef13b48a89f9fecca0474d3b", size = 611069, upload-time = "2026-04-25T10:44:14.934Z" },
 ]
 
 [[package]]
 name = "nltk"
-version = "3.9.3"
+version = "3.9.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "joblib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "joblib", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e1/8f/915e1c12df07c70ed779d18ab83d065718a926e70d3ea33eb0cd66ffb7c0/nltk-3.9.3.tar.gz", hash = "sha256:cb5945d6424a98d694c2b9a0264519fab4363711065a46aa0ae7a2195b92e71f", size = 2923673, upload-time = "2026-02-24T12:05:53.833Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/74/a1/b3b4adf15585a5bc4c357adde150c01ebeeb642173ded4d871e89468767c/nltk-3.9.4.tar.gz", hash = "sha256:ed03bc098a40481310320808b2db712d95d13ca65b27372f8a403949c8b523d0", size = 2946864, upload-time = "2026-03-24T06:13:40.641Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c2/7e/9af5a710a1236e4772de8dfcc6af942a561327bb9f42b5b4a24d0cf100fd/nltk-3.9.3-py3-none-any.whl", hash = "sha256:60b3db6e9995b3dd976b1f0fa7dec22069b2677e759c28eb69b62ddd44870522", size = 1525385, upload-time = "2026-02-24T12:05:46.54Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/91/04e965f8e717ba0ab4bdca5c112deeab11c9e750d94c4d4602f050295d39/nltk-3.9.4-py3-none-any.whl", hash = "sha256:f2fa301c3a12718ce4a0e9305c5675299da5ad9e26068218b69d692fda84828f", size = 1552087, upload-time = "2026-03-24T06:13:38.47Z" },
 ]
 
 [[package]]
 name = "nodejs-wheel-binaries"
-version = "25.2.1rc0"
+version = "26.1.0rc0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/af/d7/ea97424b7b066e6b481c68585ce3cf8f7164813975797b4f917ad4396b13/nodejs_wheel_binaries-25.2.1rc0.tar.gz", hash = "sha256:7d66bb66b7b964f5efa9b0e09f5bd6bbfe34643235a2e0d0e8193b26560ce5f4", size = 7899, upload-time = "2025-11-24T22:56:19Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ce/4e/201569198b18324abca399156f74ecdd0305be70f24174a07e45b594d4f3/nodejs_wheel_binaries-26.1.0rc0.tar.gz", hash = "sha256:cc2936dbbe50dbc671ba38866a35343b8bfa9461e34d137d34c4ee576af64d59", size = 7899, upload-time = "2026-05-12T08:15:41.677Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/cb/80/5172f82028f11b8889135d340aa5e1aee29c0f0b6d1d19ca72ecbe2e974f/nodejs_wheel_binaries-25.2.1rc0-py2.py3-none-macosx_13_0_arm64.whl", hash = "sha256:30ff879dcc45f947a711212011150ee67062141ff3d56cbba919910f3a77d7db", size = 56048550, upload-time = "2025-11-24T22:55:42.476Z" },
-    { url = "https://files.pythonhosted.org/packages/06/c0/e9d0dcde5488e3c777179ec1f48d595dab3b8e2082c5bd7d4d8e056b582a/nodejs_wheel_binaries-25.2.1rc0-py2.py3-none-macosx_13_0_x86_64.whl", hash = "sha256:ac27a6429d6c40a5e22ab2433b2e1460130263859eda1d2dbe2cbb3743a19837", size = 56211915, upload-time = "2025-11-24T22:55:47.137Z" },
-    { url = "https://files.pythonhosted.org/packages/48/81/060766ffa2fe3150120792a47c1e5df9207edf8e9e992a8ef56e507b329d/nodejs_wheel_binaries-25.2.1rc0-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:a735144d06d5b39516267617b1697ec7cf204a336dbb495849d29f68b1531c41", size = 60689580, upload-time = "2025-11-24T22:55:51.774Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/0c/31f3d8c327df06df26393fdbe4082398e768429132f2690c57290da7d7ca/nodejs_wheel_binaries-25.2.1rc0-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:ce9410db0cd11b9ce5e56774f58b9d4ca6f06a6a6237801a1d70a6a2b4d57ae9", size = 61289023, upload-time = "2025-11-24T22:55:56.446Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/e6/7b1680085d0fc863ab3d0c8fe43c71ea2999140b083130b506c69d4e5351/nodejs_wheel_binaries-25.2.1rc0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:30d9a0bb559006689c10561dbcc7748cd7e73d51d2d2318cfffc46ba08c2c539", size = 62740952, upload-time = "2025-11-24T22:56:00.693Z" },
-    { url = "https://files.pythonhosted.org/packages/11/3a/865f45bca0f6daf6a6150e20ae4e1ef1757574967b5c1a55705eb1a3aa51/nodejs_wheel_binaries-25.2.1rc0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8c30fe61adfcf89002002438fe810ebd660a856417540578aeb6eb4b9ef88c74", size = 63431735, upload-time = "2025-11-24T22:56:07.462Z" },
+    { url = "https://files.pythonhosted.org/packages/49/e7/e2f4b6cff29c3e6fa18e7c301bc6faa610909c9a6661279056845f109180/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-macosx_13_0_arm64.whl", hash = "sha256:d56a4d21f48dc84defaee7b6c3797397a762f568e2b1d17eca42a6251b8b2982", size = 59793824, upload-time = "2026-05-12T08:15:05.588Z" },
+    { url = "https://files.pythonhosted.org/packages/04/76/42315aab9c07ae1dded3eeea1432510fe21d57bc99b36c411f8fcb00c0f2/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-macosx_13_0_x86_64.whl", hash = "sha256:bb988706e77512b14ad81fb539045a5dfe33975623352d8a6d0978dabfd7ed5b", size = 59999985, upload-time = "2026-05-12T08:15:09.957Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/f8/b03520f5e70265f14d98fd95521d83d0b02bc75b8fc64dd7fe0eda068171/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:07a5255eb7ed9abec062b37cf206f6981abddd696cd811106b9072a29f3d80eb", size = 63904068, upload-time = "2026-05-12T08:15:13.71Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/1e/b470956692fa9561aa777b3a058521636163f18ff91bff450351368d9037/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:8e448706b3e8ab154ceb11e20f3674dbe625f23be8d7239342846fdae3c5099e", size = 64758719, upload-time = "2026-05-12T08:15:22.681Z" },
+    { url = "https://files.pythonhosted.org/packages/94/ab/1adead1acb9bfd542ffc9f7210609ab4d4f3a57a5a0c7536e44cc8f142d2/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8c384155306a2c1be7c4241ba171b1d5fdc04323ced8b04daf6cb6180cca870b", size = 66087284, upload-time = "2026-05-12T08:15:26.89Z" },
+    { url = "https://files.pythonhosted.org/packages/43/c9/150f3bc37f8292ef93045fd2b290bcacd1362c9cb92e39d0a8cd73a5394f/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6864bf8631204253fb307a9de24056865d008b05fec182a5ec6b2d3479678cf1", size = 67058061, upload-time = "2026-05-12T08:15:31.004Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/71/8ab3930c392e8c9ca9e789ba1f5b51f4deef803a283d81e7374baa88095f/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-win_amd64.whl", hash = "sha256:073a62f84cf2191decb20837c106ec4f5fa761c7aab878a814ab06687d958c25", size = 45309189, upload-time = "2026-05-12T08:15:34.436Z" },
+    { url = "https://files.pythonhosted.org/packages/94/46/0d042977cbbb933245d9fce9a43eabaee6d215fe05e10fc3f04c9c415cb1/nodejs_wheel_binaries-26.1.0rc0-py2.py3-none-win_arm64.whl", hash = "sha256:b8b369d305bea0fb36106c61e191a031d319df6f562278e52cfffef60619eaa7", size = 42671551, upload-time = "2026-05-12T08:15:37.694Z" },
 ]
 
 [[package]]
 name = "numpy"
-version = "2.4.1"
+version = "2.4.6"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload-time = "2026-01-10T06:44:59.619Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d0/ad/fed0499ce6a338d2a03ebae59cd15093910c8875328855781952abf6c2fe/numpy-2.4.6.tar.gz", hash = "sha256:f3a3570c4a2a16746ac2c31a7c7c7b0c186b95ce902e33db6f28094ed7387dda", size = 20735807, upload-time = "2026-05-18T23:37:14.07Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495, upload-time = "2026-01-10T06:43:06.283Z" },
-    { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657, upload-time = "2026-01-10T06:43:09.094Z" },
-    { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256, upload-time = "2026-01-10T06:43:13.634Z" },
-    { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212, upload-time = "2026-01-10T06:43:15.661Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871, upload-time = "2026-01-10T06:43:17.324Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305, upload-time = "2026-01-10T06:43:19.376Z" },
-    { url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909, upload-time = "2026-01-10T06:43:21.808Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380, upload-time = "2026-01-10T06:43:23.957Z" },
-    { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156, upload-time = "2026-01-10T06:43:34.237Z" },
-    { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663, upload-time = "2026-01-10T06:43:36.211Z" },
-    { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224, upload-time = "2026-01-10T06:43:37.884Z" },
-    { url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352, upload-time = "2026-01-10T06:43:39.479Z" },
-    { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279, upload-time = "2026-01-10T06:43:41.93Z" },
-    { url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316, upload-time = "2026-01-10T06:43:44.121Z" },
-    { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884, upload-time = "2026-01-10T06:43:46.613Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046, upload-time = "2026-01-10T06:43:54.797Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858, upload-time = "2026-01-10T06:43:57.099Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417, upload-time = "2026-01-10T06:43:59.037Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643, upload-time = "2026-01-10T06:44:01.852Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963, upload-time = "2026-01-10T06:44:04.047Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811, upload-time = "2026-01-10T06:44:06.207Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643, upload-time = "2026-01-10T06:44:08.33Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601, upload-time = "2026-01-10T06:44:10.841Z" },
-    { url = "https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774, upload-time = "2026-01-10T06:44:19.467Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274, upload-time = "2026-01-10T06:44:23.189Z" },
-    { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306, upload-time = "2026-01-10T06:44:25.012Z" },
-    { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653, upload-time = "2026-01-10T06:44:26.706Z" },
-    { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144, upload-time = "2026-01-10T06:44:29.378Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425, upload-time = "2026-01-10T06:44:31.721Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053, upload-time = "2026-01-10T06:44:34.617Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/82/bdab26d7438c6791ca31b7c024ca37c1eab8b726ba236129005cd4a06e45/numpy-2.4.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:511dbaf848decaaaf4b4ca48032619fb3138710c4bf7da7617765edad1ef96b0", size = 16684648, upload-time = "2026-05-18T23:34:29.41Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/30/a80189bcc7f5e4258b3fbc3968d909d1756f54d023299ecc39ad6fdb9ef8/numpy-2.4.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bf162abab1c1a736333192707cef898e735a5ca00f38f27eeedf44b39d9e85eb", size = 14693902, upload-time = "2026-05-18T23:34:33.013Z" },
+    { url = "https://files.pythonhosted.org/packages/97/12/70b5d0d7c15e1ebb8a6a84a8caa1d19e181d84fb58bb6d70aca29099dec1/numpy-2.4.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:043191bfa8eab18c776647b62723ac9dddece59743b13f49b2016094129c2b3f", size = 5198992, upload-time = "2026-05-18T23:34:36.132Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/8c/ebd2a8f8a83541f8d38cc5667e8c2b69cecfd30da6e45693e8158857d44b/numpy-2.4.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:6180d8b35af935aed8ece3a85e0a43f87393ae0ac87c8d2c8bd2c993f7270ef3", size = 6546944, upload-time = "2026-05-18T23:34:38.484Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/c5/7b863a97a91671a0338f4253bd3b5a3d3852f0692dae91711c9f4a10e787/numpy-2.4.6-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72fbe16c6fac95aedf5937fa873445cec2110be35d8a4e9433d7501fd98dae6b", size = 15669392, upload-time = "2026-05-18T23:34:41.257Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/9d/3584b9984ca4c047aea75214ce1a4c4c73d849bd71b604264b7f5653f8a8/numpy-2.4.6-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7830bab239b79cda9c08c2da014761cafb48da6150e1da17ac06283f43b6089", size = 16633220, upload-time = "2026-05-18T23:34:45.075Z" },
+    { url = "https://files.pythonhosted.org/packages/05/ae/7c67fba23bd98caec7c99261f3a16072ade14813486b0282cb29846de832/numpy-2.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ef4aea96ce4d3b074422cb4f2f64e216bf9e213004bb58ecfdf50ea02ea8eb9a", size = 17020800, upload-time = "2026-05-18T23:34:49.065Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/5d/3b6725cb31d983c5e66916f5d36f6d7e5521129e4c4404d64f918292a5b6/numpy-2.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dfa20cc6ca228e6b155b11da03825975ce66aea520985dbbddf0f2a5a495c605", size = 18357600, upload-time = "2026-05-18T23:34:52.709Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/da/2ccc6c2fe8898dee01d90c75c5f5f914a23daf99e3e0f59516a08760c8b5/numpy-2.4.6-cp313-cp313-win32.whl", hash = "sha256:56b39e5e0622a09a25bf5baf62f4bcf0cb8a41ae6e2819cf49bbc5a74c083f91", size = 5961134, upload-time = "2026-05-18T23:34:55.618Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/cd/9cc4dc876fb065d5c220aae4d5e14826b2715331bb7618ce1fb07a679d99/numpy-2.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:c4fc99836233ea196540b17ab0983aff60ed07941751930f5f4d05bc3b3b7359", size = 12318598, upload-time = "2026-05-18T23:34:58.928Z" },
+    { url = "https://files.pythonhosted.org/packages/39/1e/c0bcba1f8694116485fe28fd1be698c278fcda4141c5b0e53a2aed8b12a8/numpy-2.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a7c711e21628b52034bb5ab8d1bce291f752fcc5e92accc615778acee1ff4778", size = 10222272, upload-time = "2026-05-18T23:35:02.167Z" },
+    { url = "https://files.pythonhosted.org/packages/63/6d/cc5619247c8f4204e507f5883528372e4ac4bb189e579fb859a12e480b1f/numpy-2.4.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:112b06a867b235ef466ed3508ddf0238050df9c727cafb5301ac385b899189a1", size = 14821197, upload-time = "2026-05-18T23:35:05.468Z" },
+    { url = "https://files.pythonhosted.org/packages/00/58/f1c39161c87d9e9bed660f1ed4bafc0e403d5ec9650b6dd77aead07d489b/numpy-2.4.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:eaf7fa2de5c0be8ae6ff8e9bea2ccd725e980541244521d8d4b5f3354a27babe", size = 5326287, upload-time = "2026-05-18T23:35:08.693Z" },
+    { url = "https://files.pythonhosted.org/packages/af/57/3917ab0fd97f271a8694513581b8a36c655f111c446852c302f04ccdb6fc/numpy-2.4.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:7265a2f3d436e54ef9f2b52b5c937e6be778781bd97a590319d7348f1c1ca997", size = 6646763, upload-time = "2026-05-18T23:35:11.459Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/0f/037e64c494b67581ae18193d770adef354c41f3f2c8ebf865602d949bf8f/numpy-2.4.6-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f74a575920ab21fe304421a3fc28793d82e299cae9eccb37084e9fc7f3617c20", size = 15728070, upload-time = "2026-05-18T23:35:14.79Z" },
+    { url = "https://files.pythonhosted.org/packages/21/a6/5d2bae9c9542eb4df16dc9c46dc79c186e9bad53805dfa5399a6023c6db0/numpy-2.4.6-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ede83e07a75dd06bc501566c1eca2afc0d61677c1472ac9ad93fdee6e638a48d", size = 16681752, upload-time = "2026-05-18T23:35:18.836Z" },
+    { url = "https://files.pythonhosted.org/packages/92/14/23d1dfb410ae362cd59ce53e936b1513d545eb40db3949ced632e19a459e/numpy-2.4.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:68bb27509ac1b9a3443094260f6326150663b06abe40b73a2f81160623da5b67", size = 17086024, upload-time = "2026-05-18T23:35:22.52Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/6e/23595a2c642cdf3bc567877064bdd7f91c8b0038a4453cf2daf7248eafe9/numpy-2.4.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a0df0043bdb289bde1f62da130d20df23d58b45429f752bc7a8fc5325a225ecd", size = 18403398, upload-time = "2026-05-18T23:35:26.398Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/90/0ac3bc947217e66dec77e7cbc6a1979d1af70b6461b82f620d3bccd5e4c8/numpy-2.4.6-cp313-cp313t-win32.whl", hash = "sha256:29a287e0cf63ff528da061de6b9f64a4618da591ca1046aafc54062e40ca7eab", size = 6084971, upload-time = "2026-05-18T23:35:29.387Z" },
+    { url = "https://files.pythonhosted.org/packages/77/71/5673e351671a1d2bd6063b91b44f70c0affea7d1516fa7a6572941ba4aa1/numpy-2.4.6-cp313-cp313t-win_amd64.whl", hash = "sha256:25c692919ac5a01f170a3bfcd62d745b24fd095c353d50812637d6fcab442e75", size = 12458532, upload-time = "2026-05-18T23:35:32.175Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/88/19d3503c5046e688f049274b27a3ef3d771152fa80d3ba3d01a3dff61abe/numpy-2.4.6-cp313-cp313t-win_arm64.whl", hash = "sha256:1e978ec1e8bd0e0e4de6bb75de9d30cbb74db6b6a2bb727618613703ca0167dd", size = 10291881, upload-time = "2026-05-18T23:35:35.465Z" },
 ]
 
 [[package]]
 name = "nvidia-cublas"
-version = "13.1.0.3"
+version = "13.4.1.1"
 source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux'" },
+]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e1/a5/fce49e2ae977e0ccc084e5adafceb4f0ac0c8333cb6863501618a7277f67/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c86fc7f7ae36d7528288c5d88098edcb7b02c633d262e7ddbb86b0ad91be5df2", size = 542851226, upload-time = "2025-10-09T08:59:04.818Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/44/423ac00af4dd95a5aeb27207e2c0d9b7118702149bf4704c3ddb55bb7429/nvidia_cublas-13.1.0.3-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:ee8722c1f0145ab246bccb9e452153b5e0515fd094c3678df50b2a0888b8b171", size = 423133236, upload-time = "2025-10-09T08:59:32.536Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/d0/4e327087b6026a15d83b31ee1268fa16292e3bb71fb402bbe017bf6649ac/nvidia_cublas-13.4.1.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:6351d2d921527479d6d5002e620fe1971d3e10c730b544c02a36d65301efb00d", size = 513048233, upload-time = "2026-04-28T21:25:23.612Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/79/0cefdaa1d9e45018a227bac64a79b92d2733cde28a8fd09c65362de08622/nvidia_cublas-13.4.1.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:28c983c8c03aa9a2d7b36cddcef2bfeeea85e13241d77df7622665502159f347", size = 404870806, upload-time = "2026-04-28T21:25:06.483Z" },
 ]
 
 [[package]]
-name = "nvidia-cuda-cupti"
-version = "13.0.85"
+name = "nvidia-cublas-cu12"
+version = "12.9.2.10"
 source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" },
+]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2a/2a/80353b103fc20ce05ef51e928daed4b6015db4aaa9162ed0997090fe2250/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_aarch64.whl", hash = "sha256:796bd679890ee55fb14a94629b698b6db54bcfd833d391d5e94017dd9d7d3151", size = 10310827, upload-time = "2025-09-04T08:26:42.012Z" },
-    { url = "https://files.pythonhosted.org/packages/33/6d/737d164b4837a9bbd202f5ae3078975f0525a55730fe871d8ed4e3b952b0/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_x86_64.whl", hash = "sha256:4eb01c08e859bf924d222250d2e8f8b8ff6d3db4721288cf35d14252a4d933c8", size = 10715597, upload-time = "2025-09-04T08:26:51.312Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/a2/c96163a0fff1839c0c9548bbdeae7b853b867009e33b9b9264adc238b1cf/nvidia_cublas_cu12-12.9.2.10-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:5572131a59c3eebeeb1c4c8144f772d49372c20124916e072a0e3fc30df421d5", size = 575012079, upload-time = "2026-04-08T18:51:47.303Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/c0/0a517bfe63ccd3b92eb254d264e28fca3c7cab75d07daea315250fb1bf73/nvidia_cublas_cu12-12.9.2.10-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:e4f53a8ca8c5d6e8c492d0d0a3d565ecb59a751b19cfdaa4f6da0ab2104c1702", size = 581240110, upload-time = "2026-04-08T18:52:31.532Z" },
+    { url = "https://files.pythonhosted.org/packages/20/e2/fc9a0e985249d873150276d5afb02e39a66817fedbf1a385724393e505ed/nvidia_cublas_cu12-12.9.2.10-py3-none-win_amd64.whl", hash = "sha256:623f43027d40d44ceadf0043f002bd25cf353e8f13ce90b9a87057019f560661", size = 553162896, upload-time = "2026-04-08T18:53:10.035Z" },
 ]
 
 [[package]]
 name = "nvidia-cuda-nvrtc"
-version = "13.0.88"
+version = "13.2.78"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c3/68/483a78f5e8f31b08fb1bb671559968c0ca3a065ac7acabfc7cee55214fd6/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:ad9b6d2ead2435f11cbb6868809d2adeeee302e9bb94bcf0539c7a40d80e8575", size = 90215200, upload-time = "2025-09-04T08:28:44.204Z" },
-    { url = "https://files.pythonhosted.org/packages/b7/dc/6bb80850e0b7edd6588d560758f17e0550893a1feaf436807d64d2da040f/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d27f20a0ca67a4bb34268a5e951033496c5b74870b868bacd046b1b8e0c3267b", size = 43015449, upload-time = "2025-09-04T08:28:20.239Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/96/237b40b171e06eb65905375c4ad5c96f78c2f861ac6e8ae7f650d95e1dfd/nvidia_cuda_nvrtc-13.2.78-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a9049031da08cbedd0c20e3470e5a978dc330af0e0326b3b05774718c665dc3e", size = 47019062, upload-time = "2026-04-13T09:45:33.875Z" },
+    { url = "https://files.pythonhosted.org/packages/af/be/8476aa006686fb264d61de43e0408a8dbd001003a702574759b25e645587/nvidia_cuda_nvrtc-13.2.78-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a50367a7e2a0bd00fb27e5648179149cc7a60e7c7811740a5ff559f06234526d", size = 44754755, upload-time = "2026-04-13T09:44:58.919Z" },
+    { url = "https://files.pythonhosted.org/packages/48/35/41b84ff9b4a9acc42590be44e69f32fc867a57d7018e87c532019d627f17/nvidia_cuda_nvrtc-13.2.78-py3-none-win_amd64.whl", hash = "sha256:46aff2df5615c408f23fb968a75e5641060f89fa611a85af51a387dff9bf375b", size = 40817783, upload-time = "2026-04-13T10:06:31.711Z" },
 ]
 
 [[package]]
-name = "nvidia-cuda-runtime"
-version = "13.0.96"
+name = "nvidia-cuda-nvrtc-cu12"
+version = "12.9.86"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/87/4f/17d7b9b8e285199c58ce28e31b5c5bbaa4d8271af06a89b6405258245de2/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef9bcbe90493a2b9d810e43d249adb3d02e98dd30200d86607d8d02687c43f55", size = 2261060, upload-time = "2025-10-09T08:55:15.78Z" },
-    { url = "https://files.pythonhosted.org/packages/2e/24/d1558f3b68b1d26e706813b1d10aa1d785e4698c425af8db8edc3dced472/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f82250d7782aa23b6cfe765ecc7db554bd3c2870c43f3d1821f1d18aebf0548", size = 2243632, upload-time = "2025-10-09T08:55:36.117Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/85/e4af82cc9202023862090bfca4ea827d533329e925c758f0cde964cb54b7/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:210cf05005a447e29214e9ce50851e83fc5f4358df8b453155d5e1918094dcb4", size = 89568129, upload-time = "2025-06-05T20:02:41.973Z" },
+    { url = "https://files.pythonhosted.org/packages/64/eb/c2295044b8f3b3b08860e2f6a912b702fc92568a167259df5dddb78f325e/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:096d4de6bda726415dfaf3198d4f5c522b8e70139c97feef5cd2ca6d4cd9cead", size = 44528905, upload-time = "2025-06-05T20:02:29.754Z" },
+    { url = "https://files.pythonhosted.org/packages/52/de/823919be3b9d0ccbf1f784035423c5f18f4267fb0123558d58b813c6ec86/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-win_amd64.whl", hash = "sha256:72972ebdcf504d69462d3bcd67e7b81edd25d0fb85a2c46d3ea3517666636349", size = 76408187, upload-time = "2025-06-05T20:12:27.819Z" },
+]
+
+[[package]]
+name = "nvidia-cudnn-cu12"
+version = "9.22.0.52"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/51/ff/c6a098c1e0bccc68aac5f1684526cf8936abb7024dcc46dca315b8a6f47f/nvidia_cudnn_cu12-9.22.0.52-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:cd9011812498376f866b9826331cc965ac922eb2df8549c9f2989f10255e5001", size = 774536507, upload-time = "2026-05-08T15:36:09.067Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/8f/2ede6b758b7524608472010f632bdd3370ea271d715d1d66044614b84cdc/nvidia_cudnn_cu12-9.22.0.52-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:391b9a7ee6386daaca7f8dca41e83c2c99f760c9581a0400755e87b4287b8847", size = 718382818, upload-time = "2026-05-08T15:37:38.061Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/a4/045f8d0ce6b99726d88e76bbb8ee147123f55e80111d89262762d8149abb/nvidia_cudnn_cu12-9.22.0.52-py3-none-win_amd64.whl", hash = "sha256:5d10117314c861245992dbcf8a6f8ae1f54852137a7c9f80cc9de9fa596f7d62", size = 687235974, upload-time = "2026-05-08T15:39:37.967Z" },
 ]
 
 [[package]]
 name = "nvidia-cudnn-cu13"
-version = "9.19.0.56"
+version = "9.22.0.52"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "nvidia-cublas", marker = "sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f1/84/26025437c1e6b61a707442184fa0c03d083b661adf3a3eecfd6d21677740/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:6ed29ffaee1176c612daf442e4dd6cfeb6a0caa43ddcbeb59da94953030b1be4", size = 433781201, upload-time = "2026-02-03T20:40:53.805Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/22/0b4b932655d17a6da1b92fa92ab12844b053bb2ac2475e179ba6f043da1e/nvidia_cudnn_cu13-9.19.0.56-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:d20e1734305e9d68889a96e3f35094d733ff1f83932ebe462753973e53a572bf", size = 366066321, upload-time = "2026-02-03T20:44:52.837Z" },
+    { url = "https://files.pythonhosted.org/packages/88/b0/0a76f3334b98f356188c71e71bde7945f8ac9d52a0011e0031fc8938ba67/nvidia_cudnn_cu13-9.22.0.52-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:876406c9abc74314dea03dedb338ac6329cfb41055d9c7873ce9f77fc1651e5f", size = 609342557, upload-time = "2026-05-08T15:37:01.733Z" },
+    { url = "https://files.pythonhosted.org/packages/57/96/ce2cb84b5e8bb94dd55f554e3454b91e9ecd6708aa27d4a7b12f287613bc/nvidia_cudnn_cu13-9.22.0.52-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:7b24277af8cd2e4e5be731f5cf910255105d4b92481999771b99dbffee75d03e", size = 512596221, upload-time = "2026-05-08T15:38:10.195Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/dc/dc4a724d0f8601753fcb74650675fd242fdb39d619283821c0d1f64180a6/nvidia_cudnn_cu13-9.22.0.52-py3-none-win_amd64.whl", hash = "sha256:ab6dc11dfd75c7ffed9b17dea3eb9380f68e5e3dc4eebce16fe1eda67135392c", size = 397009271, upload-time = "2026-05-08T15:40:10.48Z" },
 ]
 
 [[package]]
-name = "nvidia-cufft"
-version = "12.0.0.61"
+name = "nvidia-ml-py"
+version = "13.595.45"
 source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" },
-]
+sdist = { url = "https://files.pythonhosted.org/packages/ce/49/c29f6e30d8662d2e94fef17739ea7309cc76aba269922ae999e4cc07f268/nvidia_ml_py-13.595.45.tar.gz", hash = "sha256:c9f34897fe0441ff35bc8f35baf80f830a20b0f4e6ce71e0a325bc0e66acf079", size = 50780, upload-time = "2026-03-19T16:59:44.956Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8b/ae/f417a75c0259e85c1d2f83ca4e960289a5f814ed0cea74d18c353d3e989d/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2708c852ef8cd89d1d2068bdbece0aa188813a0c934db3779b9b1faa8442e5f5", size = 214053554, upload-time = "2025-09-04T08:31:38.196Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/2f/7b57e29836ea8714f81e9898409196f47d772d5ddedddf1592eadb8ab743/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6c44f692dce8fd5ffd3e3df134b6cdb9c2f72d99cf40b62c32dde45eea9ddad3", size = 214085489, upload-time = "2025-09-04T08:31:56.044Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/24/fc256107d23597fa33d319505ce77160fa1a2349c096d01901ffc7cb7fc4/nvidia_ml_py-13.595.45-py3-none-any.whl", hash = "sha256:b65a7977f503d56154b14d683710125ef93594adb63fbf7e559336e3318f1376", size = 51776, upload-time = "2026-03-19T16:59:43.603Z" },
 ]
 
 [[package]]
-name = "nvidia-cufile"
-version = "1.15.1.6"
+name = "nvidia-nccl-cu12"
+version = "2.30.4"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3f/70/4f193de89a48b71714e74602ee14d04e4019ad36a5a9f20c425776e72cd6/nvidia_cufile-1.15.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08a3ecefae5a01c7f5117351c64f17c7c62efa5fffdbe24fc7d298da19cd0b44", size = 1223672, upload-time = "2025-09-04T08:32:22.779Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/73/cc4a14c9813a8a0d509417cf5f4bdaba76e924d58beb9864f5a7baceefbf/nvidia_cufile-1.15.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:bdc0deedc61f548bddf7733bdc216456c2fdb101d020e1ab4b88d232d5e2f6d1", size = 1136992, upload-time = "2025-09-04T08:32:14.119Z" },
-]
-
-[[package]]
-name = "nvidia-curand"
-version = "10.4.0.35"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/1e/72/7c2ae24fb6b63a32e6ae5d241cc65263ea18d08802aaae087d9f013335a2/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:133df5a7509c3e292aaa2b477afd0194f06ce4ea24d714d616ff36439cee349a", size = 61962106, upload-time = "2025-08-04T10:21:41.128Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/9f/be0a41ca4a4917abf5cb9ae0daff1a6060cc5de950aec0396de9f3b52bc5/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:1aee33a5da6e1db083fe2b90082def8915f30f3248d5896bcec36a579d941bfc", size = 59544258, upload-time = "2025-08-04T10:22:03.992Z" },
-]
-
-[[package]]
-name = "nvidia-cusolver"
-version = "12.0.4.66"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "nvidia-cublas", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cusparse", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/c8/c3/b30c9e935fc01e3da443ec0116ed1b2a009bb867f5324d3f2d7e533e776b/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:02c2457eaa9e39de20f880f4bd8820e6a1cfb9f9a34f820eb12a155aa5bc92d2", size = 223467760, upload-time = "2025-09-04T08:33:04.222Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/67/cba3777620cdacb99102da4042883709c41c709f4b6323c10781a9c3aa34/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0a759da5dea5c0ea10fd307de75cdeb59e7ea4fcb8add0924859b944babf1112", size = 200941980, upload-time = "2025-09-04T08:33:22.767Z" },
-]
-
-[[package]]
-name = "nvidia-cusparse"
-version = "12.6.3.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/94/5c26f33738ae35276672f12615a64bd008ed5be6d1ebcb23579285d960a9/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:80bcc4662f23f1054ee334a15c72b8940402975e0eab63178fc7e670aa59472c", size = 162155568, upload-time = "2025-09-04T08:33:42.864Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/18/623c77619c31d62efd55302939756966f3ecc8d724a14dab2b75f1508850/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b3c89c88d01ee0e477cb7f82ef60a11a4bcd57b6b87c33f789350b59759360b", size = 145942937, upload-time = "2025-09-04T08:33:58.029Z" },
-]
-
-[[package]]
-name = "nvidia-cusparselt-cu13"
-version = "0.8.0"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/46/10/8dcd1175260706a2fc92a16a52e306b71d4c1ea0b0cc4a9484183399818a/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:400c6ed1cf6780fc6efedd64ec9f1345871767e6a1a0a552a1ea0578117ea77c", size = 220791277, upload-time = "2025-08-13T19:22:40.982Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/53/43b0d71f4e702fa9733f8b4571fdca50a8813f1e450b656c239beff12315/nvidia_cusparselt_cu13-0.8.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25e30a8a7323935d4ad0340b95a0b69926eee755767e8e0b1cf8dd85b197d3fd", size = 169884119, upload-time = "2025-08-13T19:23:41.967Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/2b/1757b6b74ee241de5efee3f35487dcb33e09c07605254809c6ce36aeb783/nvidia_nccl_cu12-2.30.4-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:606fa9aa9215c00367d060188eb1a5bbd28396aff5e11b9200d99d1a6ab79a71", size = 300091935, upload-time = "2026-04-23T03:22:58.024Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/c3/0e45ff4dce8401f6ea7c25d80d75738813a47f5ae2691e2478f2fd1e5e93/nvidia_nccl_cu12-2.30.4-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:040974b261edec4b8b793e59e92ab7176fe4ab4bc61b800f9f3bfaeec2d436f3", size = 300164158, upload-time = "2026-04-23T03:23:19.589Z" },
 ]
 
 [[package]]
 name = "nvidia-nccl-cu13"
-version = "2.28.9"
+version = "2.30.4"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/39/55/1920646a2e43ffd4fc958536b276197ed740e9e0c54105b4bb3521591fc7/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:01c873ba1626b54caa12272ed228dc5b2781545e0ae8ba3f432a8ef1c6d78643", size = 196561677, upload-time = "2025-11-18T05:49:03.45Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/b4/878fefaad5b2bcc6fcf8d474a25e3e3774bc5133e4b58adff4d0bca238bc/nvidia_nccl_cu13-2.28.9-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:e4553a30f34195f3fa1da02a6da3d6337d28f2003943aa0a3d247bbc25fefc42", size = 196493177, upload-time = "2025-11-18T05:49:17.677Z" },
-]
-
-[[package]]
-name = "nvidia-nvjitlink"
-version = "13.0.88"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/56/7a/123e033aaff487c77107195fa5a2b8686795ca537935a24efae476c41f05/nvidia_nvjitlink-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:13a74f429e23b921c1109976abefacc69835f2f433ebd323d3946e11d804e47b", size = 40713933, upload-time = "2025-09-04T08:35:43.553Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/2c/93c5250e64df4f894f1cbb397c6fd71f79813f9fd79d7cd61de3f97b3c2d/nvidia_nvjitlink-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e931536ccc7d467a98ba1d8b89ff7fa7f1fa3b13f2b0069118cd7f47bff07d0c", size = 38768748, upload-time = "2025-09-04T08:35:20.008Z" },
-]
-
-[[package]]
-name = "nvidia-nvshmem-cu13"
-version = "3.4.5"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/dc/0f/05cc9c720236dcd2db9c1ab97fff629e96821be2e63103569da0c9b72f19/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dc2a197f38e5d0376ad52cd1a2a3617d3cdc150fd5966f4aee9bcebb1d68fe9", size = 60215947, upload-time = "2025-09-06T00:32:20.022Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/35/a9bf80a609e74e3b000fef598933235c908fcefcef9026042b8e6dfde2a9/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:290f0a2ee94c9f3687a02502f3b9299a9f9fe826e6d0287ee18482e78d495b80", size = 60412546, upload-time = "2025-09-06T00:32:41.564Z" },
-]
-
-[[package]]
-name = "nvidia-nvtx"
-version = "13.0.85"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/c2/f3/d86c845465a2723ad7e1e5c36dcd75ddb82898b3f53be47ebd429fb2fa5d/nvidia_nvtx-13.0.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4936d1d6780fbe68db454f5e72a42ff64d1fd6397df9f363ae786930fd5c1cd4", size = 148047, upload-time = "2025-09-04T08:29:01.761Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/64/3708a90d1ebe202ffdeb7185f878a3c84d15c2b2c31858da2ce0583e2def/nvidia_nvtx-13.0.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb7780edb6b14107373c835bf8b72e7a178bac7367e23da7acb108f973f157a6", size = 148878, upload-time = "2025-09-04T08:28:53.627Z" },
+    { url = "https://files.pythonhosted.org/packages/65/32/ff4e28cbed87f99fed63df446ef1986e0617842258a3535eaa2ee92d6226/nvidia_nccl_cu13-2.30.4-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:e99308a3a89fba78918d50886e81072a6c8b0b4199feb02c3903e63713a6525a", size = 212898082, upload-time = "2026-04-23T03:22:28.608Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/93/6d020a69fc37e57fae8a96ab0c53102d96538db256e933e914d100e5a430/nvidia_nccl_cu13-2.30.4-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:534dbf3058cadb625f08ab0d17f1dffad3b961a2bfa360d66633fcf21be53f57", size = 212855427, upload-time = "2026-04-23T03:22:47.436Z" },
 ]
 
 [[package]]
@@ -1950,7 +1886,7 @@ name = "openai-harmony"
 version = "0.0.8"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/3e/92/2d038d096f29179c7c9571b431f9e739f87a487121901725e23fe338dd9d/openai_harmony-0.0.8.tar.gz", hash = "sha256:6e43f98e6c242fa2de6f8ea12eab24af63fa2ed3e89c06341fb9d92632c5cbdf", size = 284777, upload-time = "2025-11-05T19:07:06.727Z" }
 wheels = [
@@ -1964,67 +1900,44 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/1d/10/4327dbf87f75ae813405fd9a9b4a5cde63d506ffed0a096a440a4cabd89c/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cbaa3bda75ef0d8836e1f8cc84af62f971b1d756d740efc95c38c3e04c0bfde2", size = 2932931, upload-time = "2025-11-05T19:07:01.437Z" },
     { url = "https://files.pythonhosted.org/packages/8a/c8/1774eec4f6f360ef57618fb8f52e3d3af245b2491bd0297513aa09eec04b/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:772922a9bd24e133950fad71eb1550836f415a88e8c77870e12d0c3bd688ddc2", size = 2996140, upload-time = "2025-11-05T19:07:03.438Z" },
     { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" },
-]
-
-[[package]]
-name = "opencv-python"
-version = "4.13.0.90"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/77/d7/133d5756aef78090f4d8dd4895793aed24942dec6064a15375cfac9175fc/opencv_python-4.13.0.90-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:58803f8b05b51d8a785e2306d83b44173b32536f980342f3bc76d8c122b5938d", size = 46020278, upload-time = "2026-01-18T08:57:42.539Z" },
-    { url = "https://files.pythonhosted.org/packages/7b/65/3b8cdbe13fa2436695d00e1d8c1ddf5edb4050a93436f34ed867233d1960/opencv_python-4.13.0.90-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:a5354e8b161409fce7710ba4c1cfe88b7bb460d97f705dc4e714a1636616f87d", size = 32568376, upload-time = "2026-01-18T08:58:47.19Z" },
-    { url = "https://files.pythonhosted.org/packages/34/ff/e4d7c165e678563f49505d3d2811fcc16011e929cd00bc4b0070c7ee82b0/opencv_python-4.13.0.90-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d557cbf0c7818081c9acf56585b68e781af4f00638971f75eaa3de70904a6314", size = 47685110, upload-time = "2026-01-18T08:59:58.045Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/02/d9b73dbce28712204e85ae4c1e179505e9a771f95b33743a97e170caedde/opencv_python-4.13.0.90-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9911581e37b24169e4842069ff01d6645ea2bc4af7e10a022d9ebe340fd035ec", size = 70460479, upload-time = "2026-01-18T09:01:16.377Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/1c/87fa71968beb71481ed359e21772061ceff7c9b45a61b3e7daa71e5b0b66/opencv_python-4.13.0.90-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1150b8f1947761b848bbfa9c96ceba8877743ffef157c08a04af6f7717ddd709", size = 46707819, upload-time = "2026-01-18T09:02:48.049Z" },
-    { url = "https://files.pythonhosted.org/packages/af/16/915a94e5b537c328fa3e96b769c7d4eed3b67d1be978e0af658a3d3faed8/opencv_python-4.13.0.90-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:d6716f16149b04eea52f953b8ca983d60dd9cd4872c1fd5113f6e2fcebb90e93", size = 72926629, upload-time = "2026-01-18T09:04:29.23Z" },
+    { url = "https://files.pythonhosted.org/packages/14/63/119de431572d7c70a7bf1037034a9be6ed0a7502a7498ba7302bca5b3242/openai_harmony-0.0.8-cp38-abi3-win32.whl", hash = "sha256:a9b5f893326b28d9e935ade14b4f655f5a840942473bc89b201c25f7a15af9cf", size = 2082457, upload-time = "2025-11-05T19:07:09.631Z" },
+    { url = "https://files.pythonhosted.org/packages/40/1f/c83cf5a206c263ee70448a5ae4264682555f4d0b5bed0d2cc6ca1108103d/openai_harmony-0.0.8-cp38-abi3-win_amd64.whl", hash = "sha256:39d44f0d8f466bd56698e7ead708bead3141e27b9b87e3ab7d5a6d0e4a869ee5", size = 2438369, upload-time = "2025-11-05T19:07:08.1Z" },
 ]
 
 [[package]]
 name = "packaging"
-version = "26.0rc1"
+version = "26.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/55/d0/88784ecdb0e481b39af721f637a60046e6f09ca03553aa71d788062e2012/packaging-26.0rc1.tar.gz", hash = "sha256:2104df24f61f17179ac8459cda8138cd344967d3b4f0934afa582a6826963fc5", size = 142273, upload-time = "2026-01-09T17:41:18.505Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/40/35/ddf3a6e8fc754fb939e2ea36fde96c28189184d6115afcf60011bb438ae5/packaging-26.0rc1-py3-none-any.whl", hash = "sha256:ecf921b33c620e357b1eed2ac3bc6313b1582874b0282d0773b6797b79cb0786", size = 74021, upload-time = "2026-01-09T17:41:17.134Z" },
+    { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" },
 ]
 
 [[package]]
 name = "pandas"
-version = "3.0.1"
+version = "3.0.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/87/4341c6252d1c47b08768c3d25ac487362bf403f0313ddae4a2a26c9b1b4c/pandas-3.0.3.tar.gz", hash = "sha256:696a4a00a2a2a35d4e5deb3fc946641b96c944f02230e4f76137fe35d806c4fc", size = 4651414, upload-time = "2026-05-11T18:54:29.21Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0b/48/aad6ec4f8d007534c091e9a7172b3ec1b1ee6d99a9cbb936b5eab6c6cf58/pandas-3.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5272627187b5d9c20e55d27caf5f2cd23e286aba25cadf73c8590e432e2b7262", size = 10317509, upload-time = "2026-02-17T22:18:59.498Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/14/5990826f779f79148ae9d3a2c39593dc04d61d5d90541e71b5749f35af95/pandas-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:661e0f665932af88c7877f31da0dc743fe9c8f2524bdffe23d24fdcb67ef9d56", size = 9860561, upload-time = "2026-02-17T22:19:02.265Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/80/f01ff54664b6d70fed71475543d108a9b7c888e923ad210795bef04ffb7d/pandas-3.0.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:75e6e292ff898679e47a2199172593d9f6107fd2dd3617c22c2946e97d5df46e", size = 10365506, upload-time = "2026-02-17T22:19:05.017Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/85/ab6d04733a7d6ff32bfc8382bf1b07078228f5d6ebec5266b91bfc5c4ff7/pandas-3.0.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ff8cf1d2896e34343197685f432450ec99a85ba8d90cce2030c5eee2ef98791", size = 10873196, upload-time = "2026-02-17T22:19:07.204Z" },
-    { url = "https://files.pythonhosted.org/packages/48/a9/9301c83d0b47c23ac5deab91c6b39fd98d5b5db4d93b25df8d381451828f/pandas-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eca8b4510f6763f3d37359c2105df03a7a221a508f30e396a51d0713d462e68a", size = 11370859, upload-time = "2026-02-17T22:19:09.436Z" },
-    { url = "https://files.pythonhosted.org/packages/59/fe/0c1fc5bd2d29c7db2ab372330063ad555fb83e08422829c785f5ec2176ca/pandas-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:06aff2ad6f0b94a17822cf8b83bbb563b090ed82ff4fe7712db2ce57cd50d9b8", size = 11924584, upload-time = "2026-02-17T22:19:11.562Z" },
-    { url = "https://files.pythonhosted.org/packages/92/fa/423c89086cca1f039cf1253c3ff5b90f157b5b3757314aa635f6bf3e30aa/pandas-3.0.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d54855f04f8246ed7b6fc96b05d4871591143c46c0b6f4af874764ed0d2d6f06", size = 10752673, upload-time = "2026-02-17T22:19:18.304Z" },
-    { url = "https://files.pythonhosted.org/packages/22/23/b5a08ec1f40020397f0faba72f1e2c11f7596a6169c7b3e800abff0e433f/pandas-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e1b677accee34a09e0dc2ce5624e4a58a1870ffe56fc021e9caf7f23cd7668f", size = 10404967, upload-time = "2026-02-17T22:19:20.726Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/81/94841f1bb4afdc2b52a99daa895ac2c61600bb72e26525ecc9543d453ebc/pandas-3.0.1-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9cabbdcd03f1b6cd254d6dda8ae09b0252524be1592594c00b7895916cb1324", size = 10320575, upload-time = "2026-02-17T22:19:24.919Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/8b/2ae37d66a5342a83adadfd0cb0b4bf9c3c7925424dd5f40d15d6cfaa35ee/pandas-3.0.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ae2ab1f166668b41e770650101e7090824fd34d17915dd9cd479f5c5e0065e9", size = 10710921, upload-time = "2026-02-17T22:19:27.181Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/61/772b2e2757855e232b7ccf7cb8079a5711becb3a97f291c953def15a833f/pandas-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6bf0603c2e30e2cafac32807b06435f28741135cb8697eae8b28c7d492fc7d76", size = 11334191, upload-time = "2026-02-17T22:19:29.411Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/08/b16c6df3ef555d8495d1d265a7963b65be166785d28f06a350913a4fac78/pandas-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6c426422973973cae1f4a23e51d4ae85974f44871b24844e4f7de752dd877098", size = 11782256, upload-time = "2026-02-17T22:19:32.34Z" },
-    { url = "https://files.pythonhosted.org/packages/bb/8b/4bb774a998b97e6c2fd62a9e6cfdaae133b636fd1c468f92afb4ae9a447a/pandas-3.0.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:99d0f92ed92d3083d140bf6b97774f9f13863924cf3f52a70711f4e7588f9d0a", size = 10322465, upload-time = "2026-02-17T22:19:36.803Z" },
-    { url = "https://files.pythonhosted.org/packages/72/3a/5b39b51c64159f470f1ca3b1c2a87da290657ca022f7cd11442606f607d1/pandas-3.0.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3b66857e983208654294bb6477b8a63dee26b37bdd0eb34d010556e91261784f", size = 9910632, upload-time = "2026-02-17T22:19:39.001Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/f7/b449ffb3f68c11da12fc06fbf6d2fa3a41c41e17d0284d23a79e1c13a7e4/pandas-3.0.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56cf59638bf24dc9bdf2154c81e248b3289f9a09a6d04e63608c159022352749", size = 10440535, upload-time = "2026-02-17T22:19:41.157Z" },
-    { url = "https://files.pythonhosted.org/packages/55/77/6ea82043db22cb0f2bbfe7198da3544000ddaadb12d26be36e19b03a2dc5/pandas-3.0.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1a9f55e0f46951874b863d1f3906dcb57df2d9be5c5847ba4dfb55b2c815249", size = 10893940, upload-time = "2026-02-17T22:19:43.493Z" },
-    { url = "https://files.pythonhosted.org/packages/03/30/f1b502a72468c89412c1b882a08f6eed8a4ee9dc033f35f65d0663df6081/pandas-3.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1849f0bba9c8a2fb0f691d492b834cc8dadf617e29015c66e989448d58d011ee", size = 11442711, upload-time = "2026-02-17T22:19:46.074Z" },
-    { url = "https://files.pythonhosted.org/packages/0d/f0/ebb6ddd8fc049e98cabac5c2924d14d1dda26a20adb70d41ea2e428d3ec4/pandas-3.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3d288439e11b5325b02ae6e9cc83e6805a62c40c5a6220bea9beb899c073b1c", size = 11963918, upload-time = "2026-02-17T22:19:48.838Z" },
-    { url = "https://files.pythonhosted.org/packages/66/fc/848bb6710bc6061cb0c5badd65b92ff75c81302e0e31e496d00029fe4953/pandas-3.0.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:58eeb1b2e0fb322befcf2bbc9ba0af41e616abadb3d3414a6bc7167f6cbfce32", size = 10772664, upload-time = "2026-02-17T22:19:55.806Z" },
-    { url = "https://files.pythonhosted.org/packages/69/5c/866a9bbd0f79263b4b0db6ec1a341be13a1473323f05c122388e0f15b21d/pandas-3.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cd9af1276b5ca9e298bd79a26bda32fa9cc87ed095b2a9a60978d2ca058eaf87", size = 10421286, upload-time = "2026-02-17T22:19:58.091Z" },
-    { url = "https://files.pythonhosted.org/packages/51/a4/2058fb84fb1cfbfb2d4a6d485e1940bb4ad5716e539d779852494479c580/pandas-3.0.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f87a04984d6b63788327cd9f79dda62b7f9043909d2440ceccf709249ca988", size = 10342050, upload-time = "2026-02-17T22:20:01.376Z" },
-    { url = "https://files.pythonhosted.org/packages/22/1b/674e89996cc4be74db3c4eb09240c4bb549865c9c3f5d9b086ff8fcfbf00/pandas-3.0.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85fe4c4df62e1e20f9db6ebfb88c844b092c22cd5324bdcf94bfa2fc1b391221", size = 10740055, upload-time = "2026-02-17T22:20:04.328Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/f8/e954b750764298c22fa4614376531fe63c521ef517e7059a51f062b87dca/pandas-3.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:331ca75a2f8672c365ae25c0b29e46f5ac0c6551fdace8eec4cd65e4fac271ff", size = 11357632, upload-time = "2026-02-17T22:20:06.647Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/02/c6e04b694ffd68568297abd03588b6d30295265176a5c01b7459d3bc35a3/pandas-3.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:15860b1fdb1973fffade772fdb931ccf9b2f400a3f5665aef94a00445d7d8dd5", size = 11810974, upload-time = "2026-02-17T22:20:08.946Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/90/62d8302883c44308c477e222c3daf7c813a34c8e96985882fbd53d964352/pandas-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:67b3b64c11910cfa29f4e94a14d3bff9ee693b6fc76055e7cad549cee0aec5fa", size = 10331071, upload-time = "2026-05-11T18:52:58.838Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/ae/6a6493c783a101f165e4356953ba3c74d6f77f0042fa7d753da9dfbb640c/pandas-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:39436b377d56d2a2e52d0395bdbee171f01068e99af5250509aceeb929f765c7", size = 9875690, upload-time = "2026-05-11T18:53:01.431Z" },
+    { url = "https://files.pythonhosted.org/packages/62/7c/5df8e9f56c69a2769fbe9382a5ef8f2658c007e376434e1e2cbb57ad895f/pandas-3.0.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4be06d68f9ddcfc645b87534911da79a8fbffc7573c80e0edcf42a5020624d8", size = 10381634, upload-time = "2026-05-11T18:53:04.393Z" },
+    { url = "https://files.pythonhosted.org/packages/99/68/1237369725aa617bb358263d535803e3053fdbc593513ec5ed9c9896b5b6/pandas-3.0.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4eeb6830daf35a71cc09649bd823e2b542dac246cdee9614c6e4bd65028cd6a", size = 10891243, upload-time = "2026-05-11T18:53:07.643Z" },
+    { url = "https://files.pythonhosted.org/packages/25/93/77d108e8af7222b4a503ebde0e30215b1c2e4f8e53a526431890f22d5586/pandas-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1928e07221f82db493cd4af1e23c1bfca524a19a4699887975bff68f49a72bfb", size = 11388659, upload-time = "2026-05-11T18:53:10.634Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/bd/eff5b4399f332ac386c853f6cd2bd3fa2ca0061b9f36ecd9c4d7c4265649/pandas-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51b1fe551acb77dac643c6fda86084d8d446c10fe64b06a9cc29c4cc8540e7f2", size = 11942880, upload-time = "2026-05-11T18:53:13.536Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/20/559ace4200982c3887d0b86bfd0d856a2143ef8ddab63cc07934951a964c/pandas-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:a82d532a3351d435432cd913edbccaf8b8e01d4dd0e5ced5a8d2e8ecd94c7e44", size = 9757091, upload-time = "2026-05-11T18:53:16.306Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/66/69055a09fe200f29f922a3eeec4804611900b95f52d932ece3393c3c0c19/pandas-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:275c14e0fce14a2ec20eee474aecd305478ea3c1e6f6a9d8fe219a165542717e", size = 9057282, upload-time = "2026-05-11T18:53:18.768Z" },
+    { url = "https://files.pythonhosted.org/packages/57/0e/efe801b0e6811e8e650cd21b7f2608e30f08a7067e2bf6e8752b0d56ee3c/pandas-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:46997386d528eb40376ecd6b033cf4a8a1e5282580f68f43de875b78cba2199d", size = 10767016, upload-time = "2026-05-11T18:53:21.227Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/dc/eb55135a1d5f0f0519f28da1f609a206d2cad1f9c35c32d51e38dd7261ae/pandas-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:261e308dfb22448384b7580cf719d2f998fe2966c92893c3e77d14008af1f066", size = 10420210, upload-time = "2026-05-11T18:53:23.982Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/3e/b1d5d955ce33ffecb407465a60bc32769d74fcf68224b7ae67ae11d4dea4/pandas-3.0.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd1a5d1def6a46002e964510bdc67c368aa0951df5d1d9f8365336f5a1f490cd", size = 10336126, upload-time = "2026-05-11T18:53:26.731Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/76/a01261711ab60a22d71b862f0de20e4c504bf80457270ad8cb42110f6abc/pandas-3.0.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d72828c20c6d6e83e1e22a6a3b47b326b71664112fa9705dcbccfd7a39b62085", size = 10728051, upload-time = "2026-05-11T18:53:29.125Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/21/ea191195e587b18cf682e97f433f81b2d0fbe341380e80a3e0d6e4403c8e/pandas-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d26cbe1fcfc12e8fd900e2454163e466b2d3af84f7c75481df7683ffc073d870", size = 11350796, upload-time = "2026-05-11T18:53:32.056Z" },
+    { url = "https://files.pythonhosted.org/packages/64/69/f0eaaf54939f0e8c6768fd06be9af2cef9b36048b96dfb9e1b2c685a807e/pandas-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e91cec1879ada0624fc3dc9953c5cbd60208e59c0db28f540c5d6d47502422f", size = 11799741, upload-time = "2026-05-11T18:53:34.985Z" },
+    { url = "https://files.pythonhosted.org/packages/45/a4/865e0e510cae5fc2194de4db28be638952de942571ba9125934fd9c01d47/pandas-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:08d789b41f87e0905880e293cedf6197ce71fe67cc081358b1e148a491b9bd13", size = 10499958, upload-time = "2026-05-11T18:53:37.857Z" },
 ]
 
 [[package]]
@@ -2047,57 +1960,63 @@ wheels = [
 
 [[package]]
 name = "pillow"
-version = "12.1.1"
+version = "12.2.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/8c/21/c2bcdd5906101a30244eaffc1b6e6ce71a31bd0742a01eb89e660ebfac2d/pillow-12.2.0.tar.gz", hash = "sha256:a830b1a40919539d07806aa58e1b114df53ddd43213d9c8b75847eee6c0182b5", size = 46987819, upload-time = "2026-04-01T14:46:17.687Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d5/11/6db24d4bd7685583caeae54b7009584e38da3c3d4488ed4cd25b439de486/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:d242e8ac078781f1de88bf823d70c1a9b3c7950a44cdf4b7c012e22ccbcd8e4e", size = 4062689, upload-time = "2026-02-11T04:21:06.804Z" },
-    { url = "https://files.pythonhosted.org/packages/33/c0/ce6d3b1fe190f0021203e0d9b5b99e57843e345f15f9ef22fcd43842fd21/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:02f84dfad02693676692746df05b89cf25597560db2857363a208e393429f5e9", size = 4138535, upload-time = "2026-02-11T04:21:08.452Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/c6/d5eb6a4fb32a3f9c21a8c7613ec706534ea1cf9f4b3663e99f0d83f6fca8/pillow-12.1.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:e65498daf4b583091ccbb2556c7000abf0f3349fcd57ef7adc9a84a394ed29f6", size = 3601364, upload-time = "2026-02-11T04:21:10.194Z" },
-    { url = "https://files.pythonhosted.org/packages/14/a1/16c4b823838ba4c9c52c0e6bbda903a3fe5a1bdbf1b8eb4fff7156f3e318/pillow-12.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c6db3b84c87d48d0088943bf33440e0c42370b99b1c2a7989216f7b42eede60", size = 5262561, upload-time = "2026-02-11T04:21:11.742Z" },
-    { url = "https://files.pythonhosted.org/packages/bb/ad/ad9dc98ff24f485008aa5cdedaf1a219876f6f6c42a4626c08bc4e80b120/pillow-12.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b7e5304e34942bf62e15184219a7b5ad4ff7f3bb5cca4d984f37df1a0e1aee2", size = 4657460, upload-time = "2026-02-11T04:21:13.786Z" },
-    { url = "https://files.pythonhosted.org/packages/9e/1b/f1a4ea9a895b5732152789326202a82464d5254759fbacae4deea3069334/pillow-12.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5bddd742a44b7e6b1e773ab5db102bd7a94c32555ba656e76d319d19c3850", size = 6232698, upload-time = "2026-02-11T04:21:15.949Z" },
-    { url = "https://files.pythonhosted.org/packages/95/f4/86f51b8745070daf21fd2e5b1fe0eb35d4db9ca26e6d58366562fb56a743/pillow-12.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc44ef1f3de4f45b50ccf9136999d71abb99dca7706bc75d222ed350b9fd2289", size = 8041706, upload-time = "2026-02-11T04:21:17.723Z" },
-    { url = "https://files.pythonhosted.org/packages/29/9b/d6ecd956bb1266dd1045e995cce9b8d77759e740953a1c9aad9502a0461e/pillow-12.1.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a8eb7ed8d4198bccbd07058416eeec51686b498e784eda166395a23eb99138e", size = 6346621, upload-time = "2026-02-11T04:21:19.547Z" },
-    { url = "https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717", size = 7038069, upload-time = "2026-02-11T04:21:21.378Z" },
-    { url = "https://files.pythonhosted.org/packages/94/0e/58cb1a6bc48f746bc4cb3adb8cabff73e2742c92b3bf7a220b7cf69b9177/pillow-12.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:518a48c2aab7ce596d3bf79d0e275661b846e86e4d0e7dec34712c30fe07f02a", size = 6460040, upload-time = "2026-02-11T04:21:23.148Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/57/9045cb3ff11eeb6c1adce3b2d60d7d299d7b273a2e6c8381a524abfdc474/pillow-12.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a550ae29b95c6dc13cf69e2c9dc5747f814c54eeb2e32d683e5e93af56caa029", size = 7164523, upload-time = "2026-02-11T04:21:25.01Z" },
-    { url = "https://files.pythonhosted.org/packages/19/2a/b9d62794fc8a0dd14c1943df68347badbd5511103e0d04c035ffe5cf2255/pillow-12.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0330d233c1a0ead844fc097a7d16c0abff4c12e856c0b325f231820fee1f39da", size = 5264880, upload-time = "2026-02-11T04:21:32.865Z" },
-    { url = "https://files.pythonhosted.org/packages/26/9d/e03d857d1347fa5ed9247e123fcd2a97b6220e15e9cb73ca0a8d91702c6e/pillow-12.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dae5f21afb91322f2ff791895ddd8889e5e947ff59f71b46041c8ce6db790bc", size = 4660616, upload-time = "2026-02-11T04:21:34.97Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/ec/8a6d22afd02570d30954e043f09c32772bfe143ba9285e2fdb11284952cd/pillow-12.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e0c664be47252947d870ac0d327fea7e63985a08794758aa8af5b6cb6ec0c9c", size = 6269008, upload-time = "2026-02-11T04:21:36.623Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/1d/6d875422c9f28a4a361f495a5f68d9de4a66941dc2c619103ca335fa6446/pillow-12.1.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:691ab2ac363b8217f7d31b3497108fb1f50faab2f75dfb03284ec2f217e87bf8", size = 8073226, upload-time = "2026-02-11T04:21:38.585Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/cd/134b0b6ee5eda6dc09e25e24b40fdafe11a520bc725c1d0bbaa5e00bf95b/pillow-12.1.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9e8064fb1cc019296958595f6db671fba95209e3ceb0c4734c9baf97de04b20", size = 6380136, upload-time = "2026-02-11T04:21:40.562Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/a9/7628f013f18f001c1b98d8fffe3452f306a70dc6aba7d931019e0492f45e/pillow-12.1.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:472a8d7ded663e6162dafdf20015c486a7009483ca671cece7a9279b512fcb13", size = 7067129, upload-time = "2026-02-11T04:21:42.521Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/f8/66ab30a2193b277785601e82ee2d49f68ea575d9637e5e234faaa98efa4c/pillow-12.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:89b54027a766529136a06cfebeecb3a04900397a3590fd252160b888479517bf", size = 6491807, upload-time = "2026-02-11T04:21:44.22Z" },
-    { url = "https://files.pythonhosted.org/packages/da/0b/a877a6627dc8318fdb84e357c5e1a758c0941ab1ddffdafd231983788579/pillow-12.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:86172b0831b82ce4f7877f280055892b31179e1576aa00d0df3bb1bbf8c3e524", size = 7190954, upload-time = "2026-02-11T04:21:46.114Z" },
-    { url = "https://files.pythonhosted.org/packages/03/d0/bebb3ffbf31c5a8e97241476c4cf8b9828954693ce6744b4a2326af3e16b/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af", size = 4062652, upload-time = "2026-02-11T04:21:53.19Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/c0/0e16fb0addda4851445c28f8350d8c512f09de27bbb0d6d0bbf8b6709605/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f", size = 4138823, upload-time = "2026-02-11T04:22:03.088Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/fb/6170ec655d6f6bb6630a013dd7cf7bc218423d7b5fa9071bf63dc32175ae/pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642", size = 3601143, upload-time = "2026-02-11T04:22:04.909Z" },
-    { url = "https://files.pythonhosted.org/packages/59/04/dc5c3f297510ba9a6837cbb318b87dd2b8f73eb41a43cc63767f65cb599c/pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd", size = 5266254, upload-time = "2026-02-11T04:22:07.656Z" },
-    { url = "https://files.pythonhosted.org/packages/05/30/5db1236b0d6313f03ebf97f5e17cda9ca060f524b2fcc875149a8360b21c/pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202", size = 4657499, upload-time = "2026-02-11T04:22:09.613Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/18/008d2ca0eb612e81968e8be0bbae5051efba24d52debf930126d7eaacbba/pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f", size = 6232137, upload-time = "2026-02-11T04:22:11.434Z" },
-    { url = "https://files.pythonhosted.org/packages/70/f1/f14d5b8eeb4b2cd62b9f9f847eb6605f103df89ef619ac68f92f748614ea/pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f", size = 8042721, upload-time = "2026-02-11T04:22:13.321Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/d6/17824509146e4babbdabf04d8171491fa9d776f7061ff6e727522df9bd03/pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f", size = 6347798, upload-time = "2026-02-11T04:22:15.449Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/ee/c85a38a9ab92037a75615aba572c85ea51e605265036e00c5b67dfafbfe2/pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e", size = 7039315, upload-time = "2026-02-11T04:22:17.24Z" },
-    { url = "https://files.pythonhosted.org/packages/ec/f3/bc8ccc6e08a148290d7523bde4d9a0d6c981db34631390dc6e6ec34cacf6/pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0", size = 6462360, upload-time = "2026-02-11T04:22:19.111Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/ab/69a42656adb1d0665ab051eec58a41f169ad295cf81ad45406963105408f/pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb", size = 7165438, upload-time = "2026-02-11T04:22:21.041Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/9d/efd18493f9de13b87ede7c47e69184b9e859e4427225ea962e32e56a49bc/pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8", size = 5268612, upload-time = "2026-02-11T04:22:29.884Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/f1/4f42eb2b388eb2ffc660dcb7f7b556c1015c53ebd5f7f754965ef997585b/pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9", size = 4660567, upload-time = "2026-02-11T04:22:31.799Z" },
-    { url = "https://files.pythonhosted.org/packages/01/54/df6ef130fa43e4b82e32624a7b821a2be1c5653a5fdad8469687a7db4e00/pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60", size = 6269951, upload-time = "2026-02-11T04:22:33.921Z" },
-    { url = "https://files.pythonhosted.org/packages/a9/48/618752d06cc44bb4aae8ce0cd4e6426871929ed7b46215638088270d9b34/pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7", size = 8074769, upload-time = "2026-02-11T04:22:35.877Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/bd/f1d71eb39a72fa088d938655afba3e00b38018d052752f435838961127d8/pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f", size = 6381358, upload-time = "2026-02-11T04:22:37.698Z" },
-    { url = "https://files.pythonhosted.org/packages/64/ef/c784e20b96674ed36a5af839305f55616f8b4f8aa8eeccf8531a6e312243/pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586", size = 7068558, upload-time = "2026-02-11T04:22:39.597Z" },
-    { url = "https://files.pythonhosted.org/packages/73/cb/8059688b74422ae61278202c4e1ad992e8a2e7375227be0a21c6b87ca8d5/pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce", size = 6493028, upload-time = "2026-02-11T04:22:42.73Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/da/e3c008ed7d2dd1f905b15949325934510b9d1931e5df999bb15972756818/pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8", size = 7191940, upload-time = "2026-02-11T04:22:44.543Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/01/53d10cf0dbad820a8db274d259a37ba50b88b24768ddccec07355382d5ad/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:8297651f5b5679c19968abefd6bb84d95fe30ef712eb1b2d9b2d31ca61267f4c", size = 4100837, upload-time = "2026-04-01T14:43:41.506Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/98/f3a6657ecb698c937f6c76ee564882945f29b79bad496abcba0e84659ec5/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:50d8520da2a6ce0af445fa6d648c4273c3eeefbc32d7ce049f22e8b5c3daecc2", size = 4176528, upload-time = "2026-04-01T14:43:43.773Z" },
+    { url = "https://files.pythonhosted.org/packages/69/bc/8986948f05e3ea490b8442ea1c1d4d990b24a7e43d8a51b2c7d8b1dced36/pillow-12.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:766cef22385fa1091258ad7e6216792b156dc16d8d3fa607e7545b2b72061f1c", size = 3640401, upload-time = "2026-04-01T14:43:45.87Z" },
+    { url = "https://files.pythonhosted.org/packages/34/46/6c717baadcd62bc8ed51d238d521ab651eaa74838291bda1f86fe1f864c9/pillow-12.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5d2fd0fa6b5d9d1de415060363433f28da8b1526c1c129020435e186794b3795", size = 5308094, upload-time = "2026-04-01T14:43:48.438Z" },
+    { url = "https://files.pythonhosted.org/packages/71/43/905a14a8b17fdb1ccb58d282454490662d2cb89a6bfec26af6d3520da5ec/pillow-12.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56b25336f502b6ed02e889f4ece894a72612fe885889a6e8c4c80239ff6e5f5f", size = 4695402, upload-time = "2026-04-01T14:43:51.292Z" },
+    { url = "https://files.pythonhosted.org/packages/73/dd/42107efcb777b16fa0393317eac58f5b5cf30e8392e266e76e51cff28c3d/pillow-12.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f1c943e96e85df3d3478f7b691f229887e143f81fedab9b20205349ab04d73ed", size = 6280005, upload-time = "2026-04-01T14:43:54.242Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/68/b93e09e5e8549019e61acf49f65b1a8530765a7f812c77a7461bca7e4494/pillow-12.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03f6fab9219220f041c74aeaa2939ff0062bd5c364ba9ce037197f4c6d498cd9", size = 8090669, upload-time = "2026-04-01T14:43:57.335Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/6e/3ccb54ce8ec4ddd1accd2d89004308b7b0b21c4ac3d20fa70af4760a4330/pillow-12.2.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cdfebd752ec52bf5bb4e35d9c64b40826bc5b40a13df7c3cda20a2c03a0f5ed", size = 6395194, upload-time = "2026-04-01T14:43:59.864Z" },
+    { url = "https://files.pythonhosted.org/packages/67/ee/21d4e8536afd1a328f01b359b4d3997b291ffd35a237c877b331c1c3b71c/pillow-12.2.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eedf4b74eda2b5a4b2b2fb4c006d6295df3bf29e459e198c90ea48e130dc75c3", size = 7082423, upload-time = "2026-04-01T14:44:02.74Z" },
+    { url = "https://files.pythonhosted.org/packages/78/5f/e9f86ab0146464e8c133fe85df987ed9e77e08b29d8d35f9f9f4d6f917ba/pillow-12.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:00a2865911330191c0b818c59103b58a5e697cae67042366970a6b6f1b20b7f9", size = 6505667, upload-time = "2026-04-01T14:44:05.381Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/1e/409007f56a2fdce61584fd3acbc2bbc259857d555196cedcadc68c015c82/pillow-12.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e1757442ed87f4912397c6d35a0db6a7b52592156014706f17658ff58bbf795", size = 7208580, upload-time = "2026-04-01T14:44:08.39Z" },
+    { url = "https://files.pythonhosted.org/packages/23/c4/7349421080b12fb35414607b8871e9534546c128a11965fd4a7002ccfbee/pillow-12.2.0-cp313-cp313-win32.whl", hash = "sha256:144748b3af2d1b358d41286056d0003f47cb339b8c43a9ea42f5fea4d8c66b6e", size = 6375896, upload-time = "2026-04-01T14:44:11.197Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/82/8a3739a5e470b3c6cbb1d21d315800d8e16bff503d1f16b03a4ec3212786/pillow-12.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:390ede346628ccc626e5730107cde16c42d3836b89662a115a921f28440e6a3b", size = 7081266, upload-time = "2026-04-01T14:44:13.947Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/25/f968f618a062574294592f668218f8af564830ccebdd1fa6200f598e65c5/pillow-12.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:8023abc91fba39036dbce14a7d6535632f99c0b857807cbbbf21ecc9f4717f06", size = 2463508, upload-time = "2026-04-01T14:44:16.312Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/a4/b342930964e3cb4dce5038ae34b0eab4653334995336cd486c5a8c25a00c/pillow-12.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:042db20a421b9bafecc4b84a8b6e444686bd9d836c7fd24542db3e7df7baad9b", size = 5309927, upload-time = "2026-04-01T14:44:18.89Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/de/23198e0a65a9cf06123f5435a5d95cea62a635697f8f03d134d3f3a96151/pillow-12.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd025009355c926a84a612fecf58bb315a3f6814b17ead51a8e48d3823d9087f", size = 4698624, upload-time = "2026-04-01T14:44:21.115Z" },
+    { url = "https://files.pythonhosted.org/packages/01/a6/1265e977f17d93ea37aa28aa81bad4fa597933879fac2520d24e021c8da3/pillow-12.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88ddbc66737e277852913bd1e07c150cc7bb124539f94c4e2df5344494e0a612", size = 6321252, upload-time = "2026-04-01T14:44:23.663Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/83/5982eb4a285967baa70340320be9f88e57665a387e3a53a7f0db8231a0cd/pillow-12.2.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d362d1878f00c142b7e1a16e6e5e780f02be8195123f164edf7eddd911eefe7c", size = 8126550, upload-time = "2026-04-01T14:44:26.772Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/48/6ffc514adce69f6050d0753b1a18fd920fce8cac87620d5a31231b04bfc5/pillow-12.2.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c727a6d53cb0018aadd8018c2b938376af27914a68a492f59dfcaca650d5eea", size = 6433114, upload-time = "2026-04-01T14:44:29.615Z" },
+    { url = "https://files.pythonhosted.org/packages/36/a3/f9a77144231fb8d40ee27107b4463e205fa4677e2ca2548e14da5cf18dce/pillow-12.2.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd8c21c98c5cc60653bcb311bef2ce0401642b7ce9d09e03a7da87c878289d4", size = 7115667, upload-time = "2026-04-01T14:44:32.773Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/fc/ac4ee3041e7d5a565e1c4fd72a113f03b6394cc72ab7089d27608f8aaccb/pillow-12.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9f08483a632889536b8139663db60f6724bfcb443c96f1b18855860d7d5c0fd4", size = 6538966, upload-time = "2026-04-01T14:44:35.252Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/a8/27fb307055087f3668f6d0a8ccb636e7431d56ed0750e07a60547b1e083e/pillow-12.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dac8d77255a37e81a2efcbd1fc05f1c15ee82200e6c240d7e127e25e365c39ea", size = 7238241, upload-time = "2026-04-01T14:44:37.875Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/4b/926ab182c07fccae9fcb120043464e1ff1564775ec8864f21a0ebce6ac25/pillow-12.2.0-cp313-cp313t-win32.whl", hash = "sha256:ee3120ae9dff32f121610bb08e4313be87e03efeadfc6c0d18f89127e24d0c24", size = 6379592, upload-time = "2026-04-01T14:44:40.336Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/c4/f9e476451a098181b30050cc4c9a3556b64c02cf6497ea421ac047e89e4b/pillow-12.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:325ca0528c6788d2a6c3d40e3568639398137346c3d6e66bb61db96b96511c98", size = 7085542, upload-time = "2026-04-01T14:44:43.251Z" },
+    { url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" },
 ]
 
 [[package]]
 name = "platformdirs"
-version = "4.5.1"
+version = "4.9.6"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400, upload-time = "2026-04-09T00:04:10.812Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" },
+    { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" },
+]
+
+[[package]]
+name = "playwright"
+version = "1.60.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "greenlet", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyee", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/21/f0/832bd9677194908da118064eef20082f2791e3d18215cc6d9391ee2c5a67/playwright-1.60.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:6a8cd0fec171fb3089e95e898c8bc8a6f35dea0b78b399e12fcc19427e91b1d7", size = 43474635, upload-time = "2026-05-18T12:00:31.969Z" },
+    { url = "https://files.pythonhosted.org/packages/59/7b/e1d32ae8a3ed937ec2be3721c5f728b13d731a0b7c6442e0b3bec5094ac0/playwright-1.60.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:39b5420ba6145045b69ced4c5c47d4d9fe5bddfc8ff816c518913afcb25ec7a5", size = 42261327, upload-time = "2026-05-18T12:00:35.638Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/bc/23de499ded6411c188a20c5a0dea6f0cd4ed5d2b3cc6042a5dbd3ed609aa/playwright-1.60.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:2581d0e6a3392c71f91b27460c7fd093356818dc430f48153896c8aeeaef7705", size = 43474636, upload-time = "2026-05-18T12:00:39.294Z" },
+    { url = "https://files.pythonhosted.org/packages/22/7b/1d679f4fced4ea94efadd17103856d8c565384f68382a1681264e46f5925/playwright-1.60.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:1c2bfae7884fb3fb05b853290eab8f343d524e5016f2f1def702acbbdf14c93e", size = 47467220, upload-time = "2026-05-18T12:00:43.179Z" },
+    { url = "https://files.pythonhosted.org/packages/84/c2/1528d267d4442bd2c6b8eaeab819dd52c2030bf80e89293f0ba1f687473b/playwright-1.60.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43e66564125ee31b07a58cefb21e256d62d67d8d1713e6858df7a3019d8ed353", size = 47154856, upload-time = "2026-05-18T12:00:46.715Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/4e/b008b6440a7a1624378041da94829956d4b8f7ab9ef5aad22d0dc3f2e26d/playwright-1.60.0-py3-none-win32.whl", hash = "sha256:ec94e416ea320711e0ad4bf185dcbf41833672961e90773e1885255d7db7b7e7", size = 37902157, upload-time = "2026-05-18T12:00:50.374Z" },
+    { url = "https://files.pythonhosted.org/packages/55/f0/0541524133104f9cc20bf900870ff4a736b76a23483f3a55295ddfa58409/playwright-1.60.0-py3-none-win_amd64.whl", hash = "sha256:9566821ce6030a1f9e7146a24e19355ab0d98805fd0f9be50bb3d8fef1750c02", size = 37902159, upload-time = "2026-05-18T12:00:53.728Z" },
+    { url = "https://files.pythonhosted.org/packages/80/c8/210f282d278e4709cdd71b12a31af45a30a22ab3207b387e29b37e478713/playwright-1.60.0-py3-none-win_arm64.whl", hash = "sha256:6e4f6700a4c2250efff8e690a81d66e3855754fb587b6b87cf5c784014f91537", size = 34037981, upload-time = "2026-05-18T12:00:57.584Z" },
 ]
 
 [[package]]
@@ -2129,247 +2048,223 @@ wheels = [
 
 [[package]]
 name = "propcache"
-version = "0.4.1"
+version = "0.5.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/44/c87281c333769159c50594f22610f77398a47ccbfbbf23074e744e86f87c/propcache-0.5.2.tar.gz", hash = "sha256:01c4fc7480cd0598bb4b57022df55b9ca296da7fc5a8760bd8451a7e63a7d427", size = 50208, upload-time = "2026-05-08T21:02:12.199Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/bf/df/6d9c1b6ac12b003837dde8a10231a7344512186e87b36e855bef32241942/propcache-0.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf", size = 77750, upload-time = "2025-10-08T19:47:07.648Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/e8/677a0025e8a2acf07d3418a2e7ba529c9c33caf09d3c1f25513023c1db56/propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311", size = 44780, upload-time = "2025-10-08T19:47:08.851Z" },
-    { url = "https://files.pythonhosted.org/packages/89/a4/92380f7ca60f99ebae761936bc48a72a639e8a47b29050615eef757cb2a7/propcache-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74", size = 46308, upload-time = "2025-10-08T19:47:09.982Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/48/c5ac64dee5262044348d1d78a5f85dd1a57464a60d30daee946699963eb3/propcache-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe", size = 208182, upload-time = "2025-10-08T19:47:11.319Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/0c/cd762dd011a9287389a6a3eb43aa30207bde253610cca06824aeabfe9653/propcache-0.4.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af", size = 211215, upload-time = "2025-10-08T19:47:13.146Z" },
-    { url = "https://files.pythonhosted.org/packages/30/3e/49861e90233ba36890ae0ca4c660e95df565b2cd15d4a68556ab5865974e/propcache-0.4.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c", size = 218112, upload-time = "2025-10-08T19:47:14.913Z" },
-    { url = "https://files.pythonhosted.org/packages/f1/8b/544bc867e24e1bd48f3118cecd3b05c694e160a168478fa28770f22fd094/propcache-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f", size = 204442, upload-time = "2025-10-08T19:47:16.277Z" },
-    { url = "https://files.pythonhosted.org/packages/50/a6/4282772fd016a76d3e5c0df58380a5ea64900afd836cec2c2f662d1b9bb3/propcache-0.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1", size = 199398, upload-time = "2025-10-08T19:47:17.962Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/ec/d8a7cd406ee1ddb705db2139f8a10a8a427100347bd698e7014351c7af09/propcache-0.4.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24", size = 196920, upload-time = "2025-10-08T19:47:19.355Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/6c/f38ab64af3764f431e359f8baf9e0a21013e24329e8b85d2da32e8ed07ca/propcache-0.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa", size = 203748, upload-time = "2025-10-08T19:47:21.338Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/e3/fa846bd70f6534d647886621388f0a265254d30e3ce47e5c8e6e27dbf153/propcache-0.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61", size = 205877, upload-time = "2025-10-08T19:47:23.059Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/39/8163fc6f3133fea7b5f2827e8eba2029a0277ab2c5beee6c1db7b10fc23d/propcache-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66", size = 199437, upload-time = "2025-10-08T19:47:24.445Z" },
-    { url = "https://files.pythonhosted.org/packages/83/ce/a31bbdfc24ee0dcbba458c8175ed26089cf109a55bbe7b7640ed2470cfe9/propcache-0.4.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b", size = 81451, upload-time = "2025-10-08T19:47:29.445Z" },
-    { url = "https://files.pythonhosted.org/packages/25/9c/442a45a470a68456e710d96cacd3573ef26a1d0a60067e6a7d5e655621ed/propcache-0.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566", size = 46374, upload-time = "2025-10-08T19:47:30.579Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/bf/b1d5e21dbc3b2e889ea4327044fb16312a736d97640fb8b6aa3f9c7b3b65/propcache-0.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835", size = 48396, upload-time = "2025-10-08T19:47:31.79Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/04/5b4c54a103d480e978d3c8a76073502b18db0c4bc17ab91b3cb5092ad949/propcache-0.4.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e", size = 275950, upload-time = "2025-10-08T19:47:33.481Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/c1/86f846827fb969c4b78b0af79bba1d1ea2156492e1b83dea8b8a6ae27395/propcache-0.4.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859", size = 273856, upload-time = "2025-10-08T19:47:34.906Z" },
-    { url = "https://files.pythonhosted.org/packages/36/1d/fc272a63c8d3bbad6878c336c7a7dea15e8f2d23a544bda43205dfa83ada/propcache-0.4.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b", size = 280420, upload-time = "2025-10-08T19:47:36.338Z" },
-    { url = "https://files.pythonhosted.org/packages/07/0c/01f2219d39f7e53d52e5173bcb09c976609ba30209912a0680adfb8c593a/propcache-0.4.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0", size = 263254, upload-time = "2025-10-08T19:47:37.692Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/18/cd28081658ce597898f0c4d174d4d0f3c5b6d4dc27ffafeef835c95eb359/propcache-0.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af", size = 261205, upload-time = "2025-10-08T19:47:39.659Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/71/1f9e22eb8b8316701c2a19fa1f388c8a3185082607da8e406a803c9b954e/propcache-0.4.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393", size = 247873, upload-time = "2025-10-08T19:47:41.084Z" },
-    { url = "https://files.pythonhosted.org/packages/4a/65/3d4b61f36af2b4eddba9def857959f1016a51066b4f1ce348e0cf7881f58/propcache-0.4.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874", size = 262739, upload-time = "2025-10-08T19:47:42.51Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/42/26746ab087faa77c1c68079b228810436ccd9a5ce9ac85e2b7307195fd06/propcache-0.4.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7", size = 263514, upload-time = "2025-10-08T19:47:43.927Z" },
-    { url = "https://files.pythonhosted.org/packages/94/13/630690fe201f5502d2403dd3cfd451ed8858fe3c738ee88d095ad2ff407b/propcache-0.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1", size = 257781, upload-time = "2025-10-08T19:47:45.448Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/5c/bca52d654a896f831b8256683457ceddd490ec18d9ec50e97dfd8fc726a8/propcache-0.4.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3f7124c9d820ba5548d431afb4632301acf965db49e666aa21c305cbe8c6de12", size = 78152, upload-time = "2025-10-08T19:47:51.051Z" },
-    { url = "https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c0d4b719b7da33599dfe3b22d3db1ef789210a0597bc650b7cee9c77c2be8c5c", size = 44869, upload-time = "2025-10-08T19:47:52.594Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f302f4783709a78240ebc311b793f123328716a60911d667e0c036bc5dcbded", size = 46596, upload-time = "2025-10-08T19:47:54.073Z" },
-    { url = "https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c80ee5802e3fb9ea37938e7eecc307fb984837091d5fd262bb37238b1ae97641", size = 206981, upload-time = "2025-10-08T19:47:55.715Z" },
-    { url = "https://files.pythonhosted.org/packages/df/f6/c5fa1357cc9748510ee55f37173eb31bfde6d94e98ccd9e6f033f2fc06e1/propcache-0.4.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ed5a841e8bb29a55fb8159ed526b26adc5bdd7e8bd7bf793ce647cb08656cdf4", size = 211490, upload-time = "2025-10-08T19:47:57.499Z" },
-    { url = "https://files.pythonhosted.org/packages/80/1e/e5889652a7c4a3846683401a48f0f2e5083ce0ec1a8a5221d8058fbd1adf/propcache-0.4.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55c72fd6ea2da4c318e74ffdf93c4fe4e926051133657459131a95c846d16d44", size = 215371, upload-time = "2025-10-08T19:47:59.317Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8326e144341460402713f91df60ade3c999d601e7eb5ff8f6f7862d54de0610d", size = 201424, upload-time = "2025-10-08T19:48:00.67Z" },
-    { url = "https://files.pythonhosted.org/packages/27/73/033d63069b57b0812c8bd19f311faebeceb6ba31b8f32b73432d12a0b826/propcache-0.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:060b16ae65bc098da7f6d25bf359f1f31f688384858204fe5d652979e0015e5b", size = 197566, upload-time = "2025-10-08T19:48:02.604Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/89/ce24f3dc182630b4e07aa6d15f0ff4b14ed4b9955fae95a0b54c58d66c05/propcache-0.4.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:89eb3fa9524f7bec9de6e83cf3faed9d79bffa560672c118a96a171a6f55831e", size = 193130, upload-time = "2025-10-08T19:48:04.499Z" },
-    { url = "https://files.pythonhosted.org/packages/a9/24/ef0d5fd1a811fb5c609278d0209c9f10c35f20581fcc16f818da959fc5b4/propcache-0.4.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:dee69d7015dc235f526fe80a9c90d65eb0039103fe565776250881731f06349f", size = 202625, upload-time = "2025-10-08T19:48:06.213Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/02/98ec20ff5546f68d673df2f7a69e8c0d076b5abd05ca882dc7ee3a83653d/propcache-0.4.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5558992a00dfd54ccbc64a32726a3357ec93825a418a401f5cc67df0ac5d9e49", size = 204209, upload-time = "2025-10-08T19:48:08.432Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/87/492694f76759b15f0467a2a93ab68d32859672b646aa8a04ce4864e7932d/propcache-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c9b822a577f560fbd9554812526831712c1436d2c046cedee4c3796d3543b144", size = 197797, upload-time = "2025-10-08T19:48:09.968Z" },
-    { url = "https://files.pythonhosted.org/packages/99/85/9ff785d787ccf9bbb3f3106f79884a130951436f58392000231b4c737c80/propcache-0.4.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:824e908bce90fb2743bd6b59db36eb4f45cd350a39637c9f73b1c1ea66f5b75f", size = 81455, upload-time = "2025-10-08T19:48:15.16Z" },
-    { url = "https://files.pythonhosted.org/packages/90/85/2431c10c8e7ddb1445c1f7c4b54d886e8ad20e3c6307e7218f05922cad67/propcache-0.4.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2b5e7db5328427c57c8e8831abda175421b709672f6cfc3d630c3b7e2146393", size = 46372, upload-time = "2025-10-08T19:48:16.424Z" },
-    { url = "https://files.pythonhosted.org/packages/01/20/b0972d902472da9bcb683fa595099911f4d2e86e5683bcc45de60dd05dc3/propcache-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6f6ff873ed40292cd4969ef5310179afd5db59fdf055897e282485043fc80ad0", size = 48411, upload-time = "2025-10-08T19:48:17.577Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/e3/7dc89f4f21e8f99bad3d5ddb3a3389afcf9da4ac69e3deb2dcdc96e74169/propcache-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49a2dc67c154db2c1463013594c458881a069fcf98940e61a0569016a583020a", size = 275712, upload-time = "2025-10-08T19:48:18.901Z" },
-    { url = "https://files.pythonhosted.org/packages/20/67/89800c8352489b21a8047c773067644e3897f02ecbbd610f4d46b7f08612/propcache-0.4.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:005f08e6a0529984491e37d8dbc3dd86f84bd78a8ceb5fa9a021f4c48d4984be", size = 273557, upload-time = "2025-10-08T19:48:20.762Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/a1/b52b055c766a54ce6d9c16d9aca0cad8059acd9637cdf8aa0222f4a026ef/propcache-0.4.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c3310452e0d31390da9035c348633b43d7e7feb2e37be252be6da45abd1abcc", size = 280015, upload-time = "2025-10-08T19:48:22.592Z" },
-    { url = "https://files.pythonhosted.org/packages/48/c8/33cee30bd890672c63743049f3c9e4be087e6780906bfc3ec58528be59c1/propcache-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3c70630930447f9ef1caac7728c8ad1c56bc5015338b20fed0d08ea2480b3a", size = 262880, upload-time = "2025-10-08T19:48:23.947Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/b1/8f08a143b204b418285c88b83d00edbd61afbc2c6415ffafc8905da7038b/propcache-0.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e57061305815dfc910a3634dcf584f08168a8836e6999983569f51a8544cd89", size = 260938, upload-time = "2025-10-08T19:48:25.656Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/12/96e4664c82ca2f31e1c8dff86afb867348979eb78d3cb8546a680287a1e9/propcache-0.4.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:521a463429ef54143092c11a77e04056dd00636f72e8c45b70aaa3140d639726", size = 247641, upload-time = "2025-10-08T19:48:27.207Z" },
-    { url = "https://files.pythonhosted.org/packages/18/ed/e7a9cfca28133386ba52278136d42209d3125db08d0a6395f0cba0c0285c/propcache-0.4.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:120c964da3fdc75e3731aa392527136d4ad35868cc556fd09bb6d09172d9a367", size = 262510, upload-time = "2025-10-08T19:48:28.65Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/76/16d8bf65e8845dd62b4e2b57444ab81f07f40caa5652b8969b87ddcf2ef6/propcache-0.4.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:d8f353eb14ee3441ee844ade4277d560cdd68288838673273b978e3d6d2c8f36", size = 263161, upload-time = "2025-10-08T19:48:30.133Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/70/c99e9edb5d91d5ad8a49fa3c1e8285ba64f1476782fed10ab251ff413ba1/propcache-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ab2943be7c652f09638800905ee1bab2c544e537edb57d527997a24c13dc1455", size = 257393, upload-time = "2025-10-08T19:48:31.567Z" },
-    { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/09/f049e45385503fe67db75a6b6186a7b9f0c3930366dc960522c312a825b1/propcache-0.5.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:099aaf4b4d1a02265b92a977edf00b5c4f63b3b17ac6de39b0d637c9cac0188a", size = 94457, upload-time = "2026-05-08T21:00:36.355Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/65/83d1d05655baf63113731bd5a1008435e14f8d1e5a06cbe4ec5b23ad7a31/propcache-0.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:68ce1c44c7a813a7f71ea04315a8c7b330b63db99d059a797a4651bb6f69f117", size = 53835, upload-time = "2026-05-08T21:00:38.072Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/12/a6ba6482bb5ea3260c000c9b20881c95fa11c6b30173715668259f844ed7/propcache-0.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fc299c129490f55f254cd90be0deca4764e36e9a7c08b4aa588479a3bbed3098", size = 54545, upload-time = "2026-05-08T21:00:39.319Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/19/7fa086f5764c59ec8a8e157cd93aa8497acc00aba9dcdec56bfffb32602d/propcache-0.5.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6ae2198be502c10f09b2516e7b5d019816924bc3183a43ce792a7bd6625e6f4", size = 59886, upload-time = "2026-05-08T21:00:40.621Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/e4/5d7663dc8235956c8f5281698a3af1d351d8820341ddd890f59d9a9127f2/propcache-0.5.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6041d31504dc1779d700e1edcfb08eea334b357620b06681a4eabb57a74e574e", size = 63261, upload-time = "2026-05-08T21:00:41.775Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/4a/15a03adee24d6350da4292caeac44c34c033d2afe5e87eb370f38854560f/propcache-0.5.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7eabc04151c78a9f4d5bbb5f1faf571e4defeb4b585e0fe95b60ff2dbe4d3d7", size = 64184, upload-time = "2026-05-08T21:00:43.018Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/c6/979176efdaa3d239e36d503d5af63a0a773b36662ed8f52e5b6a6d9fd40e/propcache-0.5.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4db0ba63d693afd40d249bd93f842b5f144f8fcbb83de05660373bcf30517b1d", size = 61534, upload-time = "2026-05-08T21:00:44.507Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/22/63e8cd1bae4c2d2be6493b6b7d10566ddafad88137cfbc99964a1119853c/propcache-0.5.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1dbcf7675229b35d31abb6547d8ebc8c27a830ac3f9a794edff6254873ec7c0a", size = 61500, upload-time = "2026-05-08T21:00:45.796Z" },
+    { url = "https://files.pythonhosted.org/packages/60/5a/28e5d9acbac1cc9ccb67045e8c1b943aa8d79fdf39c93bd73cacd68008ea/propcache-0.5.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d310c013aad2c72f1c3f2f8dd3279d460a858c551f97aeb8c63e4693cca7b4d2", size = 59994, upload-time = "2026-05-08T21:00:47.093Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/40/db650677f554a95b9c01a7c9d93d629e93a15562f5deb4573c9ee136fed2/propcache-0.5.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:06187263ddad280d05b4d8a8b3bb7d164cbebd469236544a42e6d9b28ac6a4fa", size = 56884, upload-time = "2026-05-08T21:00:48.376Z" },
+    { url = "https://files.pythonhosted.org/packages/80/45/70b39b89516ff8b96bf732fa6fded8cef20f293cb1508690101c3c07ec51/propcache-0.5.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3115559b8effafd63b142ea5ed53d63a16ea6469cbc63dce4ee194b42db5d853", size = 63464, upload-time = "2026-05-08T21:00:49.954Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/e2/fa59d3a89eac5534293124af4f1d0d0ada091ce4a0ab4610ce03fd2bdd8d/propcache-0.5.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c60462af8e6dc30c35407c7237ea908d777b22862bbee27bc4699c0d8bcdc45a", size = 61588, upload-time = "2026-05-08T21:00:51.281Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/97/efb547a55c4bc7381cfb202d6a2239ac621045277bc1ea5dfd3a7f0516c0/propcache-0.5.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40314bca9ac559716fe374094fc81c11dcc34b64fd6c585360f5775690505704", size = 64667, upload-time = "2026-05-08T21:00:52.602Z" },
+    { url = "https://files.pythonhosted.org/packages/92/56/f5c7d9b4b7595d5127da38974d791b2153f3d1eae6c674af3583ace92ad3/propcache-0.5.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cfa21e036ce1e1db2be04ba3b85d2df1bb1702fa01932d984c5464c665228ff4", size = 62463, upload-time = "2026-05-08T21:00:54.303Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/3b/484a3a65fc9f9f60c41dcd17b428bace5389544e2c680994534a20755066/propcache-0.5.2-cp313-cp313-win32.whl", hash = "sha256:f156a3529f38063b6dbaf356e15602a7f95f8055b1295a438433a6386f10463d", size = 38621, upload-time = "2026-05-08T21:00:55.808Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/fd/3f0f10dba4dabad3bf53102be007abf55481067952bde0fdddff439e7c61/propcache-0.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:dfed59d0a5aeb01e242e66ff0300bc4a265a7c05f612d30016f0b60b1017d757", size = 41649, upload-time = "2026-05-08T21:00:57.061Z" },
+    { url = "https://files.pythonhosted.org/packages/90/ec/6ce619cc32bb500a482f811f9cd509368b4e58e638d13f2c68f370d6b475/propcache-0.5.2-cp313-cp313-win_arm64.whl", hash = "sha256:ba338430e87ceb9c8f0cf754de38a9860560261e56c00376debd628698a7364f", size = 37636, upload-time = "2026-05-08T21:00:58.646Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/82/c1d268bbbf2ef981c5bf0fbbe746db617c66e3bcefe431a1aa8943fbe23a/propcache-0.5.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:a592f5f3da71c8691c788c13cb6734b6d17663d2e1cb8caddf0673d01ef8847d", size = 98872, upload-time = "2026-05-08T21:00:59.889Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/d4/52c871e73e864e6b34c0e2d58ac1ec5ccd149497ddc7ad2137ae98323a35/propcache-0.5.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6a997d0489e9668a384fcfd5061b857aa5361de73191cac204d04b889cfbbafa", size = 56257, upload-time = "2026-05-08T21:01:01.195Z" },
+    { url = "https://files.pythonhosted.org/packages/67/f0/9b90ca2a210b3d09bcfcd96ecd0f55545c091535abce2a45de2775cfd357/propcache-0.5.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:10734b5484ea113152ee25a91dccedf81631791805d2c9ccb054958e51842c94", size = 56696, upload-time = "2026-05-08T21:01:02.941Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/0e/6e9d4ba07c8e56e21ddec1e75f12148142b21ca83a51871babce095334f4/propcache-0.5.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cafca7e56c12bb02ae16d283742bef25a61122e9dab2b5b3f2ccbe589ce32164", size = 62378, upload-time = "2026-05-08T21:01:04.475Z" },
+    { url = "https://files.pythonhosted.org/packages/65/19/c10badaa463dde8a27ce884f8ee2ec37e6035b7c9f5ff0c8f74f06f08dac/propcache-0.5.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f064f8d2b59177878b7615df1735cd8fe3462ed6be8c7b217d17a276489c2b7f", size = 65283, upload-time = "2026-05-08T21:01:05.959Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/b6/93bea99ca80e19cef6512a8580e5b7857bbe09422d9daa7fd4ef5723306c/propcache-0.5.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f78abfa8dfc32376fd1aacf597b2f2fbbe0ea751419aee718af5d4f82537ef8c", size = 66616, upload-time = "2026-05-08T21:01:07.228Z" },
+    { url = "https://files.pythonhosted.org/packages/83/e4/5c7462e50625f051f37fb38b8224f7639f667184bbd34424ec83819bb1b7/propcache-0.5.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7467da8a9822bf1a55336f877340c5bcbd3c482afc43a99771169f74a26dedc", size = 63773, upload-time = "2026-05-08T21:01:08.514Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/b6/99238894047b13c823be25027e736626cd414a52a5e30d2c3347c2733529/propcache-0.5.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a6ddc6ac9e25de626c1f129c1b467d7ecd33ce2237d3fd0c4e429feef0a7ee1f", size = 63664, upload-time = "2026-05-08T21:01:09.874Z" },
+    { url = "https://files.pythonhosted.org/packages/85/1e/a3a1a63116a2b8edb415a8bb9a6f0c34bd03830b1e18e8ce2904e1dc1cf4/propcache-0.5.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2f22cbbac9e26a8e864c0985ff1268d5d939d53d9d9411a9824279097e03a2cb", size = 62643, upload-time = "2026-05-08T21:01:11.132Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/03/893cf147de2fc6543c5eaa07ad833170e7e2a2385725bbebe8c0503723bb/propcache-0.5.2-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:fc76378c62a0f04d0cd82fbb1a2cd2d7e28fcb40d5873f28a6c44e388aaa2751", size = 59595, upload-time = "2026-05-08T21:01:12.387Z" },
+    { url = "https://files.pythonhosted.org/packages/86/3b/04c1a2e12c57766568ba75ba72b3bf2042818d4c1425fab6fc07155c7cff/propcache-0.5.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:acd2c8edba48e31e58a363b8cf4e5c7db3b04b3f9e371f601df30d9b0d244836", size = 65711, upload-time = "2026-05-08T21:01:13.676Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/34/80f8d0099f8d6bacc4de1624c85672681c8cd1149ca2da0e38fd120b817f/propcache-0.5.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:452b5065457eb9991ec5eb38ff41d6cd4c991c9ac7c531c4d5849ae473a9a13f", size = 64247, upload-time = "2026-05-08T21:01:14.936Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/1a/8b08f3a5f1037e9e370c55883ceeeee0f6dd0416fb2d2d67b8bfc91f2a79/propcache-0.5.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:3430bb2bfe1331885c427745a751e774ee679fd4344f80b97bf879815fe8fa55", size = 67102, upload-time = "2026-05-08T21:01:16.281Z" },
+    { url = "https://files.pythonhosted.org/packages/34/68/8bdb7bb7756d76e005490649d10e4a8369e610c74d619f71e1aedf889e9c/propcache-0.5.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cef6cea3922890dd6c9654971001fa797b526c16ab5e1e46c05fd6f877be7568", size = 64964, upload-time = "2026-05-08T21:01:17.57Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/aa/50fb0b5d3968b61a510926ff8b8465f1d6e976b3ab74496d7a4b9fc42515/propcache-0.5.2-cp313-cp313t-win32.whl", hash = "sha256:72d61e16dd78228b58c5d47be830ff3da7e5f139abdf0aef9d86cde1c5cf2191", size = 42546, upload-time = "2026-05-08T21:01:18.946Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/4c/0ddbae64321bd4a95bcbfc19307238016b5b1fee645c84626c8d539e5b74/propcache-0.5.2-cp313-cp313t-win_amd64.whl", hash = "sha256:0958834041a0166d343b8d2cedcd8bcbaeb4fdbe0cf08320c5379f143c3be6e7", size = 46330, upload-time = "2026-05-08T21:01:20.162Z" },
+    { url = "https://files.pythonhosted.org/packages/00/d9/9cddc8efb78d8af264c5ec9f6d10b62f57c515feda8d321595f56010fb23/propcache-0.5.2-cp313-cp313t-win_arm64.whl", hash = "sha256:6de8bd93ddde9b992cf2b2e0d796d501a19026b5b9fd87356d7d0779531a8d96", size = 40521, upload-time = "2026-05-08T21:01:21.399Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/ed/1cdcab6ba3d6ab7feca11fc14f0eeea80755bb53ef4e892079f31b10a25f/propcache-0.5.2-py3-none-any.whl", hash = "sha256:be1ddfcbb376e3de5d2e2db1d58d6d67463e6b4f9f040c000de8e300295465fe", size = 14036, upload-time = "2026-05-08T21:02:10.673Z" },
 ]
 
 [[package]]
 name = "protobuf"
-version = "5.29.6"
+version = "7.35.0rc2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/7e/57/394a763c103e0edf87f0938dafcd918d53b4c011dfc5c8ae80f3b0452dbb/protobuf-5.29.6.tar.gz", hash = "sha256:da9ee6a5424b6b30fd5e45c5ea663aef540ca95f9ad99d1e887e819cdf9b8723", size = 425623, upload-time = "2026-02-04T22:54:40.584Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f7/30/c45dd8cb38689d923a3ebf2a708ff0fcdacc4729cda4190fc63c94e39ee9/protobuf-7.35.0rc2.tar.gz", hash = "sha256:54090668dd319bfd1ee843e8811ec98772a81dd9f753c2fd8351e7d94f35afe4", size = 458797, upload-time = "2026-05-06T23:02:01.773Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/06/db/49b05966fd208ae3f44dcd33837b6243b4915c57561d730a43f881f24dea/protobuf-5.29.6-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:b5a169e664b4057183a34bdc424540e86eea47560f3c123a0d64de4e137f9269", size = 418619, upload-time = "2026-02-04T22:54:30.266Z" },
-    { url = "https://files.pythonhosted.org/packages/b7/d7/48cbf6b0c3c39761e47a99cb483405f0fde2be22cf00d71ef316ce52b458/protobuf-5.29.6-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:a8866b2cff111f0f863c1b3b9e7572dc7eaea23a7fae27f6fc613304046483e6", size = 320284, upload-time = "2026-02-04T22:54:31.782Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/dd/cadd6ec43069247d91f6345fa7a0d2858bef6af366dbd7ba8f05d2c77d3b/protobuf-5.29.6-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:e3387f44798ac1106af0233c04fb8abf543772ff241169946f698b3a9a3d3ab9", size = 320478, upload-time = "2026-02-04T22:54:32.909Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/cb/e3065b447186cb70aa65acc70c86baf482d82bf75625bf5a2c4f6919c6a3/protobuf-5.29.6-py3-none-any.whl", hash = "sha256:6b9edb641441b2da9fa8f428760fc136a49cf97a52076010cf22a2ff73438a86", size = 173126, upload-time = "2026-02-04T22:54:39.462Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/ed/eca5d833cdba1786dc2b384f0d0cba6896ce6f7928c779a8394e315073f3/protobuf-7.35.0rc2-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:3f195cab1bd9c5cb0090a1a6fe179a82573182cc79756c775067f032ee92b5a3", size = 433330, upload-time = "2026-05-06T23:01:53.307Z" },
+    { url = "https://files.pythonhosted.org/packages/08/7a/a9a4b46c2db77263189f618e429557f98a14174de10c9cea4534dddcf226/protobuf-7.35.0rc2-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:f54e7049de127a668870a200501ecc1da980b66d8a0c7bb9c67a49dff433a408", size = 328953, upload-time = "2026-05-06T23:01:55.271Z" },
+    { url = "https://files.pythonhosted.org/packages/30/2f/f9f4cff08176386a07db120d48bbab738d26cf3691b3985cff2f3751d65e/protobuf-7.35.0rc2-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:37056d1ff87b638e2d5e2ed17e7a120c75f5666fd6caee75d74dda9084664946", size = 344134, upload-time = "2026-05-06T23:01:56.607Z" },
+    { url = "https://files.pythonhosted.org/packages/32/cf/7878d361981883c7cb8662d01a62faf59e102d01b43875bb542f16bc8f84/protobuf-7.35.0rc2-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:d7cd3cf4dece37f4551e95044c1f7775a060c898d1d6b4af1dd62b3705c60c8d", size = 327233, upload-time = "2026-05-06T23:01:57.634Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/75/4b21dee3a728dff7646fe87fc2a4876294ee7f54790c1da2ed0a03337f45/protobuf-7.35.0rc2-cp310-abi3-win32.whl", hash = "sha256:33b4dec7eb9b66fe8a970c9eb6ebce1e071a6eed63f3d21f2f4909e24f385708", size = 429049, upload-time = "2026-05-06T23:01:58.579Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/ab/f544d50d824f05828ad0839ba06e2fa97c9e7ac82c6e6e2db0d7726bf1a8/protobuf-7.35.0rc2-cp310-abi3-win_amd64.whl", hash = "sha256:583f80b076e8bedccf3054353be779dca21855b7ca191187eb695bd7d1441c67", size = 440098, upload-time = "2026-05-06T23:01:59.664Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/d9/5fb75bef53c54c081850d467d9026dbc646e139fdf9ac34bc9d4ad7ce1cd/protobuf-7.35.0rc2-py3-none-any.whl", hash = "sha256:1efdc8c97f5893da41b6de06bc1f377eaf5aea05e800dff1574337c22b01dc6e", size = 171762, upload-time = "2026-05-06T23:02:00.819Z" },
 ]
 
 [[package]]
 name = "psutil"
-version = "7.2.1"
+version = "7.2.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = "sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253, upload-time = "2025-12-29T08:26:00.169Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624, upload-time = "2025-12-29T08:26:04.255Z" },
-    { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132, upload-time = "2025-12-29T08:26:06.228Z" },
-    { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612, upload-time = "2025-12-29T08:26:08.276Z" },
-    { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201, upload-time = "2025-12-29T08:26:10.622Z" },
-    { url = "https://files.pythonhosted.org/packages/05/c2/5fb764bd61e40e1fe756a44bd4c21827228394c17414ade348e28f83cd79/psutil-7.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:494c513ccc53225ae23eec7fe6e1482f1b8a44674241b54561f755a898650679", size = 129716, upload-time = "2025-12-29T08:26:16.017Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/d2/935039c20e06f615d9ca6ca0ab756cf8408a19d298ffaa08666bc18dc805/psutil-7.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3fce5f92c22b00cdefd1645aa58ab4877a01679e901555067b1bd77039aa589f", size = 130133, upload-time = "2025-12-29T08:26:18.009Z" },
-    { url = "https://files.pythonhosted.org/packages/77/69/19f1eb0e01d24c2b3eacbc2f78d3b5add8a89bf0bb69465bc8d563cc33de/psutil-7.2.1-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93f3f7b0bb07711b49626e7940d6fe52aa9940ad86e8f7e74842e73189712129", size = 181518, upload-time = "2025-12-29T08:26:20.241Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/6d/7e18b1b4fa13ad370787626c95887b027656ad4829c156bb6569d02f3262/psutil-7.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d34d2ca888208eea2b5c68186841336a7f5e0b990edec929be909353a202768a", size = 184348, upload-time = "2025-12-29T08:26:22.215Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137, upload-time = "2025-12-29T08:26:27.759Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947, upload-time = "2025-12-29T08:26:29.548Z" },
-    { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694, upload-time = "2025-12-29T08:26:32.147Z" },
-    { url = "https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136, upload-time = "2025-12-29T08:26:34.079Z" },
-    { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108, upload-time = "2025-12-29T08:26:36.225Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload-time = "2025-12-29T08:26:39.21Z" },
+    { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" },
+    { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" },
+    { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" },
+    { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" },
+    { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" },
+    { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
 ]
 
 [[package]]
 name = "pyarrow"
-version = "23.0.1"
+version = "24.0.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 1180261, upload-time = "2026-04-21T10:51:25.837Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066, upload-time = "2026-02-16T10:10:45.487Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526, upload-time = "2026-02-16T10:10:52.266Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279, upload-time = "2026-02-16T10:11:01.557Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798, upload-time = "2026-02-16T10:11:09.401Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446, upload-time = "2026-02-16T10:11:17.781Z" },
-    { url = "https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972, upload-time = "2026-02-16T10:11:26.185Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544, upload-time = "2026-02-16T10:11:32.535Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911, upload-time = "2026-02-16T10:11:39.813Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337, upload-time = "2026-02-16T10:11:47.764Z" },
-    { url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944, upload-time = "2026-02-16T10:11:56.607Z" },
-    { url = "https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269, upload-time = "2026-02-16T10:12:04.47Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794, upload-time = "2026-02-16T10:12:11.797Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755, upload-time = "2026-02-16T10:12:32.819Z" },
-    { url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826, upload-time = "2026-02-16T10:12:38.949Z" },
-    { url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859, upload-time = "2026-02-16T10:12:45.467Z" },
-    { url = "https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443, upload-time = "2026-02-16T10:12:55.525Z" },
-    { url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991, upload-time = "2026-02-16T10:13:04.729Z" },
-    { url = "https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077, upload-time = "2026-02-16T10:13:14.147Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692, upload-time = "2026-02-16T10:13:21.541Z" },
-    { url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383, upload-time = "2026-02-16T10:13:28.63Z" },
-    { url = "https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119, upload-time = "2026-02-16T10:13:35.506Z" },
-    { url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199, upload-time = "2026-02-16T10:13:42.504Z" },
-    { url = "https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435, upload-time = "2026-02-16T10:13:49.226Z" },
-    { url = "https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149, upload-time = "2026-02-16T10:13:57.238Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759, upload-time = "2026-04-21T10:48:07.258Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471, upload-time = "2026-04-21T10:48:13.347Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981, upload-time = "2026-04-21T10:48:20.201Z" },
+    { url = "https://files.pythonhosted.org/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172, upload-time = "2026-04-21T10:48:27.541Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733, upload-time = "2026-04-21T10:48:34.7Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335, upload-time = "2026-04-21T10:48:42.099Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748, upload-time = "2026-04-21T10:49:42.532Z" },
+    { url = "https://files.pythonhosted.org/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554, upload-time = "2026-04-21T10:48:48.526Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301, upload-time = "2026-04-21T10:48:55.181Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929, upload-time = "2026-04-21T10:49:03.676Z" },
+    { url = "https://files.pythonhosted.org/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365, upload-time = "2026-04-21T10:49:11.714Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819, upload-time = "2026-04-21T10:49:21.474Z" },
+    { url = "https://files.pythonhosted.org/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252, upload-time = "2026-04-21T10:49:31.164Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127, upload-time = "2026-04-21T10:49:37.334Z" },
 ]
 
 [[package]]
 name = "pycparser"
-version = "2.23"
+version = "3.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
 ]
 
 [[package]]
 name = "pydantic"
-version = "2.12.5"
+version = "2.13.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "annotated-types", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pydantic-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typing-inspection", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "annotated-types", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pydantic-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typing-inspection", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/18/a5/b60d21ac674192f8ab0ba4e9fd860690f9b4a6e51ca5df118733b487d8d6/pydantic-2.13.4.tar.gz", hash = "sha256:c40756b57adaa8b1efeeced5c196f3f3b7c435f90e84ea7f443901bec8099ef6", size = 844775, upload-time = "2026-05-06T13:43:05.343Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262, upload-time = "2026-05-06T13:43:02.641Z" },
 ]
 
 [[package]]
 name = "pydantic-core"
-version = "2.41.5"
+version = "2.46.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9d/56/921726b776ace8d8f5db44c4ef961006580d91dc52b803c489fafd1aa249/pydantic_core-2.46.4.tar.gz", hash = "sha256:62f875393d7f270851f20523dd2e29f082bcc82292d66db2b64ea71f64b6e1c1", size = 471464, upload-time = "2026-05-06T13:37:06.98Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" },
-    { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" },
-    { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" },
-    { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" },
-    { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" },
-    { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" },
-    { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" },
-    { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" },
-    { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" },
-    { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" },
-    { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" },
-    { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" },
-    { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" },
-    { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" },
-    { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" },
-    { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" },
-    { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" },
-    { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" },
-    { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" },
-    { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" },
+    { url = "https://files.pythonhosted.org/packages/51/a2/5d30b469c5267a17b39dec53208222f76a8d351dfac4af661888c5aee77d/pydantic_core-2.46.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5d5902252db0d3cedf8d4a1bc68f70eeb430f7e4c7104c8c476753519b423008", size = 2106306, upload-time = "2026-05-06T13:37:48.029Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/81/4fa520eaffa8bd7d1525e644cd6d39e7d60b1592bc5b516693c7340b50f1/pydantic_core-2.46.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94f0688e7b8d0a67abf40e57a7eaaecd17cc9586706a31b76c031f63df052b4", size = 1951906, upload-time = "2026-05-06T13:37:17.012Z" },
+    { url = "https://files.pythonhosted.org/packages/03/d5/fd02da45b659668b05923b17ba3a0100a0a3d5541e3bd8fcc4ecb711309e/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f027324c56cd5406ca49c124b0db10e56c69064fec039acc571c29020cc87c76", size = 1976802, upload-time = "2026-05-06T13:37:35.113Z" },
+    { url = "https://files.pythonhosted.org/packages/21/f2/95727e1368be3d3ed485eaab7adbd7dda408f33f7a36e8b48e0144002b91/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e739fee756ba1010f8bcccb534252e85a35fe45ae92c295a06059ce58b74ccd3", size = 2052446, upload-time = "2026-05-06T13:37:12.313Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/86/5d99feea3f77c7234b8718075b23db11532773c1a0dbd9b9490215dc2eeb/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d56801be94b86a9da183e5f3766e6310752b99ff647e38b09a9500d88e46e76", size = 2232757, upload-time = "2026-05-06T13:39:01.149Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/3a/508ac615935ef7588cf6d9e9b91309fdc2da751af865e02a9098de88258c/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2412e734dcb48da14d4e4006b82b46b74f2518b8a26ee7e58c6844a6cd6d03c4", size = 2309275, upload-time = "2026-05-06T13:37:41.406Z" },
+    { url = "https://files.pythonhosted.org/packages/07/f8/41db9de19d7987d6b04715a02b3b40aea467000275d9d758ffaa31af7d50/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9551187363ffc0de2a00b2e47c25aeaeb1020b69b668762966df15fc5659dd5a", size = 2094467, upload-time = "2026-05-06T13:39:18.847Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/e2/f35033184cb11d0052daf4416e8e10a502ea2ac006fc4f459aee872727d1/pydantic_core-2.46.4-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0186750b482eefa11d7f435892b09c5c606193ef3375bcf94aa00ae6bfb66262", size = 2134417, upload-time = "2026-05-06T13:40:17.944Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/7b/6ceeb1cc90e193862f444ebe373d8fdf613f0a82572dde03fb10734c6c71/pydantic_core-2.46.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5855698a4856556d86e8e6cd8434bc3ac0314ee8e12089ae0e143f64c6256e4e", size = 2179782, upload-time = "2026-05-06T13:40:32.618Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/f2/c8d7773ede6af08036423a00ae0ceffce266c3c52a096c435d68c896083f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cbaf13819775b7f769bf4a1f066cb6df7a28d4480081a589828ef190226881cd", size = 2188782, upload-time = "2026-05-06T13:36:51.018Z" },
+    { url = "https://files.pythonhosted.org/packages/59/31/0c864784e31f09f05cdd87606f08923b9c9e7f6e51dd27f20f62f975ce9f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:633147d34cf4550417f12e2b1a0383973bdf5cdfde212cb09e9a581cf10820be", size = 2328334, upload-time = "2026-05-06T13:40:37.764Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/eb/4f6c8a41efa30baa755590f4141abf3a8c370fab610915733e74134a7270/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:82cf5301172168103724d49a1444d3378cb20cdee30b116a1bd6031236298a5d", size = 2372986, upload-time = "2026-05-06T13:39:34.152Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/24/b375a480d53113860c299764bfe9f349a3dc9108b3adc0d7f0d786492ebf/pydantic_core-2.46.4-cp313-cp313-win32.whl", hash = "sha256:9fa8ae11da9e2b3126c6426f147e0fba88d96d65921799bb30c6abd1cb2c97fb", size = 1973693, upload-time = "2026-05-06T13:37:55.072Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/e8/cff247591966f2d22ec8c003cd7587e27b7ba7b81ab2fb888e3ab75dc285/pydantic_core-2.46.4-cp313-cp313-win_amd64.whl", hash = "sha256:6b3ace8194b0e5204818c92802dcdca7fc6d88aabbb799d7c795540d9cd6d292", size = 2071819, upload-time = "2026-05-06T13:38:49.139Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/1a/f4aee670d5670e9e148e0c82c7db98d780be566c6e6a97ee8035528ca0b3/pydantic_core-2.46.4-cp313-cp313-win_arm64.whl", hash = "sha256:184c081504d17f1c1066e430e117142b2c77d9448a97f7b65c6ac9fd9aee238d", size = 2027411, upload-time = "2026-05-06T13:40:45.796Z" },
+]
+
+[[package]]
+name = "pyee"
+version = "13.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8b/04/e7c1fe4dc78a6fdbfd6c337b1c3732ff543b8a397683ab38378447baa331/pyee-13.0.1.tar.gz", hash = "sha256:0b931f7c14535667ed4c7e0d531716368715e860b988770fc7eb8578d1f67fc8", size = 31655, upload-time = "2026-02-14T21:12:28.044Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/c4/b4d4827c93ef43c01f599ef31453ccc1c132b353284fc6c87d535c233129/pyee-13.0.1-py3-none-any.whl", hash = "sha256:af2f8fede4171ef667dfded53f96e2ed0d6e6bd7ee3bb46437f77e3b57689228", size = 15659, upload-time = "2026-02-14T21:12:26.263Z" },
 ]
 
 [[package]]
 name = "pygments"
-version = "2.19.2"
+version = "2.20.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
 ]
 
 [[package]]
 name = "pyinstaller"
-version = "6.17.0"
+version = "6.20.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "altgraph", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "macholib", marker = "sys_platform == 'darwin'" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyinstaller-hooks-contrib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "altgraph", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "macholib", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyinstaller-hooks-contrib", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/01/80/9e0dad9c69a7cfd4b5aaede8c6225d762bab7247a2a6b7651e1995522001/pyinstaller-6.17.0.tar.gz", hash = "sha256:be372bd911392b88277e510940ac32a5c2a6ce4b8d00a311c78fa443f4f27313", size = 4014147, upload-time = "2025-11-24T19:43:32.109Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/60/d03d52e6690d4e9caf333dcd14550cde634ce6c118b3bc8fa3112c3186fd/pyinstaller-6.20.0.tar.gz", hash = "sha256:95c5c7e03d5d61e9dfb8ef259c699cf492bb1041beb6dbe83696608cec07347a", size = 4048728, upload-time = "2026-04-22T20:59:36.96Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/35/f5/37e419d84d5284ecab11ef8b61306a3b978fe6f0fd69a9541e16bfd72e65/pyinstaller-6.17.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:4e446b8030c6e5a2f712e3f82011ecf6c7ead86008357b0d23a0ec4bcde31dac", size = 1031880, upload-time = "2025-11-24T19:42:30.862Z" },
-    { url = "https://files.pythonhosted.org/packages/9e/b6/2e184879ab9cf90a1d2867fdd34d507c4d246b3cc52ca05aad00bfc70ee7/pyinstaller-6.17.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:aa9fd87aaa28239c6f0d0210114029bd03f8cac316a90bab071a5092d7c85ad7", size = 731968, upload-time = "2025-11-24T19:42:35.421Z" },
-    { url = "https://files.pythonhosted.org/packages/40/76/f529de98f7e5cce7904c19b224990003fc2267eda2ee5fdd8452acb420a9/pyinstaller-6.17.0-py3-none-manylinux2014_i686.whl", hash = "sha256:060b122e43e7c0b23e759a4153be34bd70914135ab955bb18a67181e0dca85a2", size = 743217, upload-time = "2025-11-24T19:42:39.286Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/10/c02bfbb050cafc4c353cf69baf95407e211e1372bd286ab5ce5cbc13a30a/pyinstaller-6.17.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:cd213d1a545c97dfe4a3c40e8213ff7c5127fc115c49229f27a3fa541503444b", size = 741119, upload-time = "2025-11-24T19:42:43.12Z" },
-    { url = "https://files.pythonhosted.org/packages/11/9d/69fdacfd9335695f5900a376cfe3e4aed28f0720ffc15fee81fdb9d920bc/pyinstaller-6.17.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:89c0d18ba8b62c6607abd8cf2299ae5ffa5c36d8c47f39608ce8c3f357f6099f", size = 738111, upload-time = "2025-11-24T19:42:46.97Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/1e/e8e36e1568f6865ac706c6e1f875c1a346ddaa9f9a8f923d66545d2240ed/pyinstaller-6.17.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2a147b83cdebb07855bd5a663600891550062373a2ca375c58eacead33741a27", size = 737795, upload-time = "2025-11-24T19:42:50.675Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/15/9dc0f81ccb746c27bfa6ee53164422fe47ee079c7a717d9c4791aba78797/pyinstaller-6.17.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:f8cfbbfa6708e54fb936df6dd6eafaf133e84efb0d2fe25b91cfeefa793c4ca4", size = 736891, upload-time = "2025-11-24T19:42:54.458Z" },
-    { url = "https://files.pythonhosted.org/packages/97/e6/bed54821c1ebe1275c559661d3e7bfa23c406673b515252dfbf89db56c65/pyinstaller-6.17.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:97f4c1942f7b4cd73f9e38b49cc8f5f8a6fbb44922cb60dd3073a189b77ee1ae", size = 736752, upload-time = "2025-11-24T19:42:58.144Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/e4/e228d6d1bbb7fd62dc660a8fb202a583b023d3a3624ca95d1a9290ee4d6a/pyinstaller-6.20.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:bf3be4e1284ee78ddccba5e29f99443a12a7b4673168288ffc4c9d38c6f7b90e", size = 1047642, upload-time = "2026-04-22T20:58:32.006Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/bd/afb631bcb3f9040efebd4f6d067f0828b51710818f69fb41a2d4b7787f52/pyinstaller-6.20.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:72ae9c1fdea134afa791f58bdc9a1934d5c7609753c111e0026bfc272b32b712", size = 742494, upload-time = "2026-04-22T20:58:36.285Z" },
+    { url = "https://files.pythonhosted.org/packages/76/08/0729a5bac14754150e5d83b39d87d842eb42b0bffcaa03dbad6252e23a39/pyinstaller-6.20.0-py3-none-manylinux2014_i686.whl", hash = "sha256:1031bcc307f3fbeffd4e162723e64d46dbf591c82dd0997413afb2a07328b941", size = 754191, upload-time = "2026-04-22T20:58:40.603Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/82/bc0ee4c7b97db1958eb651e0da9fb1e672e5ae53ca8867fd97701de52906/pyinstaller-6.20.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:8df3b3f347659fa2562d8d193a98ad4600133b8b8d07c268df89e4154376750e", size = 751902, upload-time = "2026-04-22T20:58:44.7Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/e7/770002d6aaa54173881cb2c49bb195ba67b97bf39bac1cdf320f28401629/pyinstaller-6.20.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:b0d3cc9dd8120d448459bd3880a12e2f9774c51443af49047801446377999a59", size = 748634, upload-time = "2026-04-22T20:58:48.579Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/db/68ba1fccb71278b2124fb90b37b7c8c0bc4c1173fba45b94466df3d9cb7f/pyinstaller-6.20.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:03696bb6350177c6bc23bcaf78e71a33c4a89b6754dd90d1be2f318e978c918b", size = 748490, upload-time = "2026-04-22T20:58:52.749Z" },
+    { url = "https://files.pythonhosted.org/packages/03/0f/ac77ffa996a56be3d5c8f85734a007f8347240691657f9704e7de2527fa3/pyinstaller-6.20.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:6357f1699f6af84f37e7367f031d4f68abdba65543b83990c9e8f5a4cebed0b7", size = 747650, upload-time = "2026-04-22T20:58:57.093Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/56/1ee91c3a2bc10ca1f36da10a6fd55ff7efc4dec367171eb25992a827874f/pyinstaller-6.20.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:0ab39c690abad26ba148e8f664f0478acc82a733997f4f22e757774832802da9", size = 747413, upload-time = "2026-04-22T20:59:01.174Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/55/ae264339996953c4cdf9d89d916a0a8fa26a83cf917a742fff8b9d5f3fe8/pyinstaller-6.20.0-py3-none-win32.whl", hash = "sha256:9a7637e8e44b4387b13667fdcaac86ab6b29c446c16d34d8401539b81838759c", size = 1331584, upload-time = "2026-04-22T20:59:07.201Z" },
+    { url = "https://files.pythonhosted.org/packages/76/8c/300f57578882cce259bfb5ae56fda3b69caa3fe9df40a176c719920ea6e2/pyinstaller-6.20.0-py3-none-win_amd64.whl", hash = "sha256:d588844e890ee80c4365867f98146636e1849bbca8e4284bbf0c809aff0f161a", size = 1391851, upload-time = "2026-04-22T20:59:14.024Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/ea/b2f8e1642aecda78c0b75c7321f708e49e10bb3c00dd4f148c40761a1527/pyinstaller-6.20.0-py3-none-win_arm64.whl", hash = "sha256:bd53282c0a73e5c95573e1ddc8e5d564d4932bec91efbaed4dc5fdff9c2ae7f2", size = 1332259, upload-time = "2026-04-22T20:59:20.509Z" },
 ]
 
 [[package]]
 name = "pyinstaller-hooks-contrib"
-version = "2025.11"
+version = "2026.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/45/2f/2c68b6722d233dae3e5243751aafc932940b836919cfaca22dd0c60d417c/pyinstaller_hooks_contrib-2025.11.tar.gz", hash = "sha256:dfe18632e06655fa88d218e0d768fd753e1886465c12a6d4bce04f1aaeec917d", size = 169183, upload-time = "2025-12-23T12:59:37.361Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1a/67/f4452d68793fb15beba4f19ef39a38a8822f0da7452b503c400d5a21f5c1/pyinstaller_hooks_contrib-2026.5.tar.gz", hash = "sha256:f066dfca8f7c45ff6336c9cf9fe25b4e48bfeb322a1aa24faaedfb8a8d1b0b08", size = 173689, upload-time = "2026-05-04T22:36:55.124Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a7/c4/3a096c6e701832443b957b9dac18a163103360d0c7f5842ca41695371148/pyinstaller_hooks_contrib-2025.11-py3-none-any.whl", hash = "sha256:777e163e2942474aa41a8e6d31ac1635292d63422c3646c176d584d04d971c34", size = 449478, upload-time = "2025-12-23T12:59:35.987Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/5c/fd465d11da4d12b50d7eb5d2ee2ceb780d8d049dbb489f3828d131e387af/pyinstaller_hooks_contrib-2026.5-py3-none-any.whl", hash = "sha256:ea1535783fbdac4626351709e83f3ea80b681d3a4745763ebb407b5e27342eb9", size = 457314, upload-time = "2026-05-04T22:36:53.598Z" },
 ]
 
 [[package]]
 name = "pyparsing"
-version = "3.3.1"
+version = "3.3.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/33/c1/1d9de9aeaa1b89b0186e5fe23294ff6517fce1bc69149185577cd31016b2/pyparsing-3.3.1.tar.gz", hash = "sha256:47fad0f17ac1e2cad3de3b458570fbc9b03560aa029ed5e16ee5554da9a2251c", size = 1550512, upload-time = "2025-12-23T03:14:04.391Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8b/40/2614036cdd416452f5bf98ec037f38a1afb17f327cb8e6b652d4729e0af8/pyparsing-3.3.1-py3-none-any.whl", hash = "sha256:023b5e7e5520ad96642e2c6db4cb683d3970bd640cdf7115049a6e9c3682df82", size = 121793, upload-time = "2025-12-23T03:14:02.103Z" },
+    { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
 ]
 
 [[package]]
@@ -2377,13 +2272,13 @@ name = "pytablewriter"
 version = "1.2.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "dataproperty", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tabledata", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tcolorpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typepy", extra = ["datetime"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "dataproperty", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tabledata", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tcolorpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typepy", extra = ["datetime"], marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/f6/a1/617730f290f04d347103ab40bf67d317df6691b14746f6e1ea039fb57062/pytablewriter-1.2.1.tar.gz", hash = "sha256:7bd0f4f397e070e3b8a34edcf1b9257ccbb18305493d8350a5dbc9957fced959", size = 619241, upload-time = "2025-01-01T15:37:00.04Z" }
 wheels = [
@@ -2392,41 +2287,42 @@ wheels = [
 
 [[package]]
 name = "pytest"
-version = "9.0.2"
+version = "9.0.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "iniconfig", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "iniconfig", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
 ]
 
 [[package]]
 name = "pytest-asyncio"
-version = "1.3.0"
+version = "1.4.0a2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/2c/7c/f0831e89e025cea4f7b0201743c02c1016d86b9d4d6e86da8d556f6e86e0/pytest_asyncio-1.4.0a2.tar.gz", hash = "sha256:7cdef3b22cdfe423829eb594a25f7c23c8b3ec2a82d014a56e5179038eb3e674", size = 57596, upload-time = "2026-05-02T07:40:45.489Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/0f/eebdc66222f6942fe2962894c419f14693cb2401bdb5c000b86742aab004/pytest_asyncio-1.4.0a2-py3-none-any.whl", hash = "sha256:b6f8c01beaca5dc05c88a95b7a9df7660da4cef319cf685d886af6715261e9d4", size = 16957, upload-time = "2026-05-02T07:40:43.636Z" },
 ]
 
 [[package]]
 name = "pytest-env"
-version = "1.2.0"
+version = "1.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/13/12/9c87d0ca45d5992473208bcef2828169fa7d39b8d7fc6e3401f5c08b8bf7/pytest_env-1.2.0.tar.gz", hash = "sha256:475e2ebe8626cee01f491f304a74b12137742397d6c784ea4bc258f069232b80", size = 8973, upload-time = "2025-10-09T19:15:47.42Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ff/69/4db1c30625af0621df8dbe73797b38b6d1b04e15d021dd5d26a6d297f78c/pytest_env-1.6.0.tar.gz", hash = "sha256:ac02d6fba16af54d61e311dd70a3c61024a4e966881ea844affc3c8f0bf207d3", size = 16163, upload-time = "2026-03-12T22:39:43.78Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/27/98/822b924a4a3eb58aacba84444c7439fce32680592f394de26af9c76e2569/pytest_env-1.2.0-py3-none-any.whl", hash = "sha256:d7e5b7198f9b83c795377c09feefa45d56083834e60d04767efd64819fc9da00", size = 6251, upload-time = "2025-10-09T19:15:46.077Z" },
+    { url = "https://files.pythonhosted.org/packages/27/16/ad52f56b96d851a2bcfdc1e754c3531341885bd7177a128c13ff2ca72ab4/pytest_env-1.6.0-py3-none-any.whl", hash = "sha256:1e7f8a62215e5885835daaed694de8657c908505b964ec8097a7ce77b403d9a3", size = 10400, upload-time = "2026-03-12T22:39:41.887Z" },
 ]
 
 [[package]]
@@ -2434,7 +2330,7 @@ name = "python-dateutil"
 version = "2.9.0.post0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
 wheels = [
@@ -2442,21 +2338,30 @@ wheels = [
 ]
 
 [[package]]
-name = "python-multipart"
-version = "0.0.21"
+name = "python-dotenv"
+version = "1.2.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/78/96/804520d0850c7db98e5ccb70282e29208723f0964e88ffd9d0da2f52ea09/python_multipart-0.0.21.tar.gz", hash = "sha256:7137ebd4d3bbf70ea1622998f902b97a29434a9e8dc40eb203bbcf7c2a2cba92", size = 37196, upload-time = "2025-12-17T09:24:22.446Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" },
+]
+
+[[package]]
+name = "python-multipart"
+version = "0.0.29"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/fe/70bd71a6738b09a0bdf6480ca6436b167469ca4578b2a0efbe390b4b0e70/python_multipart-0.0.29.tar.gz", hash = "sha256:643e93849196645e2dbdd81a0f8829a23123ad7f797a84a364c6fb3563f18904", size = 45678, upload-time = "2026-05-17T17:29:47.654Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8f/cb/769cfc37177252872a45a71f3fbdde9d51b471a3f3c14bfe95dde3407386/python_multipart-0.0.29-py3-none-any.whl", hash = "sha256:2ddcc971cef266225f54f552d8fa10bcfbb1f14446caec199060daac59ff2d69", size = 29640, upload-time = "2026-05-17T17:29:45.69Z" },
 ]
 
 [[package]]
 name = "pytz"
-version = "2026.1.post1"
+version = "2026.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ff/46/dd499ec9038423421951e4fad73051febaa13d2df82b4064f87af8b8c0c3/pytz-2026.2.tar.gz", hash = "sha256:0e60b47b29f21574376f218fe21abc009894a2321ea16c6754f3cad6eb7cdd6a", size = 320861, upload-time = "2026-05-04T01:35:29.667Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/dd/96da98f892250475bdf2328112d7468abdd4acc7b902b6af23f4ed958ea0/pytz-2026.2-py2.py3-none-any.whl", hash = "sha256:04156e608bee23d3792fd45c94ae47fae1036688e75032eea2e3bf0323d1f126", size = 510141, upload-time = "2026-05-04T01:35:27.408Z" },
 ]
 
 [[package]]
@@ -2472,20 +2377,9 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" },
     { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" },
     { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" },
-    { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" },
-    { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" },
-    { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" },
-    { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" },
-    { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" },
-    { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" },
-    { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" },
+    { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" },
+    { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" },
+    { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
 ]
 
 [[package]]
@@ -2493,9 +2387,9 @@ name = "readme-renderer"
 version = "44.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "docutils", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "nh3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "docutils", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "nh3", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/5a/a9/104ec9234c8448c4379768221ea6df01260cd6c2ce13182d4eac531c8342/readme_renderer-44.0.tar.gz", hash = "sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1", size = 32056, upload-time = "2024-07-08T15:00:57.805Z" }
 wheels = [
@@ -2504,69 +2398,57 @@ wheels = [
 
 [[package]]
 name = "regex"
-version = "2025.11.3"
+version = "2026.5.9"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/cc/a9/546676f25e573a4cf00fe8e119b78a37b6a8fe2dc95cda877b30889c9c45/regex-2025.11.3.tar.gz", hash = "sha256:1fedc720f9bb2494ce31a58a1631f9c82df6a09b49c19517ea5cc280b4541e01", size = 414669, upload-time = "2025-11-03T21:34:22.089Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/0e/49aee608ad09480e7fd276898c99ec6192985fa331abe4eb3a986094490b/regex-2026.5.9.tar.gz", hash = "sha256:a8234aa23ec39894bfe4a3f1b85616a7032481964a13ac6fc9f10de4f6fca270", size = 416074, upload-time = "2026-05-09T23:15:19.37Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e1/a7/dda24ebd49da46a197436ad96378f17df30ceb40e52e859fc42cac45b850/regex-2025.11.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c1e448051717a334891f2b9a620fe36776ebf3dd8ec46a0b877c8ae69575feb4", size = 489081, upload-time = "2025-11-03T21:31:55.9Z" },
-    { url = "https://files.pythonhosted.org/packages/19/22/af2dc751aacf88089836aa088a1a11c4f21a04707eb1b0478e8e8fb32847/regex-2025.11.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9b5aca4d5dfd7fbfbfbdaf44850fcc7709a01146a797536a8f84952e940cca76", size = 291123, upload-time = "2025-11-03T21:31:57.758Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/88/1a3ea5672f4b0a84802ee9891b86743438e7c04eb0b8f8c4e16a42375327/regex-2025.11.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:04d2765516395cf7dda331a244a3282c0f5ae96075f728629287dfa6f76ba70a", size = 288814, upload-time = "2025-11-03T21:32:01.12Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/8c/f5987895bf42b8ddeea1b315c9fedcfe07cadee28b9c98cf50d00adcb14d/regex-2025.11.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d9903ca42bfeec4cebedba8022a7c97ad2aab22e09573ce9976ba01b65e4361", size = 798592, upload-time = "2025-11-03T21:32:03.006Z" },
-    { url = "https://files.pythonhosted.org/packages/99/2a/6591ebeede78203fa77ee46a1c36649e02df9eaa77a033d1ccdf2fcd5d4e/regex-2025.11.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:639431bdc89d6429f6721625e8129413980ccd62e9d3f496be618a41d205f160", size = 864122, upload-time = "2025-11-03T21:32:04.553Z" },
-    { url = "https://files.pythonhosted.org/packages/94/d6/be32a87cf28cf8ed064ff281cfbd49aefd90242a83e4b08b5a86b38e8eb4/regex-2025.11.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f117efad42068f9715677c8523ed2be1518116d1c49b1dd17987716695181efe", size = 912272, upload-time = "2025-11-03T21:32:06.148Z" },
-    { url = "https://files.pythonhosted.org/packages/62/11/9bcef2d1445665b180ac7f230406ad80671f0fc2a6ffb93493b5dd8cd64c/regex-2025.11.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4aecb6f461316adf9f1f0f6a4a1a3d79e045f9b71ec76055a791affa3b285850", size = 803497, upload-time = "2025-11-03T21:32:08.162Z" },
-    { url = "https://files.pythonhosted.org/packages/e5/a7/da0dc273d57f560399aa16d8a68ae7f9b57679476fc7ace46501d455fe84/regex-2025.11.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3b3a5f320136873cc5561098dfab677eea139521cb9a9e8db98b7e64aef44cbc", size = 787892, upload-time = "2025-11-03T21:32:09.769Z" },
-    { url = "https://files.pythonhosted.org/packages/da/4b/732a0c5a9736a0b8d6d720d4945a2f1e6f38f87f48f3173559f53e8d5d82/regex-2025.11.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:75fa6f0056e7efb1f42a1c34e58be24072cb9e61a601340cc1196ae92326a4f9", size = 858462, upload-time = "2025-11-03T21:32:11.769Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/f5/a2a03df27dc4c2d0c769220f5110ba8c4084b0bfa9ab0f9b4fcfa3d2b0fc/regex-2025.11.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:dbe6095001465294f13f1adcd3311e50dd84e5a71525f20a10bd16689c61ce0b", size = 850528, upload-time = "2025-11-03T21:32:13.906Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/09/e1cd5bee3841c7f6eb37d95ca91cdee7100b8f88b81e41c2ef426910891a/regex-2025.11.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:454d9b4ae7881afbc25015b8627c16d88a597479b9dea82b8c6e7e2e07240dc7", size = 789866, upload-time = "2025-11-03T21:32:15.748Z" },
-    { url = "https://files.pythonhosted.org/packages/20/28/fd0c63357caefe5680b8ea052131acbd7f456893b69cc2a90cc3e0dc90d4/regex-2025.11.3-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:1eb1ebf6822b756c723e09f5186473d93236c06c579d2cc0671a722d2ab14281", size = 491984, upload-time = "2025-11-03T21:32:23.466Z" },
-    { url = "https://files.pythonhosted.org/packages/df/ec/7014c15626ab46b902b3bcc4b28a7bae46d8f281fc7ea9c95e22fcaaa917/regex-2025.11.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1e00ec2970aab10dc5db34af535f21fcf32b4a31d99e34963419636e2f85ae39", size = 292673, upload-time = "2025-11-03T21:32:25.034Z" },
-    { url = "https://files.pythonhosted.org/packages/23/ab/3b952ff7239f20d05f1f99e9e20188513905f218c81d52fb5e78d2bf7634/regex-2025.11.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a4cb042b615245d5ff9b3794f56be4138b5adc35a4166014d31d1814744148c7", size = 291029, upload-time = "2025-11-03T21:32:26.528Z" },
-    { url = "https://files.pythonhosted.org/packages/21/7e/3dc2749fc684f455f162dcafb8a187b559e2614f3826877d3844a131f37b/regex-2025.11.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44f264d4bf02f3176467d90b294d59bf1db9fe53c141ff772f27a8b456b2a9ed", size = 807437, upload-time = "2025-11-03T21:32:28.363Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/0b/d529a85ab349c6a25d1ca783235b6e3eedf187247eab536797021f7126c6/regex-2025.11.3-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7be0277469bf3bd7a34a9c57c1b6a724532a0d235cd0dc4e7f4316f982c28b19", size = 873368, upload-time = "2025-11-03T21:32:30.4Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/18/2d868155f8c9e3e9d8f9e10c64e9a9f496bb8f7e037a88a8bed26b435af6/regex-2025.11.3-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0d31e08426ff4b5b650f68839f5af51a92a5b51abd8554a60c2fbc7c71f25d0b", size = 914921, upload-time = "2025-11-03T21:32:32.123Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/71/9d72ff0f354fa783fe2ba913c8734c3b433b86406117a8db4ea2bf1c7a2f/regex-2025.11.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e43586ce5bd28f9f285a6e729466841368c4a0353f6fd08d4ce4630843d3648a", size = 812708, upload-time = "2025-11-03T21:32:34.305Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/19/ce4bf7f5575c97f82b6e804ffb5c4e940c62609ab2a0d9538d47a7fdf7d4/regex-2025.11.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0f9397d561a4c16829d4e6ff75202c1c08b68a3bdbfe29dbfcdb31c9830907c6", size = 795472, upload-time = "2025-11-03T21:32:36.364Z" },
-    { url = "https://files.pythonhosted.org/packages/03/86/fd1063a176ffb7b2315f9a1b08d17b18118b28d9df163132615b835a26ee/regex-2025.11.3-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:dd16e78eb18ffdb25ee33a0682d17912e8cc8a770e885aeee95020046128f1ce", size = 868341, upload-time = "2025-11-03T21:32:38.042Z" },
-    { url = "https://files.pythonhosted.org/packages/12/43/103fb2e9811205e7386366501bc866a164a0430c79dd59eac886a2822950/regex-2025.11.3-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:ffcca5b9efe948ba0661e9df0fa50d2bc4b097c70b9810212d6b62f05d83b2dd", size = 854666, upload-time = "2025-11-03T21:32:40.079Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/22/e392e53f3869b75804762c7c848bd2dd2abf2b70fb0e526f58724638bd35/regex-2025.11.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c56b4d162ca2b43318ac671c65bd4d563e841a694ac70e1a976ac38fcf4ca1d2", size = 799473, upload-time = "2025-11-03T21:32:42.148Z" },
-    { url = "https://files.pythonhosted.org/packages/31/e9/f6e13de7e0983837f7b6d238ad9458800a874bf37c264f7923e63409944c/regex-2025.11.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9697a52e57576c83139d7c6f213d64485d3df5bf84807c35fa409e6c970801c6", size = 489089, upload-time = "2025-11-03T21:32:50.027Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/5c/261f4a262f1fa65141c1b74b255988bd2fa020cc599e53b080667d591cfc/regex-2025.11.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e18bc3f73bd41243c9b38a6d9f2366cd0e0137a9aebe2d8ff76c5b67d4c0a3f4", size = 291059, upload-time = "2025-11-03T21:32:51.682Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/57/f14eeb7f072b0e9a5a090d1712741fd8f214ec193dba773cf5410108bb7d/regex-2025.11.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:61a08bcb0ec14ff4e0ed2044aad948d0659604f824cbd50b55e30b0ec6f09c73", size = 288900, upload-time = "2025-11-03T21:32:53.569Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/6b/1d650c45e99a9b327586739d926a1cd4e94666b1bd4af90428b36af66dc7/regex-2025.11.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9c30003b9347c24bcc210958c5d167b9e4f9be786cb380a7d32f14f9b84674f", size = 799010, upload-time = "2025-11-03T21:32:55.222Z" },
-    { url = "https://files.pythonhosted.org/packages/99/ee/d66dcbc6b628ce4e3f7f0cbbb84603aa2fc0ffc878babc857726b8aab2e9/regex-2025.11.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4e1e592789704459900728d88d41a46fe3969b82ab62945560a31732ffc19a6d", size = 864893, upload-time = "2025-11-03T21:32:57.239Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/2d/f238229f1caba7ac87a6c4153d79947fb0261415827ae0f77c304260c7d3/regex-2025.11.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6538241f45eb5a25aa575dbba1069ad786f68a4f2773a29a2bd3dd1f9de787be", size = 911522, upload-time = "2025-11-03T21:32:59.274Z" },
-    { url = "https://files.pythonhosted.org/packages/bd/3d/22a4eaba214a917c80e04f6025d26143690f0419511e0116508e24b11c9b/regex-2025.11.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce22519c989bb72a7e6b36a199384c53db7722fe669ba891da75907fe3587db", size = 803272, upload-time = "2025-11-03T21:33:01.393Z" },
-    { url = "https://files.pythonhosted.org/packages/84/b1/03188f634a409353a84b5ef49754b97dbcc0c0f6fd6c8ede505a8960a0a4/regex-2025.11.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:66d559b21d3640203ab9075797a55165d79017520685fb407b9234d72ab63c62", size = 787958, upload-time = "2025-11-03T21:33:03.379Z" },
-    { url = "https://files.pythonhosted.org/packages/99/6a/27d072f7fbf6fadd59c64d210305e1ff865cc3b78b526fd147db768c553b/regex-2025.11.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:669dcfb2e38f9e8c69507bace46f4889e3abbfd9b0c29719202883c0a603598f", size = 859289, upload-time = "2025-11-03T21:33:05.374Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/70/1b3878f648e0b6abe023172dacb02157e685564853cc363d9961bcccde4e/regex-2025.11.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:32f74f35ff0f25a5021373ac61442edcb150731fbaa28286bbc8bb1582c89d02", size = 850026, upload-time = "2025-11-03T21:33:07.131Z" },
-    { url = "https://files.pythonhosted.org/packages/dd/d5/68e25559b526b8baab8e66839304ede68ff6727237a47727d240006bd0ff/regex-2025.11.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e6c7a21dffba883234baefe91bc3388e629779582038f75d2a5be918e250f0ed", size = 789499, upload-time = "2025-11-03T21:33:09.141Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/06/49b198550ee0f5e4184271cee87ba4dfd9692c91ec55289e6282f0f86ccf/regex-2025.11.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ba0d8a5d7f04f73ee7d01d974d47c5834f8a1b0224390e4fe7c12a3a92a78ecc", size = 491985, upload-time = "2025-11-03T21:33:16.555Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/bf/abdafade008f0b1c9da10d934034cb670432d6cf6cbe38bbb53a1cfd6cf8/regex-2025.11.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:442d86cf1cfe4faabf97db7d901ef58347efd004934da045c745e7b5bd57ac49", size = 292669, upload-time = "2025-11-03T21:33:18.32Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/ef/0c357bb8edbd2ad8e273fcb9e1761bc37b8acbc6e1be050bebd6475f19c1/regex-2025.11.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fd0a5e563c756de210bb964789b5abe4f114dacae9104a47e1a649b910361536", size = 291030, upload-time = "2025-11-03T21:33:20.048Z" },
-    { url = "https://files.pythonhosted.org/packages/79/06/edbb67257596649b8fb088d6aeacbcb248ac195714b18a65e018bf4c0b50/regex-2025.11.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf3490bcbb985a1ae97b2ce9ad1c0f06a852d5b19dde9b07bdf25bf224248c95", size = 807674, upload-time = "2025-11-03T21:33:21.797Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/d9/ad4deccfce0ea336296bd087f1a191543bb99ee1c53093dcd4c64d951d00/regex-2025.11.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3809988f0a8b8c9dcc0f92478d6501fac7200b9ec56aecf0ec21f4a2ec4b6009", size = 873451, upload-time = "2025-11-03T21:33:23.741Z" },
-    { url = "https://files.pythonhosted.org/packages/13/75/a55a4724c56ef13e3e04acaab29df26582f6978c000ac9cd6810ad1f341f/regex-2025.11.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f4ff94e58e84aedb9c9fce66d4ef9f27a190285b451420f297c9a09f2b9abee9", size = 914980, upload-time = "2025-11-03T21:33:25.999Z" },
-    { url = "https://files.pythonhosted.org/packages/67/1e/a1657ee15bd9116f70d4a530c736983eed997b361e20ecd8f5ca3759d5c5/regex-2025.11.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7eb542fd347ce61e1321b0a6b945d5701528dca0cd9759c2e3bb8bd57e47964d", size = 812852, upload-time = "2025-11-03T21:33:27.852Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/6f/f7516dde5506a588a561d296b2d0044839de06035bb486b326065b4c101e/regex-2025.11.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d6c2d5919075a1f2e413c00b056ea0c2f065b3f5fe83c3d07d325ab92dce51d6", size = 795566, upload-time = "2025-11-03T21:33:32.364Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/dd/3d10b9e170cc16fb34cb2cef91513cf3df65f440b3366030631b2984a264/regex-2025.11.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3f8bf11a4827cc7ce5a53d4ef6cddd5ad25595d3c1435ef08f76825851343154", size = 868463, upload-time = "2025-11-03T21:33:34.459Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/8e/935e6beff1695aa9085ff83195daccd72acc82c81793df480f34569330de/regex-2025.11.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:22c12d837298651e5550ac1d964e4ff57c3f56965fc1812c90c9fb2028eaf267", size = 854694, upload-time = "2025-11-03T21:33:36.793Z" },
-    { url = "https://files.pythonhosted.org/packages/92/12/10650181a040978b2f5720a6a74d44f841371a3d984c2083fc1752e4acf6/regex-2025.11.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ba394a3dda9ad41c7c780f60f6e4a70988741415ae96f6d1bf6c239cf01379", size = 799691, upload-time = "2025-11-03T21:33:39.079Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/da/797e91ecec6f84135da778ddce78c20e0af5d2a15c26f87a81bc3eadb6db/regex-2026.5.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d626b84406444b165fc0ba981604edea39f0588ff1f92baa23fe50799ea9afdb", size = 490303, upload-time = "2026-05-09T23:13:04.382Z" },
+    { url = "https://files.pythonhosted.org/packages/44/da/bf30abaaa737b58f4a4b8c4a03659e02fd92092c822e0197ed9e0daab917/regex-2026.5.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d7bdc0ab8f3dd7e1b4f9ab88634e13374669db86bb3c72e8292f07ae313f539f", size = 292019, upload-time = "2026-05-09T23:13:06.022Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/e7/d0eaf5713828417b9e5648cf81fa9bacd4961f6ab98c380c2034f8716e35/regex-2026.5.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a8820737949116ffff55fe18f9fc644530063ba6ebfcb8314239416e78f1347c", size = 289468, upload-time = "2026-05-09T23:13:08.214Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/9b/b3fdd62b003baa1a9b593cd8c8699c9651c2e80cc21a5c715707983c42d7/regex-2026.5.9-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0fbdbac82cb3e4450d0ccde7d7a35607f4cb2dd9fba4b8b69bfaf8c9fa6aed", size = 796749, upload-time = "2026-05-09T23:13:10.573Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/30/66ab84588765f5b4b271a9ca09ef7ce2b87caa95176ec3d2ad65d7bc4902/regex-2026.5.9-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:57e8915c7986aa33d25e4d3629cef711cd2863f2961b10409f0c04cb8b7d9020", size = 865445, upload-time = "2026-05-09T23:13:12.523Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/89/f05169e8588aac365f35ffc7f3bc3184f095ef4cfded7cfaa3c7fd5dbd89/regex-2026.5.9-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508f56a89ba9cb26e4168cbc37dbd60a28d82430a9e18ad1d25fe0883c314ca2", size = 912322, upload-time = "2026-05-09T23:13:14.281Z" },
+    { url = "https://files.pythonhosted.org/packages/30/e1/c93444052cf41581f3c884ab3fb5823daf0992f11cd4388d4275ca610558/regex-2026.5.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6d189041f15691cfa2b6c4290448ec221244d225b3f5fe9e7771b34ffcdf6e2", size = 801269, upload-time = "2026-05-09T23:13:16.569Z" },
+    { url = "https://files.pythonhosted.org/packages/50/fe/0cf96b882f540e62e8b9956599798203d599c44cf4c77917ca27400ff69b/regex-2026.5.9-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e82db382b44d0111b22601c509c89f64434816c9e0eef9d1989cda8cc6ff1c04", size = 777085, upload-time = "2026-05-09T23:13:18.675Z" },
+    { url = "https://files.pythonhosted.org/packages/23/5c/d78d4924e7fc875557b9e9b768423925fdfaac5549d06da7810019a9bd26/regex-2026.5.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2acfb48634f64996b57f90f39afa692ff362162722581921fe92239a59960f3c", size = 785153, upload-time = "2026-05-09T23:13:20.525Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/e0/5214774090e7b4524dcea3e3c4aa74141d43043f8beb49c1599db1c8b53a/regex-2026.5.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d29eebfc9525db68cad3c97eedd7f754fa265aa5cd0cf4f863b2421e1b48fc9f", size = 860164, upload-time = "2026-05-09T23:13:22.263Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/e1/4a57a83350319b1271f0d7a249b8672513ed928b237a741631270de6caea/regex-2026.5.9-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:debb893095e944091c16e641a6e33c1b0f4cb61ab945ec5afbf53ce7068834d8", size = 765731, upload-time = "2026-05-09T23:13:24.277Z" },
+    { url = "https://files.pythonhosted.org/packages/12/f4/499e74a20c156fc75836ee04a72a38d1a063978f600937f9760467beb1b0/regex-2026.5.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d659eee77986549c9ea45b861c7567e44d6287c3dc9a4565478853f7b9fe2ff6", size = 852062, upload-time = "2026-05-09T23:13:26.125Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/92/7eebc0d0a01e78629695f342ba17e0deaff8fb45e79cc0d7b98287da6e3e/regex-2026.5.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2efa205e6d98b24d1f3ab395c11aa15cdf10935bca283d0285e0499c284fba21", size = 789577, upload-time = "2026-05-09T23:13:27.814Z" },
+    { url = "https://files.pythonhosted.org/packages/05/a4/018e71f7d2ad48c1ebe6d3ae0026f9b7cb4802fd15c7cc02fdf724355102/regex-2026.5.9-cp313-cp313-win32.whl", hash = "sha256:f3844f134e834076677dd369976e9f5068679fcb8e50102fdf6b7ac96a3ec127", size = 266691, upload-time = "2026-05-09T23:13:29.549Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/1d/861a93719fb9ee7dbfc3761b3797b7a3e112a5d42c6129459d2d741be9b5/regex-2026.5.9-cp313-cp313-win_amd64.whl", hash = "sha256:3527bb4942d2c14552155406cdedd906567456821848aed1cb4933a391bf5eca", size = 277747, upload-time = "2026-05-09T23:13:31.859Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/c6/0a2436ae4da1ba76e51cb98943c6838a9a721faa40ebe2dce07694ae34e3/regex-2026.5.9-cp313-cp313-win_arm64.whl", hash = "sha256:56a33f191f17d8c417f99945ebdc1e691d3af9605d86ec68c7e54a57e3e17af6", size = 270500, upload-time = "2026-05-09T23:13:33.525Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/e9/d21346f7b60ed58789371358ed66b09d00f832e1bd7c06e55d9da5679882/regex-2026.5.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:01f28d868834624c934b8d2e0aa1c8341337e37831f4a012f18a5afcba4cbaf3", size = 494172, upload-time = "2026-05-09T23:13:35.935Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/43/fd1177a2032037c681baecdb3422ee4e1424aec4e4f470ef47793d325274/regex-2026.5.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:48036f6374aaa79eb3b754ec29c61d1c6b1606749d705a13f8854fa2539671f6", size = 293952, upload-time = "2026-05-09T23:13:38.307Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7d/9fbf919768368d3f8a4f6c692cf2aa61e482b2b81ec6a298ace4cbf02480/regex-2026.5.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b96350aa424e79d4fd6b567b344dcbe2b2d6bfc48dfe7717587e1fa6d43da6ff", size = 292314, upload-time = "2026-05-09T23:13:40.353Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/6c/e41bfeecb589716843e7c4df09ba46ff2a42961457afece19059d85caeef/regex-2026.5.9-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f3af7a4903c5c04a11a196a5aa75cdd7dd3f8508132f9fb3259d9f5908e3b88", size = 811681, upload-time = "2026-05-09T23:13:42.543Z" },
+    { url = "https://files.pythonhosted.org/packages/87/83/a5c1c525fba0aa656e88ad0face0b1829788ef4c2fb6b26df58aa1151b84/regex-2026.5.9-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7e87577720152d2caae19fe2baaf1f8d5ca12091e9e229f03915c37d1e4b9178", size = 871135, upload-time = "2026-05-09T23:13:44.326Z" },
+    { url = "https://files.pythonhosted.org/packages/18/d4/80882e799e440dd878b0979cbebf8fa4d54624a332c83037c7a701649e3f/regex-2026.5.9-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c8b9b9d294cfea3cd19c718ade7cc93492b2c4991abd9a68d0b3477ae6d8e100", size = 917265, upload-time = "2026-05-09T23:13:47.295Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/ff/8db60211e2286e396aad7dc7725356c502bff0901ea05bd6cdc2e1a042b9/regex-2026.5.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:728d8bfd28a8845c8b6bc5dc7ce010453d206396786c0765c2740cb65f37791e", size = 816311, upload-time = "2026-05-09T23:13:49.885Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/47/742ef579c61730f8d268e5cf1f9ce0e37e2ea041ad0f5644724f2378e463/regex-2026.5.9-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7e30b874d341fac767d7df5a0870540541c2c054b80cfaac116e8d367a8a7ff2", size = 785498, upload-time = "2026-05-09T23:13:52.25Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/ab/cb0999802dcb0fb95b1ab005e8d4163d8afdd67efc2cb6b6630ac13f8cb1/regex-2026.5.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fd190e88a895a8901325fad284a3f74ea52b1da8525b76cc811fa9b1edf0ce2b", size = 801348, upload-time = "2026-05-09T23:13:54.127Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/62/8ca59a24c55bc34d166eefaf3717bd77772f329fdbf984d86581e0a3571c/regex-2026.5.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:8e76e8161ad00694cfce6767d5dea860c6391ac5b83e5c3a39661e696f11fc7e", size = 866493, upload-time = "2026-05-09T23:13:56.067Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/3d/30f2ae62cef3278bb5bb821f467277a55fb73f01032cf85997e15e8289a8/regex-2026.5.9-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ddda5340e6c01a293027dd46232fa79eaff1b48058ce7a98f572b6445b088041", size = 772811, upload-time = "2026-05-09T23:13:57.867Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/ae/7d2089bcd78ad0c0161bc684339df50032acb438a7bd3305e7ddb1193cec/regex-2026.5.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:205109e96b3cf5adf8f4cd62bedde9487feb282b9497a3535451e5a24cd706a0", size = 856584, upload-time = "2026-05-09T23:13:59.679Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/29/92ff47f75990131ea4f24ba17819e5a9d141e10819807e09addd73409af6/regex-2026.5.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dfbe4579b9f08036aa7d101d1835437a20783574ac66327e6b29b4018a138081", size = 803453, upload-time = "2026-05-09T23:14:01.978Z" },
+    { url = "https://files.pythonhosted.org/packages/04/99/eff29f1037dcab36702c9ee5d6858cf1ce2336ea8ea2987f64245b99ea5e/regex-2026.5.9-cp313-cp313t-win32.whl", hash = "sha256:ed2c9e8068b614c574d8d30e543d617cf5379b0535d46f97ef00e904745a08b5", size = 269951, upload-time = "2026-05-09T23:14:03.661Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/9d/8870b8981d27b22cda77bb26a5ac7ebfa9c7d9e0dea195a834a82380e748/regex-2026.5.9-cp313-cp313t-win_amd64.whl", hash = "sha256:b46b0f094dc1d3b90356c85a0bd2c9bafc4a6a190b9d6f8ddd5a033b6e088ed4", size = 281240, upload-time = "2026-05-09T23:14:05.56Z" },
+    { url = "https://files.pythonhosted.org/packages/72/b1/3379415e8f135c13ac551353397cc4fe97b4978f3cac73c5fcbcded548b8/regex-2026.5.9-cp313-cp313t-win_arm64.whl", hash = "sha256:872acc074bd29ffc9913ecdfedf6ea77502312ca44a4aa0d3779089c6069d8de", size = 272383, upload-time = "2026-05-09T23:14:07.843Z" },
 ]
 
 [[package]]
 name = "requests"
-version = "2.32.5"
+version = "2.34.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "charset-normalizer", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "certifi", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "charset-normalizer", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ac/c3/e2a2b89f2d3e2179abd6d00ebd70bff6273f37fb3e0cc209f48b39d00cbf/requests-2.34.2.tar.gz", hash = "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed", size = 142856, upload-time = "2026-05-14T19:25:27.735Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/f4/c67b0b3f1b9245e8d266f0f112c500d50e5b4e83cb6f3b71b6528104182a/requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", size = 73075, upload-time = "2026-05-14T19:25:26.443Z" },
 ]
 
 [[package]]
@@ -2574,7 +2456,7 @@ name = "requests-toolbelt"
 version = "1.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" }
 wheels = [
@@ -2592,15 +2474,15 @@ wheels = [
 
 [[package]]
 name = "rich"
-version = "14.2.0"
+version = "15.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "markdown-it-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "markdown-it-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pygments", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" },
+    { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" },
 ]
 
 [[package]]
@@ -2608,34 +2490,36 @@ name = "rouge-score"
 version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "absl-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "nltk", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "absl-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "nltk", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e2/c5/9136736c37022a6ad27fea38f3111eb8f02fe75d067f9a985cc358653102/rouge_score-0.1.2.tar.gz", hash = "sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04", size = 17400, upload-time = "2022-07-22T22:46:22.909Z" }
 
 [[package]]
 name = "ruff"
-version = "0.14.11"
+version = "0.15.13"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d4/77/9a7fe084d268f8855d493e5031ea03fa0af8cc05887f638bf1c4e3363eb8/ruff-0.14.11.tar.gz", hash = "sha256:f6dc463bfa5c07a59b1ff2c3b9767373e541346ea105503b4c0369c520a66958", size = 5993417, upload-time = "2026-01-08T19:11:58.322Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/24/21/a7d5c126d5b557715ef81098f3db2fe20f622a039ff2e626af28d674ab80/ruff-0.15.13.tar.gz", hash = "sha256:f9d89f17f7ba7fb2ed42921f0df75da797a9a5d71bc39049e2c687cf2baf44b7", size = 4678180, upload-time = "2026-05-14T13:44:37.869Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f0/a6/a4c40a5aaa7e331f245d2dc1ac8ece306681f52b636b40ef87c88b9f7afd/ruff-0.14.11-py3-none-linux_armv6l.whl", hash = "sha256:f6ff2d95cbd335841a7217bdfd9c1d2e44eac2c584197ab1385579d55ff8830e", size = 12951208, upload-time = "2026-01-08T19:12:09.218Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/5c/360a35cb7204b328b685d3129c08aca24765ff92b5a7efedbdd6c150d555/ruff-0.14.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f6eb5c1c8033680f4172ea9c8d3706c156223010b8b97b05e82c59bdc774ee6", size = 13330075, upload-time = "2026-01-08T19:12:02.549Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/9e/0cc2f1be7a7d33cae541824cf3f95b4ff40d03557b575912b5b70273c9ec/ruff-0.14.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f2fc34cc896f90080fca01259f96c566f74069a04b25b6205d55379d12a6855e", size = 12257809, upload-time = "2026-01-08T19:12:00.366Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/e5/5faab97c15bb75228d9f74637e775d26ac703cc2b4898564c01ab3637c02/ruff-0.14.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53386375001773ae812b43205d6064dae49ff0968774e6befe16a994fc233caa", size = 12678447, upload-time = "2026-01-08T19:12:13.899Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/33/e9767f60a2bef779fb5855cab0af76c488e0ce90f7bb7b8a45c8a2ba4178/ruff-0.14.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a697737dce1ca97a0a55b5ff0434ee7205943d4874d638fe3ae66166ff46edbe", size = 12758560, upload-time = "2026-01-08T19:11:42.55Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/84/4c6cf627a21462bb5102f7be2a320b084228ff26e105510cd2255ea868e5/ruff-0.14.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6845ca1da8ab81ab1dce755a32ad13f1db72e7fba27c486d5d90d65e04d17b8f", size = 13599296, upload-time = "2026-01-08T19:11:30.371Z" },
-    { url = "https://files.pythonhosted.org/packages/88/e1/92b5ed7ea66d849f6157e695dc23d5d6d982bd6aa8d077895652c38a7cae/ruff-0.14.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e36ce2fd31b54065ec6f76cb08d60159e1b32bdf08507862e32f47e6dde8bcbf", size = 15048981, upload-time = "2026-01-08T19:12:04.742Z" },
-    { url = "https://files.pythonhosted.org/packages/61/df/c1bd30992615ac17c2fb64b8a7376ca22c04a70555b5d05b8f717163cf9f/ruff-0.14.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590bcc0e2097ecf74e62a5c10a6b71f008ad82eb97b0a0079e85defe19fe74d9", size = 14633183, upload-time = "2026-01-08T19:11:40.069Z" },
-    { url = "https://files.pythonhosted.org/packages/04/e9/fe552902f25013dd28a5428a42347d9ad20c4b534834a325a28305747d64/ruff-0.14.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:53fe71125fc158210d57fe4da26e622c9c294022988d08d9347ec1cf782adafe", size = 14050453, upload-time = "2026-01-08T19:11:37.555Z" },
-    { url = "https://files.pythonhosted.org/packages/ae/93/f36d89fa021543187f98991609ce6e47e24f35f008dfe1af01379d248a41/ruff-0.14.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a35c9da08562f1598ded8470fcfef2afb5cf881996e6c0a502ceb61f4bc9c8a3", size = 13757889, upload-time = "2026-01-08T19:12:07.094Z" },
-    { url = "https://files.pythonhosted.org/packages/b7/9f/c7fb6ecf554f28709a6a1f2a7f74750d400979e8cd47ed29feeaa1bd4db8/ruff-0.14.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:0f3727189a52179393ecf92ec7057c2210203e6af2676f08d92140d3e1ee72c1", size = 13955832, upload-time = "2026-01-08T19:11:55.064Z" },
-    { url = "https://files.pythonhosted.org/packages/db/a0/153315310f250f76900a98278cf878c64dfb6d044e184491dd3289796734/ruff-0.14.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:eb09f849bd37147a789b85995ff734a6c4a095bed5fd1608c4f56afc3634cde2", size = 12586522, upload-time = "2026-01-08T19:11:35.356Z" },
-    { url = "https://files.pythonhosted.org/packages/2f/2b/a73a2b6e6d2df1d74bf2b78098be1572191e54bec0e59e29382d13c3adc5/ruff-0.14.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:c61782543c1231bf71041461c1f28c64b961d457d0f238ac388e2ab173d7ecb7", size = 12724637, upload-time = "2026-01-08T19:11:47.796Z" },
-    { url = "https://files.pythonhosted.org/packages/f0/41/09100590320394401cd3c48fc718a8ba71c7ddb1ffd07e0ad6576b3a3df2/ruff-0.14.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:82ff352ea68fb6766140381748e1f67f83c39860b6446966cff48a315c3e2491", size = 13145837, upload-time = "2026-01-08T19:11:32.87Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/d8/e035db859d1d3edf909381eb8ff3e89a672d6572e9454093538fe6f164b0/ruff-0.14.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:728e56879df4ca5b62a9dde2dd0eb0edda2a55160c0ea28c4025f18c03f86984", size = 13850469, upload-time = "2026-01-08T19:12:11.694Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/61/11d458dc6ac22504fd8e237b29dfd40504c7fbbcc8930402cfe51a8e63ed/ruff-0.15.13-py3-none-linux_armv6l.whl", hash = "sha256:444b580fc72fd6887e650acd3e575e18cdc79dbcf42fb4030b491057921f61f8", size = 10738279, upload-time = "2026-05-14T13:44:18.7Z" },
+    { url = "https://files.pythonhosted.org/packages/86/ca/caa871ee7be718c45256fada4e16a218ee3e33f0c4a46b729a60a24912e6/ruff-0.15.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6590d009e7cb7ebf36f83dbdd44a3fa48a0994ff6f1cdc1b08006abe58f98dc7", size = 11124798, upload-time = "2026-05-14T13:44:06.427Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/19/43f5f2e568dddde567fc41f8471f9432c09563e19d3e617a48cfa52f8f0a/ruff-0.15.13-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1c26d2f66163deeb6e08d8b39fbbe983ce3c71cea06a6d7591cfd1421793c629", size = 10460761, upload-time = "2026-05-14T13:44:04.375Z" },
+    { url = "https://files.pythonhosted.org/packages/99/df/cf938cd6de3003178f03ad7c1ea2a6c099468c03a35037985070b37e76be/ruff-0.15.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbd6f94b434f896308e4d57fb7bfde0d02b99f7a64b3bdab0fdfa6a864203a5", size = 10804451, upload-time = "2026-05-14T13:44:25.221Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/7d/5d0973129b154ded2225729169d7068f26b467760b146493fde138415f23/ruff-0.15.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bf3259f3be4d181bda591da5db2571aed6853c6a048157756448020bc6c5cd22", size = 10534285, upload-time = "2026-05-14T13:44:08.888Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/e3/6b999bbc66cd51e5f073842bc2a3995e99c5e0e72e16b15e7261f7abf57a/ruff-0.15.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae9c17e5eb4430c154e76abc25d79a318190f5a997f38fb6b114416c5319ffc9", size = 11312063, upload-time = "2026-05-14T13:44:11.274Z" },
+    { url = "https://files.pythonhosted.org/packages/af/5a/642639e9f5db04f1e97fbd6e091c6fd20725bdf072fb114d00eefb9e6eb8/ruff-0.15.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e2e39bff6c341f4b577a21b801326fab0b11847f48fcaa83f00a113c9b3cb55", size = 12183079, upload-time = "2026-05-14T13:44:01.634Z" },
+    { url = "https://files.pythonhosted.org/packages/19/4c/7585735f6b53b0f12de13618b2f7d250a844f018822efc899df2e7b8295f/ruff-0.15.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e8d9a8e08013542e94d3220bc5b62cc3e5ef87c5f74bff367d3fac14fab013e6", size = 11440833, upload-time = "2026-05-14T13:43:59.043Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/31/bf1a0803d077e679cfeee5f2f67290a0fa79c7385b5d9a8c17b9db2c48f0/ruff-0.15.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc411dfebe5eebe55ce041c6ae080eb7668955e866daa2fbb16692a784f1c4ca", size = 11434486, upload-time = "2026-05-14T13:44:27.761Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/4e/62c9b999875d4f14db80f277c030578f5e249c9852d65b7ac7ad0b43c041/ruff-0.15.13-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:768494eb08b9cee54e2fd27969966f74db5a57f6eaa7a90fcb3306af34dfc4bd", size = 11385189, upload-time = "2026-05-14T13:44:13.704Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/89/7e959047a104df3eb12863447c110140191fc5b6c4f379ea2e803fcdb0e4/ruff-0.15.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:fb75f9a3a7e42ffe117d734494e6c5e5cb3565d66e12612cb63d0e572a41a5b6", size = 10781380, upload-time = "2026-05-14T13:43:56.734Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/52/5fd18f3b88cab63e88aa11516b3b4e1e5f720e5c330f8dbe5c26210f41f8/ruff-0.15.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8cb74dd33bb2f6613faf7fc03b660053b5ac4f80e706d5788c6335e2a8048d51", size = 10540605, upload-time = "2026-05-14T13:44:20.748Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/e0/9e35f338990d3e41a82875ff7053ffe97541dae81c9d02143177f381d572/ruff-0.15.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:7ef823f817fcd191dc934e984be9cf4094f808effa16f2542ad8e821ba02bbf2", size = 11036554, upload-time = "2026-05-14T13:44:16.256Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/13/070fb048c24080fba188f66371e2a92785be257ad02242066dc7255ac6e9/ruff-0.15.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f345a13937bd7f09f6f5d19fa0721b0c103e00e7f62bc67089a8e5e037719e0b", size = 11528133, upload-time = "2026-05-14T13:44:22.808Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/8c/b1e1666aef7fc6555094d73ae6cd981701781ae85b97ceefc0eebd0b4668/ruff-0.15.13-py3-none-win32.whl", hash = "sha256:4044f94208b3b05ba0fc4a4abd0558cf4d6459bd18325eead7fd8cc66f909b41", size = 10721455, upload-time = "2026-05-14T13:44:35.697Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/a6/870a3e8a50590bb92be184ad928c2922f088b00d9dc5c5ec7b924ee08c22/ruff-0.15.13-py3-none-win_amd64.whl", hash = "sha256:7064884d442b7d477b4e7473d12da7f08851d2b1982763c5d3f388a19468a1a4", size = 11900409, upload-time = "2026-05-14T13:44:30.389Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/36/9c015cd052fca743dae8cb2aeb16b551444787467db42ceab0fc968865af/ruff-0.15.13-py3-none-win_arm64.whl", hash = "sha256:2471da9bd1068c8c064b5fd9c0c4b6dddffd6369cb1cd68b29993b1709ff1b21", size = 11179336, upload-time = "2026-05-14T13:44:33.026Z" },
 ]
 
 [[package]]
@@ -2643,7 +2527,7 @@ name = "rustworkx"
 version = "0.17.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e7/b0/66d96f02120f79eeed86b5c5be04029b6821155f31ed4907a4e9f1460671/rustworkx-0.17.1.tar.gz", hash = "sha256:59ea01b4e603daffa4e8827316c1641eef18ae9032f0b1b14aa0181687e3108e", size = 399407, upload-time = "2025-09-15T16:29:46.429Z" }
 wheels = [
@@ -2656,6 +2540,8 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/39/5b/281bb21d091ab4e36cf377088366d55d0875fa2347b3189c580ec62b44c7/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246cc252053f89e36209535b9c58755960197e6ae08d48d3973760141c62ac95", size = 2221186, upload-time = "2025-08-13T01:43:38.598Z" },
     { url = "https://files.pythonhosted.org/packages/cc/2d/30a941a21b81e9db50c4c3ef8a64c5ee1c8eea3a90506ca0326ce39d021f/rustworkx-0.17.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c10d25e9f0e87d6a273d1ea390b636b4fb3fede2094bf0cb3fe565d696a91b48", size = 2123510, upload-time = "2025-08-13T01:43:40.288Z" },
     { url = "https://files.pythonhosted.org/packages/4f/ef/c9199e4b6336ee5a9f1979c11b5779c5cf9ab6f8386e0b9a96c8ffba7009/rustworkx-0.17.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:48784a673cf8d04f3cd246fa6b53fd1ccc4d83304503463bd561c153517bccc1", size = 2302783, upload-time = "2025-08-13T01:43:42.073Z" },
+    { url = "https://files.pythonhosted.org/packages/30/3d/a49ab633e99fca4ccbb9c9f4bd41904186c175ebc25c530435529f71c480/rustworkx-0.17.1-cp39-abi3-win32.whl", hash = "sha256:5dbc567833ff0a8ad4580a4fe4bde92c186d36b4c45fca755fb1792e4fafe9b5", size = 1931541, upload-time = "2025-08-13T01:43:43.415Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/ec/cee878c1879b91ab8dc7d564535d011307839a2fea79d2a650413edf53be/rustworkx-0.17.1-cp39-abi3-win_amd64.whl", hash = "sha256:d0a48fb62adabd549f9f02927c3a159b51bf654c7388a12fc16d45452d5703ea", size = 2055049, upload-time = "2025-08-13T01:43:44.926Z" },
 ]
 
 [[package]]
@@ -2663,12 +2549,12 @@ name = "sacrebleu"
 version = "2.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "lxml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "portalocker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tabulate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "colorama", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "lxml", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "portalocker", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tabulate", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/d3/ed/d7acddcff74d690c56fe26a1f7828bdde548262828d0743414ea916c40c1/sacrebleu-2.6.0.tar.gz", hash = "sha256:91499b6cd46138d95154fff1e863c2f9be57e82f0c719d8dd718d0006cf6c566", size = 1893419, upload-time = "2026-01-12T17:17:20.799Z" }
 wheels = [
@@ -2677,22 +2563,26 @@ wheels = [
 
 [[package]]
 name = "safetensors"
-version = "0.7.0"
+version = "0.8.0rc0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c9/76/70a044292cabc4b591b9b7947aa7d5dd346647acab18532e7e971a02141e/safetensors-0.8.0rc0.tar.gz", hash = "sha256:b4168a839ff287dc26b0d843e1760962b2e92ed5645f95e8ab3f4b9401807e6a", size = 235447, upload-time = "2026-04-14T14:30:42.125Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" },
-    { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" },
-    { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" },
-    { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" },
-    { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" },
-    { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" },
-    { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" },
-    { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" },
-    { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/c4/8ae3b9b8159babed52fe67698e4095858787dafb3363fa3500c150eef5d5/safetensors-0.8.0rc0-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c1e7a6a1c0dd0128888bc47aca0a9625855673f44f275bf4073088563bf7121b", size = 469331, upload-time = "2026-04-14T14:30:35.024Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/28/5322eb9057aeccb8492546a8e7fc070a8490afcca6e658f0a53e2279cca8/safetensors-0.8.0rc0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:c052d1706567487bc103088fe02daf05132dbccbbc3d798753541b66eb37fb14", size = 450714, upload-time = "2026-04-14T14:30:33.884Z" },
+    { url = "https://files.pythonhosted.org/packages/85/10/8aedf0becbe6ba019f0be2ab1efbf124d1319d7daaea5f1e3c165670a162/safetensors-0.8.0rc0-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79547625fa84f3a9b28b933e44c67d012edf22a0c7170ed68835b9f467dda836", size = 493726, upload-time = "2026-04-14T14:30:23.641Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/de/9a6d5d2b842814ff7a715169054235b6141924350be746b02f7906dd0756/safetensors-0.8.0rc0-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a132d3cf5f63c3f02b82c4abf65c58d33a8422199ae34e09a9a7edb661bd2ca9", size = 502966, upload-time = "2026-04-14T14:30:25.344Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/aa/29be34707d27b81b280759f4e52fb38fc6955e2d5e053164b9ab9eabee77/safetensors-0.8.0rc0-cp310-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d42f6c44773901ce1a021d2372747a559e9ec5aa59d044c0d711c273bff21c67", size = 621250, upload-time = "2026-04-14T14:30:26.746Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/fa/5b0997ca9cc70c4e6e6ed2afb59506c7065df29bc4771df8f7be61c3bc90/safetensors-0.8.0rc0-cp310-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b40d25911c5f241cad874ad1ea4100a9a9e3c2d469a73a38b47af759d239f44", size = 527309, upload-time = "2026-04-14T14:30:29.722Z" },
+    { url = "https://files.pythonhosted.org/packages/25/e0/be46e568cc05530f106ab5dc2faa383ba51533022d735df32db5d550d598/safetensors-0.8.0rc0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf0d366f75f63867f1ede90f87090450c7cec320da1fc2a597f9bb8cb73460db", size = 509088, upload-time = "2026-04-14T14:30:32.377Z" },
+    { url = "https://files.pythonhosted.org/packages/88/5c/497168a26d656fbf39e20470ad8be60d3bb766267792d999061a6e164bb6/safetensors-0.8.0rc0-cp310-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:50c56d7b6a2f44c3f4ab130bfeb6a8a51ce72bec152805f9c5a46bdf6addb6c5", size = 509345, upload-time = "2026-04-14T14:30:28.235Z" },
+    { url = "https://files.pythonhosted.org/packages/01/a4/54fbeed1447bba46bf8715cbf0d45c11339deeb66afde9ced01ead9233c9/safetensors-0.8.0rc0-cp310-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:94d9c0d569a124fe3074b9934031c2cdcfab12d4d7b64ae17343fac4a92081e8", size = 543961, upload-time = "2026-04-14T14:30:31.135Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/18/af173ce378d316352a5a20fe4b161cf54366519db587cc12b1aa9771be17/safetensors-0.8.0rc0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b4fcccda047df747e2463744428cba352d99527c4e52545d07f8c3a8583136f1", size = 668965, upload-time = "2026-04-14T14:30:36.24Z" },
+    { url = "https://files.pythonhosted.org/packages/47/bf/de0c22d52d4006f682dec432d237bce71418c236f12accff6e9d614ec66d/safetensors-0.8.0rc0-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:2ef8ab6704ea895cb13c89d5825f49e87328cac2093e7e45fb3cb615bd457fb2", size = 778061, upload-time = "2026-04-14T14:30:37.522Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/f9/bd146043d920cd3fa0b62fd2f548f7b73f0a6212ed960546055bbb11d62a/safetensors-0.8.0rc0-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:35bf158d1555df7a529c844ae8ab89355c9df34546de0f94c47d538902bcc07c", size = 751302, upload-time = "2026-04-14T14:30:39.191Z" },
+    { url = "https://files.pythonhosted.org/packages/44/58/448c080cd6c2b46662dd0fe93e3814e9ea7e1f818ddf8c0d13ca75eda47a/safetensors-0.8.0rc0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:98b0f6f2a14a6bde7f6acaa5f0381baef9a87c6a3124338affe4e4bb40bf826b", size = 713576, upload-time = "2026-04-14T14:30:40.49Z" },
+    { url = "https://files.pythonhosted.org/packages/55/97/68207a641c30edc7eed692d89cf340e1fe8ba03f91c3643c9a02419d0942/safetensors-0.8.0rc0-cp310-abi3-win32.whl", hash = "sha256:7e7cc49c69d8df5aaaf332532cd636609727599f81294bf4e5de56a2e3b70a10", size = 325782, upload-time = "2026-04-14T14:30:45.907Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/0b/c28fd694c98ebfefb764538a2906428aacb51b3bf18e2206723b1ccc6d48/safetensors-0.8.0rc0-cp310-abi3-win_amd64.whl", hash = "sha256:d6532e381c492f5a6b4e82706b232f003e9e697b77d6c2eb7e806d11b578d00b", size = 342453, upload-time = "2026-04-14T14:30:44.668Z" },
+    { url = "https://files.pythonhosted.org/packages/51/73/fd944d3417ba04bd0e72682fa1bedc6d99d986a3594fc7910313088cfe88/safetensors-0.8.0rc0-cp310-abi3-win_arm64.whl", hash = "sha256:b7f8180f8c119dce85da7913904ccf4a0227adf095eb63f1732a6729c2672cb1", size = 330970, upload-time = "2026-04-14T14:30:43.451Z" },
 ]
 
 [[package]]
@@ -2700,10 +2590,10 @@ name = "scikit-learn"
 version = "1.8.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "joblib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "scipy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "threadpoolctl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "joblib", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "scipy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "threadpoolctl", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" }
 wheels = [
@@ -2711,18 +2601,14 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" },
     { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" },
     { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload-time = "2025-12-10T07:08:12.028Z" },
+    { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload-time = "2025-12-10T07:08:13.688Z" },
     { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" },
     { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" },
     { url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" },
     { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" },
-    { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" },
-    { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" },
-    { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" },
-    { url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" },
-    { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload-time = "2025-12-10T07:08:23.943Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload-time = "2025-12-10T07:08:25.71Z" },
 ]
 
 [[package]]
@@ -2730,7 +2616,7 @@ name = "scipy"
 version = "1.17.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" }
 wheels = [
@@ -2742,6 +2628,8 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" },
     { url = "https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" },
     { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" },
+    { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" },
     { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" },
     { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" },
     { url = "https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" },
@@ -2750,22 +2638,8 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" },
     { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" },
     { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" },
-    { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" },
-    { url = "https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" },
-    { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" },
-    { url = "https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" },
-    { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" },
-    { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" },
-    { url = "https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" },
-    { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" },
+    { url = "https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" },
+    { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" },
 ]
 
 [[package]]
@@ -2773,8 +2647,8 @@ name = "secretstorage"
 version = "3.5.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "cryptography", marker = "sys_platform == 'linux'" },
-    { name = "jeepney", marker = "sys_platform == 'linux'" },
+    { name = "cryptography", marker = "sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "jeepney", marker = "sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" }
 wheels = [
@@ -2792,30 +2666,26 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8d/de/5a007fb53b1ab0aafc69d11a5a3dd72a289d5a3e78dcf2c3a3d9b14ffe93/sentencepiece-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:097f3394e99456e9e4efba1737c3749d7e23563dd1588ce71a3d007f25475fff", size = 1253641, upload-time = "2025-08-12T06:59:56.562Z" },
     { url = "https://files.pythonhosted.org/packages/2c/d2/f552be5928105588f4f4d66ee37dd4c61460d8097e62d0e2e0eec41bc61d/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b670879c370d350557edabadbad1f6561a9e6968126e6debca4029e5547820", size = 1316271, upload-time = "2025-08-12T06:59:58.109Z" },
     { url = "https://files.pythonhosted.org/packages/96/df/0cfe748ace5485be740fed9476dee7877f109da32ed0d280312c94ec259f/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7f0fd2f2693309e6628aeeb2e2faf6edd221134dfccac3308ca0de01f8dab47", size = 1387882, upload-time = "2025-08-12T07:00:00.701Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/dd/f7774d42a881ced8e1739f393ab1e82ece39fc9abd4779e28050c2e975b5/sentencepiece-0.2.1-cp313-cp313-win32.whl", hash = "sha256:92b3816aa2339355fda2c8c4e021a5de92180b00aaccaf5e2808972e77a4b22f", size = 999541, upload-time = "2025-08-12T07:00:02.709Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/e9/932b9eae6fd7019548321eee1ab8d5e3b3d1294df9d9a0c9ac517c7b636d/sentencepiece-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:10ed3dab2044c47f7a2e7b4969b0c430420cdd45735d78c8f853191fa0e3148b", size = 1054669, upload-time = "2025-08-12T07:00:04.915Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/3a/76488a00ea7d6931689cda28726a1447d66bf1a4837943489314593d5596/sentencepiece-0.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac650534e2251083c5f75dde4ff28896ce7c8904133dc8fef42780f4d5588fcd", size = 1033922, upload-time = "2025-08-12T07:00:06.496Z" },
     { url = "https://files.pythonhosted.org/packages/4a/b6/08fe2ce819e02ccb0296f4843e3f195764ce9829cbda61b7513f29b95718/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8dd4b477a7b069648d19363aad0cab9bad2f4e83b2d179be668efa672500dc94", size = 1946052, upload-time = "2025-08-12T07:00:08.136Z" },
     { url = "https://files.pythonhosted.org/packages/ab/d9/1ea0e740591ff4c6fc2b6eb1d7510d02f3fb885093f19b2f3abd1363b402/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c0f672da370cc490e4c59d89e12289778310a0e71d176c541e4834759e1ae07", size = 1327408, upload-time = "2025-08-12T07:00:09.572Z" },
     { url = "https://files.pythonhosted.org/packages/99/7e/1fb26e8a21613f6200e1ab88824d5d203714162cf2883248b517deb500b7/sentencepiece-0.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad8493bea8432dae8d6830365352350f3b4144415a1d09c4c8cb8d30cf3b6c3c", size = 1254857, upload-time = "2025-08-12T07:00:11.021Z" },
     { url = "https://files.pythonhosted.org/packages/bc/85/c72fd1f3c7a6010544d6ae07f8ddb38b5e2a7e33bd4318f87266c0bbafbf/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b81a24733726e3678d2db63619acc5a8dccd074f7aa7a54ecd5ca33ca6d2d596", size = 1315722, upload-time = "2025-08-12T07:00:12.989Z" },
     { url = "https://files.pythonhosted.org/packages/4a/e8/661e5bd82a8aa641fd6c1020bd0e890ef73230a2b7215ddf9c8cd8e941c2/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a81799d0a68d618e89063fb423c3001a034c893069135ffe51fee439ae474d6", size = 1387452, upload-time = "2025-08-12T07:00:15.088Z" },
-    { url = "https://files.pythonhosted.org/packages/24/9c/89eb8b2052f720a612478baf11c8227dcf1dc28cd4ea4c0c19506b5af2a2/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5d0350b686c320068702116276cfb26c066dc7e65cfef173980b11bb4d606719", size = 1943147, upload-time = "2025-08-12T07:00:21.809Z" },
-    { url = "https://files.pythonhosted.org/packages/82/0b/a1432bc87f97c2ace36386ca23e8bd3b91fb40581b5e6148d24b24186419/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c7f54a31cde6fa5cb030370566f68152a742f433f8d2be458463d06c208aef33", size = 1325624, upload-time = "2025-08-12T07:00:23.289Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/99/bbe054ebb5a5039457c590e0a4156ed073fb0fe9ce4f7523404dd5b37463/sentencepiece-0.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c83b85ab2d6576607f31df77ff86f28182be4a8de6d175d2c33ca609925f5da1", size = 1253670, upload-time = "2025-08-12T07:00:24.69Z" },
-    { url = "https://files.pythonhosted.org/packages/19/ad/d5c7075f701bd97971d7c2ac2904f227566f51ef0838dfbdfdccb58cd212/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1855f57db07b51fb51ed6c9c452f570624d2b169b36f0f79ef71a6e6c618cd8b", size = 1316247, upload-time = "2025-08-12T07:00:26.435Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/03/35fbe5f3d9a7435eebd0b473e09584bd3cc354ce118b960445b060d33781/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01e6912125cb45d3792f530a4d38f8e21bf884d6b4d4ade1b2de5cf7a8d2a52b", size = 1387894, upload-time = "2025-08-12T07:00:28.339Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/11/5b414b9fae6255b5fb1e22e2ed3dc3a72d3a694e5703910e640ac78346bb/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a19adcec27c524cb7069a1c741060add95f942d1cbf7ad0d104dffa0a7d28a2b", size = 1946081, upload-time = "2025-08-12T07:00:36.97Z" },
-    { url = "https://files.pythonhosted.org/packages/77/eb/7a5682bb25824db8545f8e5662e7f3e32d72a508fdce086029d89695106b/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e37e4b4c4a11662b5db521def4e44d4d30ae69a1743241412a93ae40fdcab4bb", size = 1327406, upload-time = "2025-08-12T07:00:38.669Z" },
-    { url = "https://files.pythonhosted.org/packages/03/b0/811dae8fb9f2784e138785d481469788f2e0d0c109c5737372454415f55f/sentencepiece-0.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:477c81505db072b3ab627e7eab972ea1025331bd3a92bacbf798df2b75ea86ec", size = 1254846, upload-time = "2025-08-12T07:00:40.611Z" },
-    { url = "https://files.pythonhosted.org/packages/ef/23/195b2e7ec85ebb6a547969f60b723c7aca5a75800ece6cc3f41da872d14e/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:010f025a544ef770bb395091d57cb94deb9652d8972e0d09f71d85d5a0816c8c", size = 1315721, upload-time = "2025-08-12T07:00:42.914Z" },
-    { url = "https://files.pythonhosted.org/packages/7e/aa/553dbe4178b5f23eb28e59393dddd64186178b56b81d9b8d5c3ff1c28395/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:733e59ff1794d26db706cd41fc2d7ca5f6c64a820709cb801dc0ea31780d64ab", size = 1387458, upload-time = "2025-08-12T07:00:44.56Z" },
+    { url = "https://files.pythonhosted.org/packages/99/5e/ae66c361023a470afcbc1fbb8da722c72ea678a2fcd9a18f1a12598c7501/sentencepiece-0.2.1-cp313-cp313t-win32.whl", hash = "sha256:89a3ea015517c42c0341d0d962f3e6aaf2cf10d71b1932d475c44ba48d00aa2b", size = 1002501, upload-time = "2025-08-12T07:00:16.966Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/03/d332828c4ff764e16c1b56c2c8f9a33488bbe796b53fb6b9c4205ddbf167/sentencepiece-0.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:33f068c9382dc2e7c228eedfd8163b52baa86bb92f50d0488bf2b7da7032e484", size = 1057555, upload-time = "2025-08-12T07:00:18.573Z" },
+    { url = "https://files.pythonhosted.org/packages/88/14/5aee0bf0864df9bd82bd59e7711362908e4935e3f9cdc1f57246b5d5c9b9/sentencepiece-0.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:b3616ad246f360e52c85781e47682d31abfb6554c779e42b65333d4b5f44ecc0", size = 1036042, upload-time = "2025-08-12T07:00:20.209Z" },
 ]
 
 [[package]]
 name = "setuptools"
-version = "80.9.0"
+version = "82.0.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" },
 ]
 
 [[package]]
@@ -2845,6 +2715,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
 ]
 
+[[package]]
+name = "sounddevice"
+version = "0.5.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cffi", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2a/f9/2592608737553638fca98e21e54bfec40bf577bb98a61b2770c912aab25e/sounddevice-0.5.5.tar.gz", hash = "sha256:22487b65198cb5bf2208755105b524f78ad173e5ab6b445bdab1c989f6698df3", size = 143191, upload-time = "2026-01-23T18:36:43.529Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/0a/478e441fd049002cf308520c0d62dd8333e7c6cc8d997f0dda07b9fbcc46/sounddevice-0.5.5-py3-none-any.whl", hash = "sha256:30ff99f6c107f49d25ad16a45cacd8d91c25a1bcdd3e81a206b921a3a6405b1f", size = 32807, upload-time = "2026-01-23T18:36:35.649Z" },
+    { url = "https://files.pythonhosted.org/packages/56/f9/c037c35f6d0b6bc3bc7bfb314f1d6f1f9a341328ef47cd63fc4f850a7b27/sounddevice-0.5.5-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:05eb9fd6c54c38d67741441c19164c0dae8ce80453af2d8c4ad2e7823d15b722", size = 108557, upload-time = "2026-01-23T18:36:37.41Z" },
+    { url = "https://files.pythonhosted.org/packages/88/a1/d19dd9889cd4bce2e233c4fac007cd8daaf5b9fe6e6a5d432cf17be0b807/sounddevice-0.5.5-py3-none-win32.whl", hash = "sha256:1234cc9b4c9df97b6cbe748146ae0ec64dd7d6e44739e8e42eaa5b595313a103", size = 317765, upload-time = "2026-01-23T18:36:39.047Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/0e/002ed7c4c1c2ab69031f78989d3b789fee3a7fba9e586eb2b81688bf4961/sounddevice-0.5.5-py3-none-win_amd64.whl", hash = "sha256:cfc6b2c49fb7f555591c78cb8ecf48d6a637fd5b6e1db5fec6ed9365d64b3519", size = 365324, upload-time = "2026-01-23T18:36:40.496Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/39/a61d4b83a7746b70d23d9173be688c0c6bfc7173772344b7442c2c155497/sounddevice-0.5.5-py3-none-win_arm64.whl", hash = "sha256:3861901ddd8230d2e0e8ae62ac320cdd4c688d81df89da036dcb812f757bb3e6", size = 317115, upload-time = "2026-01-23T18:36:42.235Z" },
+]
+
 [[package]]
 name = "sqlitedict"
 version = "2.1.0"
@@ -2853,14 +2739,14 @@ sdist = { url = "https://files.pythonhosted.org/packages/12/9a/7620d1e9dcb02839e
 
 [[package]]
 name = "starlette"
-version = "0.50.0"
+version = "1.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" },
 ]
 
 [[package]]
@@ -2868,7 +2754,7 @@ name = "sympy"
 version = "1.14.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "mpmath", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mpmath", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" }
 wheels = [
@@ -2877,15 +2763,15 @@ wheels = [
 
 [[package]]
 name = "tabledata"
-version = "1.3.4"
+version = "1.3.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "dataproperty", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typepy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "dataproperty", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typepy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/b2/35/171c8977162f1163368406deddde4c59673b62bd0cb2f34948a02effb075/tabledata-1.3.4.tar.gz", hash = "sha256:e9649cab129d718f3bff4150083b77f8a78c30f6634a30caf692b10fdc60cb97", size = 25074, upload-time = "2024-12-31T14:12:31.198Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/75/65/2f54f0dedd775dde48e300023d20e13ad329a51e33dcadb6d47b4dc95768/tabledata-1.3.5.tar.gz", hash = "sha256:98c64d0ad6b520846b41000fb3f5b2f42fa7ca2675c2c669e5ccab6b93082a36", size = 25396, upload-time = "2026-05-11T12:03:26.367Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/08/64/fa4160151976ee4b2cf0c1217a99443ffaeb991956feddfeac9eee9952f8/tabledata-1.3.4-py3-none-any.whl", hash = "sha256:1f56e433bfdeb89f4487abfa48c4603a3b07c5d3a3c7e05ff73dd018c24bd0d4", size = 11820, upload-time = "2024-12-31T14:12:28.584Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/86/37fa0e1437089f08b8b1b8c8ad93f6b57e9427753f002914299323300a9e/tabledata-1.3.5-py3-none-any.whl", hash = "sha256:a1e57afc4767b51bef551114c0df31f205d712dbb75e3caf9be7834a79f23136", size = 11919, upload-time = "2026-05-11T12:03:24.907Z" },
 ]
 
 [[package]]
@@ -2935,61 +2821,55 @@ wheels = [
 
 [[package]]
 name = "tiktoken"
-version = "0.12.0"
+version = "0.13.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e4/e5/5f3cb2159769d0f4324c0e9e87f9de3c4b1cd45848a96b2eb3566ad5ca77/tiktoken-0.13.0.tar.gz", hash = "sha256:c9435714c3a84c2319499de9a300c0e604449dd0799ff246458b3bb6a7f433c1", size = 38986, upload-time = "2026-05-15T04:51:27.153Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" },
-    { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" },
-    { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" },
-    { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" },
-    { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" },
-    { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" },
-    { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" },
-    { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" },
-    { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/83/b096c859c2a47c11731bf2f5885f4028b809dfe2396582883eed9cae372f/tiktoken-0.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5df5d1507bd245f1ccad4a074698240021239e455eb0bb4ced4e3d7181872154", size = 1034228, upload-time = "2026-05-15T04:50:40.988Z" },
+    { url = "https://files.pythonhosted.org/packages/53/61/c68e123b6d753e3fc2751e9b18e732c9d8bf1e1926762e736eee935d931c/tiktoken-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fe806a50664e83a6ffd56cbd1e4f5dcc6cd32a3e7538f70dc38b1a271384545", size = 982978, upload-time = "2026-05-15T04:50:42.195Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/8b/96cc178cc584e65d363134500f297790b06cd48cdeb1e8fcf7bbe60f4715/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:125bc05005e747f993a83dc67934249932d6e4209854452cd4c0b1d53fba3ba2", size = 1116355, upload-time = "2026-05-15T04:50:43.564Z" },
+    { url = "https://files.pythonhosted.org/packages/86/f5/bab735d2c72ea55404b295d02d092644eb5f7cc6205e34d35eb9abfb9ab2/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5e6358911cab4adee6712da27d65573496a4f68cf8a2b5fca6a4ad10fc5748cf", size = 1135772, upload-time = "2026-05-15T04:50:44.782Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/b9/6de04ebdf904edfaad87788011b3735087a0c9ea671b9027e1e4e965e8c8/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:975cbd78d085d75d26b59660e262736dcaed1e35f8f142cd6291025c01d25486", size = 1182415, upload-time = "2026-05-15T04:50:46.422Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/9c/470a05f3b1caf038f44880e334d47ab674e0c80d514c66b375d14d5afa10/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:75ab9bc99fa020a4c283424590ecd7f3afd70c1c281cb3fa3192a6c3af9f9615", size = 1239879, upload-time = "2026-05-15T04:50:48.052Z" },
+    { url = "https://files.pythonhosted.org/packages/42/a6/c1936d16055436cb32e6c6128d68629622e00f4768562f55653752d34768/tiktoken-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:6b1615f0ff71953d19729ceb18865429c185b0a23c5353f1bbca34a394bf60f7", size = 874829, upload-time = "2026-05-15T04:50:49.202Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/07/acb5992c3772b5a36284f742cfb7a5895aa4471d1848ac31464ad50d7fdf/tiktoken-0.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6eb4a5bfbc6426938026b1a334e898ac53541360d62d8c689870160cc80abd67", size = 1033600, upload-time = "2026-05-15T04:50:50.4Z" },
+    { url = "https://files.pythonhosted.org/packages/14/e9/742e9aec30f59b9f161f7ff7cd072e02ea836c9e1c0854a8076dfcd40d5c/tiktoken-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:43cee3e5400573b2046fbf092cc7a5bc30164f9e4c95ce20714da929df48737a", size = 982516, upload-time = "2026-05-15T04:50:52.03Z" },
+    { url = "https://files.pythonhosted.org/packages/72/74/ca1541b053e7648254d2e4b42a253e1bb4359f2c91a0a8d49228c794e1a0/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7de52e3f566d19b3b11bd37eea552c6c305ad74081f736882bd44d148ed4c48d", size = 1115518, upload-time = "2026-05-15T04:50:53.543Z" },
+    { url = "https://files.pythonhosted.org/packages/46/e3/93825eaf5a4a504795b787e5d5dea07fbeb3dabf97aa7b450be8bde59c89/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:51384448aa508e4df84c0f7c1dc3211c7f7b8096325660ee5fc82f3e11b381ce", size = 1136867, upload-time = "2026-05-15T04:50:55.191Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/46/002b68de6827091d5ae90b048f326e8aad8d953520950e5ce1508879414f/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e28157350f7ebf35008dd8e9e0fdb621f976e4230c881099c85e8cf07eaa50e2", size = 1181826, upload-time = "2026-05-15T04:50:56.296Z" },
+    { url = "https://files.pythonhosted.org/packages/db/c6/d393e3185a276505182f7abd93fe714f3c444a2be9180798fa052347504e/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:165cf1820ea4a354985c2490a5205d4cc74661c934aca79dd0368232fff94e0f", size = 1239489, upload-time = "2026-05-15T04:50:57.918Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/4d/bc07d1f1635d4897a202acc0ae11c2886eaa7325c359ba4741b47bf8e225/tiktoken-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6c43a675ca14f6f2749ba7f12075d37456015a24b859f2517b9beb4ef30807ec", size = 873820, upload-time = "2026-05-15T04:50:59.528Z" },
 ]
 
 [[package]]
 name = "tokenizers"
-version = "0.22.2"
+version = "0.23.0rc0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/0b/dc/2ba78324f6c82284f8d3d03bba16e5771d075aa4d5e9b4ecbd87af846af2/tokenizers-0.23.0rc0.tar.gz", hash = "sha256:685c6d269444451a2cf276d3f2bf655f3d7094be20c6553e413ede86b03c637b", size = 361629, upload-time = "2026-04-24T05:37:42.81Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" },
-    { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" },
-    { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" },
-    { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" },
-    { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" },
-    { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" },
-    { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" },
-    { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" },
-    { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" },
-    { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/b9/dda4065e0f4b62e0e5a625cbaeb928a611d847171e059066b3adfdb3866f/tokenizers-0.23.0rc0-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bed69208ba6f74057e18e3c8ed73d62e681ff44f7be642ddeff747247c8a7a98", size = 3134709, upload-time = "2026-04-24T05:37:31.89Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/16/54bd9f9e5c3641fe3d6d0e5b1cee37c58cb7520d22752c2065fc5a83caff/tokenizers-0.23.0rc0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:951be943c0657d8fd12e104731165a56d995c87533cd7f70a9444ddd7afa7708", size = 3043651, upload-time = "2026-04-24T05:37:30.305Z" },
+    { url = "https://files.pythonhosted.org/packages/86/11/54c1040ee93c8d74a364fbf4e17fd5d88e2eea940cbdba69d48d42a5a0c0/tokenizers-0.23.0rc0-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704ffd50130f6c85aa76ad16c8218ff0f966b14c6e6cab7d0636e492e487ffa5", size = 3365683, upload-time = "2026-04-24T05:37:18.674Z" },
+    { url = "https://files.pythonhosted.org/packages/14/79/c8a7bdfee971346119349dab62f9918de512a7e5a8177555eaa50d854e1f/tokenizers-0.23.0rc0-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bcd2a49117ad88999bc5d18d05addf67ec28e69f53e609ab07733c1f96404583", size = 3228688, upload-time = "2026-04-24T05:37:21.137Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/32/a46ab1348d0b573dab69860eee601927b9934323e40f6f6018bb362a6013/tokenizers-0.23.0rc0-cp310-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c52f927516521a3e1f6b6347f8bacedaf589eadd682e7ac87dac911d832c3a73", size = 3565137, upload-time = "2026-04-24T05:37:27.101Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/f1/1a3b6a30388fe7d4b57b1ea7fcd6192341e479d65e50366ee0ba13d96d14/tokenizers-0.23.0rc0-cp310-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d6add82746146a6e052295ac429949c2d8e723244aa97ffe30cfee6cd788e98", size = 3826198, upload-time = "2026-04-24T05:37:22.783Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/cb/161e52a424aa7ffb4097e8ce343d8dc2bdc42d590601032d4a9e6e5f7da5/tokenizers-0.23.0rc0-cp310-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:564115d3d6d2560b0a6b833d7dc39330d2328262557fbbd5bb0a14fb09b2b6cb", size = 3449011, upload-time = "2026-04-24T05:37:25.324Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/31/0e4b77ca48b302a5db827584c9784f6cdbb35380c0dd1d7668712d477bb5/tokenizers-0.23.0rc0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82167864c62a3d83880ed23dea267aa5760e3fcf16fd73f94d413baf1968b211", size = 3337931, upload-time = "2026-04-24T05:37:28.723Z" },
+    { url = "https://files.pythonhosted.org/packages/50/e4/939249edee0073417b2c9447fd3b06e90c283ef6df72f3124427edae1f96/tokenizers-0.23.0rc0-cp310-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:85f29751c4490bfaefe7e0d4b18ef28cd6d5f84c411e88ca896832eb4f18dd69", size = 3416560, upload-time = "2026-04-24T05:37:24.091Z" },
+    { url = "https://files.pythonhosted.org/packages/46/48/3a4bd2ba88af778e6fa6d03e271b2bc868f495745c8be91616781bf460d9/tokenizers-0.23.0rc0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f82b7578eaad0cbb72765d1fbaa7e7bc04c531337513a21f437b73e4617fcf46", size = 9810112, upload-time = "2026-04-24T05:37:33.679Z" },
+    { url = "https://files.pythonhosted.org/packages/45/8a/70c9919aefc7f514d6e98fb9be379b2850ca071a841d88900278781a07b0/tokenizers-0.23.0rc0-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:e61dff90a4ad8dc7e7e124d67756d63cf3ae57e32f04fb35bb408af91f47ea70", size = 9631038, upload-time = "2026-04-24T05:37:36.207Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/f6/c15a5514f50bf953b70d3d2b7fd1829aa327ba8c9c519c54623510d6f459/tokenizers-0.23.0rc0-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:5835b35d9a4815c8a4097d4dbac79c39b780684ea417fa4a93b9165e12ff1383", size = 9959195, upload-time = "2026-04-24T05:37:38.194Z" },
+    { url = "https://files.pythonhosted.org/packages/11/95/d1a6a0e6d6a9bc81b8124d83beb1fb1230310ee93938095f984a12fa336d/tokenizers-0.23.0rc0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:33ed7df57a040ffb6f0244639619632a06f4c287ed1e77b5e70febb58f9e9a8b", size = 10106242, upload-time = "2026-04-24T05:37:40.745Z" },
+    { url = "https://files.pythonhosted.org/packages/78/c4/d9d587b9b32c9fca5ea901225d5c4c616802eb0082b17481d23808941641/tokenizers-0.23.0rc0-cp310-abi3-win32.whl", hash = "sha256:ab264a8ffdea05b5fd71a8bca6572762bde9b7aaadeba16dd25c7352a625fa71", size = 2523576, upload-time = "2026-04-24T05:37:47.173Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/9b/34b36f6a47fec0a160887da23f173aa8a1729fa425ee67944c9be27f58de/tokenizers-0.23.0rc0-cp310-abi3-win_amd64.whl", hash = "sha256:27fe690eeb35a3a7e52f47d96c2ce8ffc6f939cc51a4591be86d2c86b9881267", size = 2788929, upload-time = "2026-04-24T05:37:45.81Z" },
+    { url = "https://files.pythonhosted.org/packages/35/ec/920d2b36ddddb5ce819a005d9650dc941935e534a27c48758c93388aaa5b/tokenizers-0.23.0rc0-cp310-abi3-win_arm64.whl", hash = "sha256:0b66c5eab2ddd26e59cfe6aa1945aa8b656ea0a9a715e24171c01b5ab1987630", size = 2655724, upload-time = "2026-04-24T05:37:44.108Z" },
 ]
 
 [[package]]
@@ -3003,90 +2883,105 @@ wheels = [
 
 [[package]]
 name = "tomlkit"
-version = "0.14.0"
+version = "0.15.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167, upload-time = "2026-01-13T01:14:53.304Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/51/db/03eaf4331631ef6b27d6e3c9b68c54dc6f0d63d87201fed600cc409307fd/tomlkit-0.15.0.tar.gz", hash = "sha256:7d1a9ecba3086638211b13814ea79c90dd54dd11993564376f3aa92271f5c7a3", size = 161875, upload-time = "2026-05-10T07:38:22.245Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/43/8bd850ee71a191bf072e31302c73a66be413fecdd98fdcd111ecbcce13ca/tomlkit-0.15.0-py3-none-any.whl", hash = "sha256:4dbc8f0fc024412b57ced8757ac7461305126a648ff8c2c807fcb8e133a78738", size = 41328, upload-time = "2026-05-10T07:38:23.517Z" },
 ]
 
 [[package]]
 name = "torch"
-version = "2.11.0"
+version = "2.10.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "cuda-bindings", marker = "sys_platform == 'linux'" },
-    { name = "cuda-toolkit", extra = ["cublas", "cudart", "cufft", "cufile", "cupti", "curand", "cusolver", "cusparse", "nvjitlink", "nvrtc", "nvtx"], marker = "sys_platform == 'linux'" },
-    { name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "fsspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "networkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "nvidia-cudnn-cu13", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cusparselt-cu13", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nvshmem-cu13", marker = "sys_platform == 'linux'" },
-    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "triton", marker = "sys_platform == 'linux'" },
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "filelock", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "fsspec", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "jinja2", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "networkx", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "setuptools", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "sympy", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/87/89/5ea6722763acee56b045435fb84258db7375c48165ec8be7880ab2b281c5/torch-2.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1e6debd97ccd3205bbb37eb806a9d8219e1139d15419982c09e23ef7d4369d18", size = 80606801, upload-time = "2026-03-23T18:10:18.649Z" },
-    { url = "https://files.pythonhosted.org/packages/32/d1/8ed2173589cbfe744ed54e5a73efc107c0085ba5777ee93a5f4c1ab90553/torch-2.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:63a68fa59de8f87acc7e85a5478bb2dddbb3392b7593ec3e78827c793c4b73fd", size = 419732382, upload-time = "2026-03-23T18:08:30.835Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/e1/b73f7c575a4b8f87a5928f50a1e35416b5e27295d8be9397d5293e7e8d4c/torch-2.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:cc89b9b173d9adfab59fd227f0ab5e5516d9a52b658ae41d64e59d2e55a418db", size = 530711509, upload-time = "2026-03-23T18:08:47.213Z" },
-    { url = "https://files.pythonhosted.org/packages/db/38/8ac78069621b8c2b4979c2f96dc8409ef5e9c4189f6aac629189a78677ca/torch-2.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8b394322f49af4362d4f80e424bcaca7efcd049619af03a4cf4501520bdf0fb4", size = 80959574, upload-time = "2026-03-23T18:10:14.214Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/6c/56bfb37073e7136e6dd86bfc6af7339946dd684e0ecf2155ac0eee687ae1/torch-2.11.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2658f34ce7e2dabf4ec73b45e2ca68aedad7a5be87ea756ad656eaf32bf1e1ea", size = 419732324, upload-time = "2026-03-23T18:09:36.604Z" },
-    { url = "https://files.pythonhosted.org/packages/07/f4/1b666b6d61d3394cca306ea543ed03a64aad0a201b6cd159f1d41010aeb1/torch-2.11.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:98bb213c3084cfe176302949bdc360074b18a9da7ab59ef2edc9d9f742504778", size = 530596026, upload-time = "2026-03-23T18:09:20.842Z" },
-    { url = "https://files.pythonhosted.org/packages/26/0d/8603382f61abd0db35841148ddc1ffd607bf3100b11c6e1dab6d2fc44e72/torch-2.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01018087326984a33b64e04c8cb5c2795f9120e0d775ada1f6638840227b04d7", size = 80573442, upload-time = "2026-03-23T18:09:10.117Z" },
-    { url = "https://files.pythonhosted.org/packages/c7/86/7cd7c66cb9cec6be330fff36db5bd0eef386d80c031b581ec81be1d4b26c/torch-2.11.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:2bb3cc54bd0dea126b0060bb1ec9de0f9c7f7342d93d436646516b0330cd5be7", size = 419749385, upload-time = "2026-03-23T18:07:33.77Z" },
-    { url = "https://files.pythonhosted.org/packages/47/e8/b98ca2d39b2e0e4730c0ee52537e488e7008025bc77ca89552ff91021f7c/torch-2.11.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4dc8b3809469b6c30b411bb8c4cad3828efd26236153d9beb6a3ec500f211a60", size = 530716756, upload-time = "2026-03-23T18:07:50.02Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/46/4419098ed6d801750f26567b478fc185c3432e11e2cad712bc6b4c2ab0d0/torch-2.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8245477871c3700d4370352ffec94b103cfcb737229445cf9946cddb7b2ca7cd", size = 80959460, upload-time = "2026-03-23T18:09:00.818Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/66/54a56a4a6ceaffb567231994a9745821d3af922a854ed33b0b3a278e0a99/torch-2.11.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:ab9a8482f475f9ba20e12db84b0e55e2f58784bdca43a854a6ccd3fd4b9f75e6", size = 419735835, upload-time = "2026-03-23T18:07:18.974Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/e7/0b6665f533aa9e337662dc190425abc0af1fe3234088f4454c52393ded61/torch-2.11.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:563ed3d25542d7e7bbc5b235ccfacfeb97fb470c7fee257eae599adb8005c8a2", size = 530613405, upload-time = "2026-03-23T18:08:07.014Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/c6/4dfe238342ffdcec5aef1c96c457548762d33c40b45a1ab7033bb26d2ff2/torch-2.10.0-3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80b1b5bfe38eb0e9f5ff09f206dcac0a87aadd084230d4a36eea5ec5232c115b", size = 915627275, upload-time = "2026-03-11T14:16:11.325Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/f0/72bf18847f58f877a6a8acf60614b14935e2f156d942483af1ffc081aea0/torch-2.10.0-3-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:46b3574d93a2a8134b3f5475cfb98e2eb46771794c57015f6ad1fb795ec25e49", size = 915523474, upload-time = "2026-03-11T14:17:44.422Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/6f/f2e91e34e3fcba2e3fc8d8f74e7d6c22e74e480bbd1db7bc8900fdf3e95c/torch-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5c4d217b14741e40776dd7074d9006fd28b8a97ef5654db959d8635b2fe5f29b", size = 146004247, upload-time = "2026-01-21T16:24:29.335Z" },
+    { url = "https://files.pythonhosted.org/packages/98/fb/5160261aeb5e1ee12ee95fe599d0541f7c976c3701d607d8fc29e623229f/torch-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6b71486353fce0f9714ca0c9ef1c850a2ae766b409808acd58e9678a3edb7738", size = 915716445, upload-time = "2026-01-21T16:22:45.353Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/16/502fb1b41e6d868e8deb5b0e3ae926bbb36dab8ceb0d1b769b266ad7b0c3/torch-2.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2ee399c644dc92ef7bc0d4f7e74b5360c37cdbe7c5ba11318dda49ffac2bc57", size = 113757050, upload-time = "2026-01-21T16:24:19.204Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305, upload-time = "2026-01-21T16:24:09.209Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/14/21fbce63bc452381ba5f74a2c0a959fdf5ad5803ccc0c654e752e0dbe91a/torch-2.10.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:aae1b29cd68e50a9397f5ee897b9c24742e9e306f88a807a27d617f07adb3bd8", size = 146005472, upload-time = "2026-01-21T16:22:29.022Z" },
+    { url = "https://files.pythonhosted.org/packages/54/fd/b207d1c525cb570ef47f3e9f836b154685011fce11a2f444ba8a4084d042/torch-2.10.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6021db85958db2f07ec94e1bc77212721ba4920c12a18dc552d2ae36a3eb163f", size = 915612644, upload-time = "2026-01-21T16:21:47.019Z" },
+    { url = "https://files.pythonhosted.org/packages/36/53/0197f868c75f1050b199fe58f9bf3bf3aecac9b4e85cc9c964383d745403/torch-2.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff43db38af76fda183156153983c9a096fc4c78d0cd1e07b14a2314c7f01c2c8", size = 113997015, upload-time = "2026-01-21T16:23:00.767Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248, upload-time = "2026-01-21T16:23:09.315Z" },
+]
+
+[[package]]
+name = "torchaudio"
+version = "2.10.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "torch", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b6/02/341e7bd588355f82c5180103cb2f8070a72ab1be920ab27553a1135d4aa6/torchaudio-2.10.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:8fd38d28ee150c584d3ee3b05f39e021f0ad8a8ec8fec1f26dfe150c9db9b2f5", size = 737164, upload-time = "2026-01-21T16:28:38.354Z" },
+    { url = "https://files.pythonhosted.org/packages/49/fd/831c2595c81b17141180ca11ab3c0836cc544ef13e15aa0e7b2cb619e582/torchaudio-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5bc39ff3ea341097ce1ab023dd88c9dd8ca5f96ebf48821e7d23766137bb55d7", size = 392757, upload-time = "2026-01-21T16:28:33.631Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/d8/405c80c57dc68ca5855bddfaae57c3d84ea7397bf1eb2aa5d59c9fa1d3a9/torchaudio-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3057c4286db5673d266124a2a10ca54e19f516772e9057f44573a7da5b85e328", size = 1897099, upload-time = "2026-01-21T16:28:24.793Z" },
+    { url = "https://files.pythonhosted.org/packages/73/cf/0e48d67788c935e3b3d00e6f55a930a54a67f432e04c33ef80a38cb764fd/torchaudio-2.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:99e74d1901742bc10961d807fe75c0dd9496f4a4a4ff4bb317c5de4a0b6f24e6", size = 475476, upload-time = "2026-01-21T16:28:28.249Z" },
+    { url = "https://files.pythonhosted.org/packages/48/29/30bcce0f17a8279b051b09250993691a828f89a03278306b23571c18df04/torchaudio-2.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6cfe98ef0ea9bee6d6297493ce67ce0c54a38d80caf6535a3ae48900fd5f3769", size = 742449, upload-time = "2026-01-21T16:28:29.556Z" },
+    { url = "https://files.pythonhosted.org/packages/43/8c/653e7f67855424bf3b7cbb48335f8316f7fb02bb01a6cab38f6bf9555676/torchaudio-2.10.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:b41b254d958632dc00dc7768431cadda516c91641d798775cbb19bcd4f0d2be4", size = 393430, upload-time = "2026-01-21T16:28:34.855Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/1f/f91fcb9dd47a19b720fb48042a2f6f023651948e73726e98fff60d5ed5c7/torchaudio-2.10.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:da1081d1018a1e95f5a13947402aeb037cf5ac8861219a6164df004898a96bb1", size = 1897271, upload-time = "2026-01-21T16:28:23.519Z" },
+    { url = "https://files.pythonhosted.org/packages/57/27/270c26890f43838e8faa5d3e52f079bd9d9d09f9a535a11cf6b94e20ed21/torchaudio-2.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f1afa53146a5655258d3a86e689c6879dfe78581d9bee9ef611ace98722f86bb", size = 478966, upload-time = "2026-01-21T16:28:32.491Z" },
+]
+
+[[package]]
+name = "torchvision"
+version = "0.25.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pillow", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "torch", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f5/5b/1562a04a6a5a4cf8cf40016a0cdeda91ede75d6962cff7f809a85ae966a5/torchvision-0.25.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:24e11199e4d84ba9c5ee7825ebdf1cd37ce8deec225117f10243cae984ced3ec", size = 1874918, upload-time = "2026-01-21T16:27:39.02Z" },
+    { url = "https://files.pythonhosted.org/packages/36/b1/3d6c42f62c272ce34fcce609bb8939bdf873dab5f1b798fd4e880255f129/torchvision-0.25.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f271136d2d2c0b7a24c5671795c6e4fd8da4e0ea98aeb1041f62bc04c4370ef", size = 2309106, upload-time = "2026-01-21T16:27:30.624Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/60/59bb9c8b67cce356daeed4cb96a717caa4f69c9822f72e223a0eae7a9bd9/torchvision-0.25.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:855c0dc6d37f462482da7531c6788518baedca1e0847f3df42a911713acdfe52", size = 8071522, upload-time = "2026-01-21T16:27:29.392Z" },
+    { url = "https://files.pythonhosted.org/packages/32/a5/9a9b1de0720f884ea50dbf9acb22cbe5312e51d7b8c4ac6ba9b51efd9bba/torchvision-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:cef0196be31be421f6f462d1e9da1101be7332d91984caa6f8022e6c78a5877f", size = 4321911, upload-time = "2026-01-21T16:27:35.195Z" },
+    { url = "https://files.pythonhosted.org/packages/52/99/dca81ed21ebaeff2b67cc9f815a20fdaa418b69f5f9ea4c6ed71721470db/torchvision-0.25.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a8f8061284395ce31bcd460f2169013382ccf411148ceb2ee38e718e9860f5a7", size = 1896209, upload-time = "2026-01-21T16:27:32.159Z" },
+    { url = "https://files.pythonhosted.org/packages/28/cc/2103149761fdb4eaed58a53e8437b2d716d48f05174fab1d9fcf1e2a2244/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:146d02c9876858420adf41f3189fe90e3d6a409cbfa65454c09f25fb33bf7266", size = 2310735, upload-time = "2026-01-21T16:27:22.327Z" },
+    { url = "https://files.pythonhosted.org/packages/76/ad/f4c985ad52ddd3b22711c588501be1b330adaeaf6850317f66751711b78c/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c4d395cb2c4a2712f6eb93a34476cdf7aae74bb6ea2ea1917f858e96344b00aa", size = 8089557, upload-time = "2026-01-21T16:27:27.666Z" },
+    { url = "https://files.pythonhosted.org/packages/63/cc/0ea68b5802e5e3c31f44b307e74947bad5a38cc655231d845534ed50ddb8/torchvision-0.25.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5e6b449e9fa7d642142c0e27c41e5a43b508d57ed8e79b7c0a0c28652da8678c", size = 4344260, upload-time = "2026-01-21T16:27:17.018Z" },
 ]
 
 [[package]]
 name = "tqdm"
-version = "4.67.1"
+version = "4.67.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
+    { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
 ]
 
 [[package]]
 name = "transformers"
-version = "5.3.0"
+version = "5.8.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "typer", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "typer", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/e6/4134ea2fbea322cddc7ffc94a0d8ee47fe32ce8e876b320cd37d88edfc4d/transformers-5.8.1.tar.gz", hash = "sha256:4dd5b6de4105725104d84fd6abd74b305f4debfc251b38c648ee5dd087cf543b", size = 8532019, upload-time = "2026-05-13T03:21:57.234Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" },
-]
-
-[[package]]
-name = "triton"
-version = "3.6.0"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087, upload-time = "2026-01-20T16:16:18.989Z" },
-    { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" },
-    { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577, upload-time = "2026-01-20T16:16:25.426Z" },
-    { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = "2026-01-20T16:01:07.278Z" },
-    { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804, upload-time = "2026-01-20T16:16:31.528Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/b1/8be7e7ef0b5200491312201918b6125ef9c9df9dd0f0240ccef9ac824e6b/transformers-5.8.1-py3-none-any.whl", hash = "sha256:5340fb95962162cdfdae5cc91d7f8fedd92ed75216c1154c5e1f590fcf56dd0e", size = 10632882, upload-time = "2026-05-13T03:21:52.876Z" },
 ]
 
 [[package]]
@@ -3094,15 +2989,15 @@ name = "twine"
 version = "6.2.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "id", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "keyring", marker = "(platform_machine != 'ppc64le' and platform_machine != 's390x' and sys_platform == 'darwin') or (platform_machine != 'ppc64le' and platform_machine != 's390x' and sys_platform == 'linux')" },
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "readme-renderer", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests-toolbelt", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "rfc3986", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "id", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "keyring", marker = "(platform_machine != 'ppc64le' and platform_machine != 's390x' and sys_platform == 'darwin') or (platform_machine != 'ppc64le' and platform_machine != 's390x' and sys_platform == 'linux') or (platform_machine == 'ppc64le' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 'ppc64le' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'ppc64le' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'ppc64le' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'ppc64le' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'ppc64le' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 's390x' and sys_platform == 'darwin' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'ppc64le' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or 
(platform_machine == 'ppc64le' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'ppc64le' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'ppc64le' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 'ppc64le' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 'ppc64le' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (platform_machine == 's390x' and sys_platform == 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 
'extra-3-exo-mlx-cuda13') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "readme-renderer", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "requests-toolbelt", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "rfc3986", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "urllib3", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e0/a8/949edebe3a82774c1ec34f637f5dd82d1cf22c25e963b7d63771083bbee5/twine-6.2.0.tar.gz", hash = "sha256:e5ed0d2fd70c9959770dce51c8f39c8945c574e18173a7b81802dab51b4b75cf", size = 172262, upload-time = "2025-09-04T15:43:17.255Z" }
 wheels = [
@@ -3111,45 +3006,45 @@ wheels = [
 
 [[package]]
 name = "typepy"
-version = "1.3.4"
+version = "1.3.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/79/59/4c39942077d7de285f762a91024dbda731be693591732977358f77d120fb/typepy-1.3.4.tar.gz", hash = "sha256:89c1f66de6c6133209c43a94d23431d320ba03ef5db18f241091ea594035d9de", size = 39558, upload-time = "2024-12-29T09:18:15.774Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/8b/9f/ae119b0e0fd0fe8dcb0e1eeebfeb62f37fdc0b467267cff15cdb746ba38b/typepy-1.3.5.tar.gz", hash = "sha256:a1c5f54c41860f89bab175f512b11e8c9a57cfe7b8b3d5ae5d52d828b756b6dd", size = 39883, upload-time = "2026-05-04T14:04:32.835Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ee/31/e393c3830bdedd01735bd195c85ac3034b6bcaf6c18142bab60a4047ca36/typepy-1.3.4-py3-none-any.whl", hash = "sha256:d5ed3e0c7f49521bff0603dd08cf8d453371cf68d65a29d3d0038552ccc46e2e", size = 31449, upload-time = "2024-12-29T09:18:13.135Z" },
+    { url = "https://files.pythonhosted.org/packages/64/71/75cf08c49b64a9419f1f2cef9be072ac34f6b784da2851489470b7c7ba15/typepy-1.3.5-py3-none-any.whl", hash = "sha256:de361b59609c7503efc2edbe9d7a4e053ae71307bf90ae1678ec4d6bcd807922", size = 31530, upload-time = "2026-05-04T14:04:31.46Z" },
 ]
 
 [package.optional-dependencies]
 datetime = [
-    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "pytz", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "pytz", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 
 [[package]]
 name = "typer"
-version = "0.24.1"
+version = "0.25.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "annotated-doc", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "shellingham", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "annotated-doc", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "shellingham", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e4/51/9aed62104cea109b820bbd6c14245af756112017d309da813ef107d42e7e/typer-0.25.1.tar.gz", hash = "sha256:9616eb8853a09ffeabab1698952f33c6f29ffdbceb4eaeecf571880e8d7664cc", size = 122276, upload-time = "2026-04-30T19:32:16.964Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl", hash = "sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89", size = 58409, upload-time = "2026-04-30T19:32:18.271Z" },
 ]
 
 [[package]]
 name = "types-aiofiles"
-version = "25.1.0.20251011"
+version = "25.1.0.20260518"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/84/6c/6d23908a8217e36704aa9c79d99a620f2fdd388b66a4b7f72fbc6b6ff6c6/types_aiofiles-25.1.0.20251011.tar.gz", hash = "sha256:1c2b8ab260cb3cd40c15f9d10efdc05a6e1e6b02899304d80dfa0410e028d3ff", size = 14535, upload-time = "2025-10-11T02:44:51.237Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/df/42/f5b9b90162d2196f016b87228d6bf43f2c2c0c6501bfd5415001b3eb68bb/types_aiofiles-25.1.0.20260518.tar.gz", hash = "sha256:c0c95eb78755d4fa7b397d4f0332c632714dd7cd0d17f49b96e31d4d7a8d8c76", size = 14891, upload-time = "2026-05-18T06:05:27.804Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/71/0f/76917bab27e270bb6c32addd5968d69e558e5b6f7fb4ac4cbfa282996a96/types_aiofiles-25.1.0.20251011-py3-none-any.whl", hash = "sha256:8ff8de7f9d42739d8f0dadcceeb781ce27cd8d8c4152d4a7c52f6b20edb8149c", size = 14338, upload-time = "2025-10-11T02:44:50.054Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/3d/7a9ed9faafeae3aa3b5bc22fa5b979ff9cf3c83ecbe919b58eae07795b8c/types_aiofiles-25.1.0.20260518-py3-none-any.whl", hash = "sha256:f776bdfb4bec17f743d9ef042e61edf03bdcc7821fc08556fba9b63d873fdea9", size = 14377, upload-time = "2026-05-18T06:05:26.871Z" },
 ]
 
 [[package]]
@@ -3166,7 +3061,7 @@ name = "typing-inspection"
 version = "0.4.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" }
 wheels = [
@@ -3175,24 +3070,24 @@ wheels = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.3"
+version = "2.7.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" },
 ]
 
 [[package]]
 name = "uvicorn"
-version = "0.42.0"
+version = "0.47.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f6/b1/8e7077a8641086aea449e1b5752a570f1b5906c64e0a33cd6d93b63a066b/uvicorn-0.47.0.tar.gz", hash = "sha256:7c9a0ea1a9414106bbab7324609c162d8fa0cdcdcb703060987269d77c7bb533", size = 90582, upload-time = "2026-05-14T18:16:54.455Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" },
+    { url = "https://files.pythonhosted.org/packages/15/41/ac2dfdbc1f60c7af4f994c7a335cfa7040c01642b605d65f611cecc2a1e4/uvicorn-0.47.0-py3-none-any.whl", hash = "sha256:2c5715bc12d1892d84752049f400cd1c3cb018514967fdfeb97640443a6a9432", size = 71301, upload-time = "2026-05-14T18:16:51.762Z" },
 ]
 
 [[package]]
@@ -3206,7 +3101,7 @@ name = "wsproto"
 version = "1.3.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "h11", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c7/79/12135bdf8b9c9367b8701c2c19a14c913c120b882d50b014ca0d38083c2c/wsproto-1.3.2.tar.gz", hash = "sha256:b86885dcf294e15204919950f666e06ffc6c7c114ca900b060d6e16293528294", size = 50116, upload-time = "2025-11-20T18:18:01.871Z" }
 wheels = [
@@ -3215,124 +3110,103 @@ wheels = [
 
 [[package]]
 name = "xxhash"
-version = "3.6.0"
+version = "3.7.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/24/2f/e183a1b407002f5af81822bee18b61cdb94b8670208ef34734d8d2b8ebe9/xxhash-3.7.0.tar.gz", hash = "sha256:6cc4eefbb542a5d6ffd6d70ea9c502957c925e800f998c5630ecc809d6702bae", size = 82022, upload-time = "2026-04-25T11:10:32.553Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/33/76/35d05267ac82f53ae9b0e554da7c5e281ee61f3cad44c743f0fcd354f211/xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec", size = 32738, upload-time = "2025-10-02T14:34:55.839Z" },
-    { url = "https://files.pythonhosted.org/packages/31/a8/3fbce1cd96534a95e35d5120637bf29b0d7f5d8fa2f6374e31b4156dd419/xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1", size = 30821, upload-time = "2025-10-02T14:34:57.219Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/ea/d387530ca7ecfa183cb358027f1833297c6ac6098223fd14f9782cd0015c/xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6", size = 194127, upload-time = "2025-10-02T14:34:59.21Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/0c/71435dcb99874b09a43b8d7c54071e600a7481e42b3e3ce1eb5226a5711a/xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263", size = 212975, upload-time = "2025-10-02T14:35:00.816Z" },
-    { url = "https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241, upload-time = "2025-10-02T14:35:02.207Z" },
-    { url = "https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471, upload-time = "2025-10-02T14:35:03.61Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936, upload-time = "2025-10-02T14:35:05.013Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/bd/4a5f68381939219abfe1c22a9e3a5854a4f6f6f3c4983a87d255f21f2e5d/xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7", size = 210440, upload-time = "2025-10-02T14:35:06.239Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/37/b80fe3d5cfb9faff01a02121a0f4d565eb7237e9e5fc66e73017e74dcd36/xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db", size = 197990, upload-time = "2025-10-02T14:35:07.735Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689, upload-time = "2025-10-02T14:35:09.438Z" },
-    { url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068, upload-time = "2025-10-02T14:35:11.162Z" },
-    { url = "https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495, upload-time = "2025-10-02T14:35:12.971Z" },
-    { url = "https://files.pythonhosted.org/packages/f3/30/25e5321c8732759e930c555176d37e24ab84365482d257c3b16362235212/xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702", size = 32956, upload-time = "2025-10-02T14:35:17.413Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/3c/0573299560d7d9f8ab1838f1efc021a280b5ae5ae2e849034ef3dee18810/xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db", size = 31072, upload-time = "2025-10-02T14:35:18.844Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/1c/52d83a06e417cd9d4137722693424885cc9878249beb3a7c829e74bf7ce9/xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54", size = 196409, upload-time = "2025-10-02T14:35:20.31Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/8e/c6d158d12a79bbd0b878f8355432075fc82759e356ab5a111463422a239b/xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f", size = 215736, upload-time = "2025-10-02T14:35:21.616Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833, upload-time = "2025-10-02T14:35:23.32Z" },
-    { url = "https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348, upload-time = "2025-10-02T14:35:25.111Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070, upload-time = "2025-10-02T14:35:26.586Z" },
-    { url = "https://files.pythonhosted.org/packages/96/b6/fcabd337bc5fa624e7203aa0fa7d0c49eed22f72e93229431752bddc83d9/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd", size = 212907, upload-time = "2025-10-02T14:35:28.087Z" },
-    { url = "https://files.pythonhosted.org/packages/4b/d3/9ee6160e644d660fcf176c5825e61411c7f62648728f69c79ba237250143/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729", size = 200839, upload-time = "2025-10-02T14:35:29.857Z" },
-    { url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304, upload-time = "2025-10-02T14:35:31.222Z" },
-    { url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930, upload-time = "2025-10-02T14:35:32.517Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787, upload-time = "2025-10-02T14:35:33.827Z" },
-    { url = "https://files.pythonhosted.org/packages/7e/5e/0138bc4484ea9b897864d59fce9be9086030825bc778b76cb5a33a906d37/xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e", size = 32754, upload-time = "2025-10-02T14:35:38.245Z" },
-    { url = "https://files.pythonhosted.org/packages/18/d7/5dac2eb2ec75fd771957a13e5dda560efb2176d5203f39502a5fc571f899/xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405", size = 30846, upload-time = "2025-10-02T14:35:39.6Z" },
-    { url = "https://files.pythonhosted.org/packages/fe/71/8bc5be2bb00deb5682e92e8da955ebe5fa982da13a69da5a40a4c8db12fb/xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3", size = 194343, upload-time = "2025-10-02T14:35:40.69Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/3b/52badfb2aecec2c377ddf1ae75f55db3ba2d321c5e164f14461c90837ef3/xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6", size = 213074, upload-time = "2025-10-02T14:35:42.29Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/2b/ae46b4e9b92e537fa30d03dbc19cdae57ed407e9c26d163895e968e3de85/xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063", size = 212388, upload-time = "2025-10-02T14:35:43.929Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/80/49f88d3afc724b4ac7fbd664c8452d6db51b49915be48c6982659e0e7942/xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7", size = 445614, upload-time = "2025-10-02T14:35:45.216Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/ba/603ce3961e339413543d8cd44f21f2c80e2a7c5cfe692a7b1f2cccf58f3c/xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b", size = 194024, upload-time = "2025-10-02T14:35:46.959Z" },
-    { url = "https://files.pythonhosted.org/packages/78/d1/8e225ff7113bf81545cfdcd79eef124a7b7064a0bba53605ff39590b95c2/xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd", size = 210541, upload-time = "2025-10-02T14:35:48.301Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/58/0f89d149f0bad89def1a8dd38feb50ccdeb643d9797ec84707091d4cb494/xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0", size = 198305, upload-time = "2025-10-02T14:35:49.584Z" },
-    { url = "https://files.pythonhosted.org/packages/11/38/5eab81580703c4df93feb5f32ff8fa7fe1e2c51c1f183ee4e48d4bb9d3d7/xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152", size = 210848, upload-time = "2025-10-02T14:35:50.877Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/6b/953dc4b05c3ce678abca756416e4c130d2382f877a9c30a20d08ee6a77c0/xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11", size = 414142, upload-time = "2025-10-02T14:35:52.15Z" },
-    { url = "https://files.pythonhosted.org/packages/08/a9/238ec0d4e81a10eb5026d4a6972677cbc898ba6c8b9dbaec12ae001b1b35/xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5", size = 191547, upload-time = "2025-10-02T14:35:53.547Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/db/0e99732ed7f64182aef4a6fb145e1a295558deec2a746265dcdec12d191e/xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4", size = 32955, upload-time = "2025-10-02T14:35:58.267Z" },
-    { url = "https://files.pythonhosted.org/packages/55/f4/2a7c3c68e564a099becfa44bb3d398810cc0ff6749b0d3cb8ccb93f23c14/xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67", size = 31072, upload-time = "2025-10-02T14:35:59.382Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/d9/72a29cddc7250e8a5819dad5d466facb5dc4c802ce120645630149127e73/xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad", size = 196579, upload-time = "2025-10-02T14:36:00.838Z" },
-    { url = "https://files.pythonhosted.org/packages/63/93/b21590e1e381040e2ca305a884d89e1c345b347404f7780f07f2cdd47ef4/xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b", size = 215854, upload-time = "2025-10-02T14:36:02.207Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/b8/edab8a7d4fa14e924b29be877d54155dcbd8b80be85ea00d2be3413a9ed4/xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b", size = 214965, upload-time = "2025-10-02T14:36:03.507Z" },
-    { url = "https://files.pythonhosted.org/packages/27/67/dfa980ac7f0d509d54ea0d5a486d2bb4b80c3f1bb22b66e6a05d3efaf6c0/xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca", size = 448484, upload-time = "2025-10-02T14:36:04.828Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/63/8ffc2cc97e811c0ca5d00ab36604b3ea6f4254f20b7bc658ca825ce6c954/xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a", size = 196162, upload-time = "2025-10-02T14:36:06.182Z" },
-    { url = "https://files.pythonhosted.org/packages/4b/77/07f0e7a3edd11a6097e990f6e5b815b6592459cb16dae990d967693e6ea9/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99", size = 213007, upload-time = "2025-10-02T14:36:07.733Z" },
-    { url = "https://files.pythonhosted.org/packages/ae/d8/bc5fa0d152837117eb0bef6f83f956c509332ce133c91c63ce07ee7c4873/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3", size = 200956, upload-time = "2025-10-02T14:36:09.106Z" },
-    { url = "https://files.pythonhosted.org/packages/26/a5/d749334130de9411783873e9b98ecc46688dad5db64ca6e04b02acc8b473/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6", size = 213401, upload-time = "2025-10-02T14:36:10.585Z" },
-    { url = "https://files.pythonhosted.org/packages/89/72/abed959c956a4bfc72b58c0384bb7940663c678127538634d896b1195c10/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93", size = 417083, upload-time = "2025-10-02T14:36:12.276Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/b3/62fd2b586283b7d7d665fb98e266decadf31f058f1cf6c478741f68af0cb/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518", size = 193913, upload-time = "2025-10-02T14:36:14.025Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/ca/d5174b4c36d10f64d4ca7050563138c5a599efb01a765858ddefc9c1202a/xxhash-3.7.0-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:4b6d6b33f141158692bd4eafbb96edbc5aa0dabdb593a962db01a91983d4f8fa", size = 36813, upload-time = "2026-04-25T11:06:51.73Z" },
+    { url = "https://files.pythonhosted.org/packages/41/d0/abc6c9d347ba1f1e1e1d98125d0881a0452c7f9a76a9dd03a7b5d2197f23/xxhash-3.7.0-cp313-cp313-android_21_x86_64.whl", hash = "sha256:845d347df254d6c619f616afa921331bada8614b8d373d58725c663ba97c3605", size = 35121, upload-time = "2026-04-25T11:06:53.048Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/11/4cc834eb3d79f2f2b3a6ef7324195208bcdfbdcf7534d2b17267aa5f3a8f/xxhash-3.7.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:fddbbb69a6fff4f421e7a0d1fa28f894b20112e9e3fab306af451e2dfd0e459b", size = 29624, upload-time = "2026-04-25T11:06:54.311Z" },
+    { url = "https://files.pythonhosted.org/packages/23/83/e97d3e7b635fe73a1dfb1e91f805324dd6d930bb42041cbf18f183bc0b6d/xxhash-3.7.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:54876a4e45101cec2bf8f31a973cda073a23e2e108538dad224ba07f85f22487", size = 30638, upload-time = "2026-04-25T11:06:55.864Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/40/d84951d80c35db1f4c40a29a64a8520eea5d56e764c603906b4fe763580f/xxhash-3.7.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:0c72fe9c7e3d6dfd7f1e21e224a877917fa09c465694ba4e06464b9511b65544", size = 33323, upload-time = "2026-04-25T11:06:57.336Z" },
+    { url = "https://files.pythonhosted.org/packages/89/cc/c7dc6558d97e9ab023f663d69ab28b340ed9bf4d2d94f2c259cf896bb354/xxhash-3.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a6d73a830b17ef49bc04e00182bd839164c1b3c59c127cd7c54fcb10c7ed8ee8", size = 33362, upload-time = "2026-04-25T11:06:58.656Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/6e/46b84017b1301d54091430353d4ad5901654a3e0871649877a416f7f1644/xxhash-3.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:91c3b07cf3362086d8f126c6aecd8e5e9396ad8b2f2219ea7e49a8250c318acd", size = 30874, upload-time = "2026-04-25T11:06:59.834Z" },
+    { url = "https://files.pythonhosted.org/packages/df/5e/8f9158e3ab906ad3fec51e09b5ea0093e769f12207bfa42a368ca204e7ab/xxhash-3.7.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:50e879ebbac351c81565ca108db766d7832f5b8b6a5b14b8c0151f7190028e3d", size = 194185, upload-time = "2026-04-25T11:07:01.658Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/29/a804ded9f5d3d3758292678d23e7528b08fda7b7e750688d08b052322475/xxhash-3.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:921c14e93817842dd0dd9f372890a0f0c72e534650b6ab13c5be5cd0db11d47e", size = 213033, upload-time = "2026-04-25T11:07:03.606Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/91/1ce5a7d2fdc975267320e2c78fc1cecfe7ab735ccbcf6993ec5dd541cb2c/xxhash-3.7.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e64a7c9d7dfca3e0fafcbc5e455519090706a3e36e95d655cec3e04e79f95aaa", size = 236140, upload-time = "2026-04-25T11:07:05.396Z" },
+    { url = "https://files.pythonhosted.org/packages/34/04/fd595a4fd8617b05fa27bd9b684ecb4985bfed27917848eea85d54036d06/xxhash-3.7.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2220af08163baf5fa36c2b8af079dc2cbe6e66ae061385267f9472362dfd53c6", size = 212291, upload-time = "2026-04-25T11:07:06.966Z" },
+    { url = "https://files.pythonhosted.org/packages/03/fb/f1a379cbc372ae5b9f4ab36154c48a849ca6ebe3ac477067a57865bf3bc6/xxhash-3.7.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f14bb8b22a4a91325813e3d553b8963c10cf8c756cff65ee50c194431296c655", size = 445532, upload-time = "2026-04-25T11:07:08.525Z" },
+    { url = "https://files.pythonhosted.org/packages/65/59/172424b79f8cfd4b6d8a122b2193e6b8ad4b11f7159bb3b6f9b3191329bb/xxhash-3.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:496736f86a9bedaf64b0dc70e3539d0766df01c71ea22032698e88f3f04a1ce9", size = 193990, upload-time = "2026-04-25T11:07:10.315Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/19/aeac22161d953f139f07ba5586cb4a17c5b7b6dff985122803bb12933500/xxhash-3.7.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0ff71596bd79816975b3de7130ab1ff4541410285a3c084584eeb1c8239996fd", size = 284876, upload-time = "2026-04-25T11:07:12.15Z" },
+    { url = "https://files.pythonhosted.org/packages/77/d5/4fd0b59e7a02242953da05ff679fbb961b0a4368eac97a217e11dae110c1/xxhash-3.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1ad86695c19b1d46fe106925db3c7a37f16be37669dcf58dcc70a9dd6e324676", size = 210495, upload-time = "2026-04-25T11:07:13.952Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/fb/976a3165c728c7faf74aa1b5ab3cf6a85e6d731612894741840524c7d28c/xxhash-3.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:970f9f8c50961d639cbd0d988c96f80ddf66006de93641719282c4fe7a87c5e6", size = 241331, upload-time = "2026-04-25T11:07:15.557Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/2c/6763d5901d53ac9e6ba296e5717ae599025c9d268396e8faa8b4b0a8e0ac/xxhash-3.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5886ad85e9e347911783760a1d16cb6b393e8f9e3b52c982568226cb56927bdc", size = 198037, upload-time = "2026-04-25T11:07:17.563Z" },
+    { url = "https://files.pythonhosted.org/packages/61/2b/876e722d533833f5f9a83473e6ba993e48745701096944e77bbecf29b2c3/xxhash-3.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6e934bbae1e0ec74e27d5f0d7f37ef547ce5ff9f0a7e63fb39e559fc99526734", size = 210744, upload-time = "2026-04-25T11:07:19.055Z" },
+    { url = "https://files.pythonhosted.org/packages/21/e6/d7e7baef7ce24166b4668d3c48557bb35a23b92ecadcac7e7718d099ab69/xxhash-3.7.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:3b6b3d28228af044ebcded71c4a3dd86e1dbd7e2f4645bf40f7b5da65bb5fb5a", size = 275406, upload-time = "2026-04-25T11:07:20.908Z" },
+    { url = "https://files.pythonhosted.org/packages/92/fe/198b3763b2e01ca908f2154969a2352ec99bda892b574a11a9a151c5ede4/xxhash-3.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:6be4d70d9ab76c9f324ead9c01af6ff52c324745ea0c3731682a0cf99720f1fe", size = 414125, upload-time = "2026-04-25T11:07:23.037Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/6d/019a11affd5a5499137cacca53808659964785439855b5aa40dfd3412916/xxhash-3.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:151d7520838d4465461a0b7f4ae488b3b00de16183dd3214c1a6b14bf89d7fb6", size = 191555, upload-time = "2026-04-25T11:07:24.991Z" },
+    { url = "https://files.pythonhosted.org/packages/76/21/b96d58568df2d01533244c3e0e5cbdd0c8b2b25c4bec4d72f19259a292d7/xxhash-3.7.0-cp313-cp313-win32.whl", hash = "sha256:d798c1e291bffb8e37b5bbe0dda77fc767cd19e89cadaf66e6ed5d0ff88c9fe6", size = 30668, upload-time = "2026-04-25T11:07:26.665Z" },
+    { url = "https://files.pythonhosted.org/packages/99/57/d849a8d3afa1f8f4bc6a831cd89f49f9706fbbad94d2975d6140a171988c/xxhash-3.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:875811ba23c543b1a1c3143c926e43996eb27ebb8f52d3500744aa608c275aed", size = 31524, upload-time = "2026-04-25T11:07:27.92Z" },
+    { url = "https://files.pythonhosted.org/packages/81/52/bacc753e92dee78b058af8dcef0a50815f5f860986c664a92d75f965b6a5/xxhash-3.7.0-cp313-cp313-win_arm64.whl", hash = "sha256:54a675cb300dda83d71daae2a599389d22db8021a0f8db0dd659e14626eb3ecc", size = 27768, upload-time = "2026-04-25T11:07:29.113Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/47/ddbd683b7fc7e592c1a8d9d65f73ce9ab513f082b3967eee2baf549b8fc6/xxhash-3.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a3b19a42111c4057c1547a4a1396a53961dca576a0f6b82bfa88a2d1561764b2", size = 33576, upload-time = "2026-04-25T11:07:30.469Z" },
+    { url = "https://files.pythonhosted.org/packages/07/f2/36d3310161db7f72efb4562aadde0ed429f1d0531782dd6345b12d2da527/xxhash-3.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8f4608a06e4d61b7a3425665a46d00e0579122e1a2fae97a0c52953a3aad9aa3", size = 31123, upload-time = "2026-04-25T11:07:31.989Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/3f/75937a5c69556ed213021e43cbedd84c8e0279d0d74e7d41a255d84ba4b1/xxhash-3.7.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ad37c7792479e49cf96c1ab25517d7003fe0d93687a772ba19a097d235bbe41e", size = 196491, upload-time = "2026-04-25T11:07:33.358Z" },
+    { url = "https://files.pythonhosted.org/packages/22/29/f10d7ff8c7a733d4403a43b9de18c8fabc005f98cec054644f04418659ee/xxhash-3.7.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc026e3b89d98e30a8288c95cb696e77d150b3f0fb7a51f73dcd49ee6b5577fa", size = 215793, upload-time = "2026-04-25T11:07:34.919Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/fd/778f60aa295f58907938f030a8b514611f391405614a525cccd2ffc00eb5/xxhash-3.7.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c9b31ab1f28b078a6a1ac1a54eb35e7d5390deddd56870d0be3a0a733d1c321c", size = 237993, upload-time = "2026-04-25T11:07:36.638Z" },
+    { url = "https://files.pythonhosted.org/packages/70/f5/736db5de387b4a540e37a05b84b40dc58a1ce974bfd2b4e5754ce29b68c3/xxhash-3.7.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3bb5fd680c038fd5229e44e9c493782f90df9bef632fd0499d442374688ff70b", size = 214887, upload-time = "2026-04-25T11:07:38.564Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/aa/09a095f22fdb9a27fbb716841fbff52119721f9ca4261952d07a912f7839/xxhash-3.7.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:030c0fd688fce3569fbb49a2feefd4110cbb0b650186fb4610759ecfac677548", size = 448407, upload-time = "2026-04-25T11:07:40.552Z" },
+    { url = "https://files.pythonhosted.org/packages/74/8a/b745efeeca9e34a91c26fdc97ad8514c43d5a81ac78565cba80a1353870a/xxhash-3.7.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b1bde10324f4c31812ae0d0502e92d916ae8917cad7209353f122b8b8f610c3", size = 196119, upload-time = "2026-04-25T11:07:42.101Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/5c/0cfceb024af90c191f665c7933b1f318ee234f4797858383bebd1881d52f/xxhash-3.7.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:503722d52a615f2604f5e7611de7d43878df010dc0053094ef91cb9a9ac3d987", size = 286751, upload-time = "2026-04-25T11:07:43.568Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/0a/0793e405dc3cf8f4ebe2c1acec1e4e4608cd9e7e50ea691dabbc2a95ccbb/xxhash-3.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c72500a3b6d6c30ebfc135035bcace9eb5884f2dc220804efcaaba43e9f611dd", size = 212961, upload-time = "2026-04-25T11:07:45.388Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/7e/721118ffc63bfff94aa565bcf2555a820f9f4bdb0f001e0d609bdfad70de/xxhash-3.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:43475925a766d01ca8cd9a857fd87f3d50406983c8506a4c07c4df12adcc867f", size = 243703, upload-time = "2026-04-25T11:07:47.053Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/18/16f6267160488b8276fd3d449d425712512add292ba545c1b6946bfdb7dd/xxhash-3.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8d09dfd2ab135b985daf868b594315ebe11ad86cd9fea46e6c69f19b28f7d25a", size = 200894, upload-time = "2026-04-25T11:07:48.657Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/94/80ba841287fd97e3e9cac1d228788c8ef623746f570404961eec748ecb5c/xxhash-3.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:c50269d0055ac1faecfd559886d2cbe4b730de236585aba0e873f9d9dadbe585", size = 213357, upload-time = "2026-04-25T11:07:50.257Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/7e/106d4067130c59f1e18a55ffadcd876d8c68534883a1e02685b29d3d8153/xxhash-3.7.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:1910df4756a5ab58cfad8744fc2d0f23926e3efcc346ee76e87b974abab922f4", size = 277600, upload-time = "2026-04-25T11:07:51.745Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/86/a081dd30da71d720b2612a792bfd55e45fa9a07ac76a0507f60487473c25/xxhash-3.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d006faf3b491957efcb433489be3c149efe4787b7063d5cddb8ddaefdc60e0c1", size = 416980, upload-time = "2026-04-25T11:07:53.504Z" },
+    { url = "https://files.pythonhosted.org/packages/35/29/1a95221a029a3c1293773869e1ab47b07cbbdd82444a42809e8c60156626/xxhash-3.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:abb65b4e947e958f7b3b0d71db3ce447d1bc5f37f5eab871ce7223bda8768a04", size = 193840, upload-time = "2026-04-25T11:07:55.103Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/e0/db909dd0823285de2286f67e10ee4d81e96ad35d7d8e964ecb07fccd8af9/xxhash-3.7.0-cp313-cp313t-win32.whl", hash = "sha256:178959906cb1716a1ce08e0d69c82886c70a15a6f2790fc084fdd146ca30cd49", size = 30966, upload-time = "2026-04-25T11:07:56.524Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/ff/d705b15b22f21ee106adce239cb65d35067a158c630b240270f09b17c2e6/xxhash-3.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2524a1e20d4c231d13b50f7cf39e44265b055669a64a7a4b9a2a44faa03f19b6", size = 31784, upload-time = "2026-04-25T11:07:57.758Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/1f/b2cf83c3638fd0588e0b17f22e5a9400bdfb1a3e3755324ac0aee2250b88/xxhash-3.7.0-cp313-cp313t-win_arm64.whl", hash = "sha256:37d994d0ffe81ef087bb330d392caa809bb5853c77e22ea3f71db024a0543dba", size = 27932, upload-time = "2026-04-25T11:07:59.109Z" },
 ]
 
 [[package]]
 name = "yarl"
-version = "1.22.0"
+version = "1.23.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "multidict", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "propcache", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "idna", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "multidict", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
+    { name = "propcache", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda12') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cpu' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-cuda13') or (extra == 'extra-3-exo-mlx-cuda12' and extra == 'extra-3-exo-mlx-none') or (extra == 'extra-3-exo-mlx-cuda13' and extra == 'extra-3-exo-mlx-none')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload-time = "2025-10-06T14:12:55.963Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = "sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ea/f3/d67de7260456ee105dc1d162d43a019ecad6b91e2f51809d6cddaa56690e/yarl-1.22.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53", size = 139980, upload-time = "2025-10-06T14:10:14.601Z" },
-    { url = "https://files.pythonhosted.org/packages/01/88/04d98af0b47e0ef42597b9b28863b9060bb515524da0a65d5f4db160b2d5/yarl-1.22.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a", size = 93424, upload-time = "2025-10-06T14:10:16.115Z" },
-    { url = "https://files.pythonhosted.org/packages/18/91/3274b215fd8442a03975ce6bee5fe6aa57a8326b29b9d3d56234a1dca244/yarl-1.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c", size = 93821, upload-time = "2025-10-06T14:10:17.993Z" },
-    { url = "https://files.pythonhosted.org/packages/61/3a/caf4e25036db0f2da4ca22a353dfeb3c9d3c95d2761ebe9b14df8fc16eb0/yarl-1.22.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601", size = 373243, upload-time = "2025-10-06T14:10:19.44Z" },
-    { url = "https://files.pythonhosted.org/packages/6e/9e/51a77ac7516e8e7803b06e01f74e78649c24ee1021eca3d6a739cb6ea49c/yarl-1.22.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a", size = 342361, upload-time = "2025-10-06T14:10:21.124Z" },
-    { url = "https://files.pythonhosted.org/packages/d4/f8/33b92454789dde8407f156c00303e9a891f1f51a0330b0fad7c909f87692/yarl-1.22.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df", size = 387036, upload-time = "2025-10-06T14:10:22.902Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/9a/c5db84ea024f76838220280f732970aa4ee154015d7f5c1bfb60a267af6f/yarl-1.22.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2", size = 397671, upload-time = "2025-10-06T14:10:24.523Z" },
-    { url = "https://files.pythonhosted.org/packages/11/c9/cd8538dc2e7727095e0c1d867bad1e40c98f37763e6d995c1939f5fdc7b1/yarl-1.22.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b", size = 377059, upload-time = "2025-10-06T14:10:26.406Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/b9/ab437b261702ced75122ed78a876a6dec0a1b0f5e17a4ac7a9a2482d8abe/yarl-1.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273", size = 365356, upload-time = "2025-10-06T14:10:28.461Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/9d/8e1ae6d1d008a9567877b08f0ce4077a29974c04c062dabdb923ed98e6fe/yarl-1.22.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a", size = 361331, upload-time = "2025-10-06T14:10:30.541Z" },
-    { url = "https://files.pythonhosted.org/packages/ca/5a/09b7be3905962f145b73beb468cdd53db8aa171cf18c80400a54c5b82846/yarl-1.22.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d", size = 382590, upload-time = "2025-10-06T14:10:33.352Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/7f/59ec509abf90eda5048b0bc3e2d7b5099dffdb3e6b127019895ab9d5ef44/yarl-1.22.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02", size = 385316, upload-time = "2025-10-06T14:10:35.034Z" },
-    { url = "https://files.pythonhosted.org/packages/e5/84/891158426bc8036bfdfd862fabd0e0fa25df4176ec793e447f4b85cf1be4/yarl-1.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67", size = 374431, upload-time = "2025-10-06T14:10:37.76Z" },
-    { url = "https://files.pythonhosted.org/packages/88/fc/6908f062a2f77b5f9f6d69cecb1747260831ff206adcbc5b510aff88df91/yarl-1.22.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10", size = 146209, upload-time = "2025-10-06T14:10:44.643Z" },
-    { url = "https://files.pythonhosted.org/packages/65/47/76594ae8eab26210b4867be6f49129861ad33da1f1ebdf7051e98492bf62/yarl-1.22.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3", size = 95966, upload-time = "2025-10-06T14:10:46.554Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/ce/05e9828a49271ba6b5b038b15b3934e996980dd78abdfeb52a04cfb9467e/yarl-1.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9", size = 97312, upload-time = "2025-10-06T14:10:48.007Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/c5/7dffad5e4f2265b29c9d7ec869c369e4223166e4f9206fc2243ee9eea727/yarl-1.22.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f", size = 361967, upload-time = "2025-10-06T14:10:49.997Z" },
-    { url = "https://files.pythonhosted.org/packages/50/b2/375b933c93a54bff7fc041e1a6ad2c0f6f733ffb0c6e642ce56ee3b39970/yarl-1.22.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0", size = 323949, upload-time = "2025-10-06T14:10:52.004Z" },
-    { url = "https://files.pythonhosted.org/packages/66/50/bfc2a29a1d78644c5a7220ce2f304f38248dc94124a326794e677634b6cf/yarl-1.22.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e", size = 361818, upload-time = "2025-10-06T14:10:54.078Z" },
-    { url = "https://files.pythonhosted.org/packages/46/96/f3941a46af7d5d0f0498f86d71275696800ddcdd20426298e572b19b91ff/yarl-1.22.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708", size = 372626, upload-time = "2025-10-06T14:10:55.767Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/42/8b27c83bb875cd89448e42cd627e0fb971fa1675c9ec546393d18826cb50/yarl-1.22.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f", size = 341129, upload-time = "2025-10-06T14:10:57.985Z" },
-    { url = "https://files.pythonhosted.org/packages/49/36/99ca3122201b382a3cf7cc937b95235b0ac944f7e9f2d5331d50821ed352/yarl-1.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d", size = 346776, upload-time = "2025-10-06T14:10:59.633Z" },
-    { url = "https://files.pythonhosted.org/packages/85/b4/47328bf996acd01a4c16ef9dcd2f59c969f495073616586f78cd5f2efb99/yarl-1.22.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8", size = 334879, upload-time = "2025-10-06T14:11:01.454Z" },
-    { url = "https://files.pythonhosted.org/packages/c2/ad/b77d7b3f14a4283bffb8e92c6026496f6de49751c2f97d4352242bba3990/yarl-1.22.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5", size = 350996, upload-time = "2025-10-06T14:11:03.452Z" },
-    { url = "https://files.pythonhosted.org/packages/81/c8/06e1d69295792ba54d556f06686cbd6a7ce39c22307100e3fb4a2c0b0a1d/yarl-1.22.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f", size = 356047, upload-time = "2025-10-06T14:11:05.115Z" },
-    { url = "https://files.pythonhosted.org/packages/4b/b8/4c0e9e9f597074b208d18cef227d83aac36184bfbc6eab204ea55783dbc5/yarl-1.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62", size = 342947, upload-time = "2025-10-06T14:11:08.137Z" },
-    { url = "https://files.pythonhosted.org/packages/46/b3/e20ef504049f1a1c54a814b4b9bed96d1ac0e0610c3b4da178f87209db05/yarl-1.22.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:34b36c2c57124530884d89d50ed2c1478697ad7473efd59cfd479945c95650e4", size = 140520, upload-time = "2025-10-06T14:11:15.465Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/04/3532d990fdbab02e5ede063676b5c4260e7f3abea2151099c2aa745acc4c/yarl-1.22.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0dd9a702591ca2e543631c2a017e4a547e38a5c0f29eece37d9097e04a7ac683", size = 93504, upload-time = "2025-10-06T14:11:17.106Z" },
-    { url = "https://files.pythonhosted.org/packages/11/63/ff458113c5c2dac9a9719ac68ee7c947cb621432bcf28c9972b1c0e83938/yarl-1.22.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:594fcab1032e2d2cc3321bb2e51271e7cd2b516c7d9aee780ece81b07ff8244b", size = 94282, upload-time = "2025-10-06T14:11:19.064Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/bc/315a56aca762d44a6aaaf7ad253f04d996cb6b27bad34410f82d76ea8038/yarl-1.22.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3d7a87a78d46a2e3d5b72587ac14b4c16952dd0887dbb051451eceac774411e", size = 372080, upload-time = "2025-10-06T14:11:20.996Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/3f/08e9b826ec2e099ea6e7c69a61272f4f6da62cb5b1b63590bb80ca2e4a40/yarl-1.22.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:852863707010316c973162e703bddabec35e8757e67fcb8ad58829de1ebc8590", size = 338696, upload-time = "2025-10-06T14:11:22.847Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/9f/90360108e3b32bd76789088e99538febfea24a102380ae73827f62073543/yarl-1.22.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:131a085a53bfe839a477c0845acf21efc77457ba2bcf5899618136d64f3303a2", size = 387121, upload-time = "2025-10-06T14:11:24.889Z" },
-    { url = "https://files.pythonhosted.org/packages/98/92/ab8d4657bd5b46a38094cfaea498f18bb70ce6b63508fd7e909bd1f93066/yarl-1.22.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:078a8aefd263f4d4f923a9677b942b445a2be970ca24548a8102689a3a8ab8da", size = 394080, upload-time = "2025-10-06T14:11:27.307Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/e7/d8c5a7752fef68205296201f8ec2bf718f5c805a7a7e9880576c67600658/yarl-1.22.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bca03b91c323036913993ff5c738d0842fc9c60c4648e5c8d98331526df89784", size = 372661, upload-time = "2025-10-06T14:11:29.387Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/2e/f4d26183c8db0bb82d491b072f3127fb8c381a6206a3a56332714b79b751/yarl-1.22.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:68986a61557d37bb90d3051a45b91fa3d5c516d177dfc6dd6f2f436a07ff2b6b", size = 364645, upload-time = "2025-10-06T14:11:31.423Z" },
-    { url = "https://files.pythonhosted.org/packages/80/7c/428e5812e6b87cd00ee8e898328a62c95825bf37c7fa87f0b6bb2ad31304/yarl-1.22.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:4792b262d585ff0dff6bcb787f8492e40698443ec982a3568c2096433660c694", size = 355361, upload-time = "2025-10-06T14:11:33.055Z" },
-    { url = "https://files.pythonhosted.org/packages/ec/2a/249405fd26776f8b13c067378ef4d7dd49c9098d1b6457cdd152a99e96a9/yarl-1.22.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ebd4549b108d732dba1d4ace67614b9545b21ece30937a63a65dd34efa19732d", size = 381451, upload-time = "2025-10-06T14:11:35.136Z" },
-    { url = "https://files.pythonhosted.org/packages/67/a8/fb6b1adbe98cf1e2dd9fad71003d3a63a1bc22459c6e15f5714eb9323b93/yarl-1.22.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f87ac53513d22240c7d59203f25cc3beac1e574c6cd681bbfd321987b69f95fd", size = 383814, upload-time = "2025-10-06T14:11:37.094Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/f9/3aa2c0e480fb73e872ae2814c43bc1e734740bb0d54e8cb2a95925f98131/yarl-1.22.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:22b029f2881599e2f1b06f8f1db2ee63bd309e2293ba2d566e008ba12778b8da", size = 370799, upload-time = "2025-10-06T14:11:38.83Z" },
-    { url = "https://files.pythonhosted.org/packages/06/5e/a15eb13db90abd87dfbefb9760c0f3f257ac42a5cac7e75dbc23bed97a9f/yarl-1.22.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:45c2842ff0e0d1b35a6bf1cd6c690939dacb617a70827f715232b2e0494d55d1", size = 146223, upload-time = "2025-10-06T14:11:46.796Z" },
-    { url = "https://files.pythonhosted.org/packages/18/82/9665c61910d4d84f41a5bf6837597c89e665fa88aa4941080704645932a9/yarl-1.22.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d947071e6ebcf2e2bee8fce76e10faca8f7a14808ca36a910263acaacef08eca", size = 95981, upload-time = "2025-10-06T14:11:48.845Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/9a/2f65743589809af4d0a6d3aa749343c4b5f4c380cc24a8e94a3c6625a808/yarl-1.22.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:334b8721303e61b00019474cc103bdac3d7b1f65e91f0bfedeec2d56dfe74b53", size = 97303, upload-time = "2025-10-06T14:11:50.897Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/ab/5b13d3e157505c43c3b43b5a776cbf7b24a02bc4cccc40314771197e3508/yarl-1.22.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e7ce67c34138a058fd092f67d07a72b8e31ff0c9236e751957465a24b28910c", size = 361820, upload-time = "2025-10-06T14:11:52.549Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/76/242a5ef4677615cf95330cfc1b4610e78184400699bdda0acb897ef5e49a/yarl-1.22.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d77e1b2c6d04711478cb1c4ab90db07f1609ccf06a287d5607fcd90dc9863acf", size = 323203, upload-time = "2025-10-06T14:11:54.225Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/96/475509110d3f0153b43d06164cf4195c64d16999e0c7e2d8a099adcd6907/yarl-1.22.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4647674b6150d2cae088fc07de2738a84b8bcedebef29802cf0b0a82ab6face", size = 363173, upload-time = "2025-10-06T14:11:56.069Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/66/59db471aecfbd559a1fd48aedd954435558cd98c7d0da8b03cc6c140a32c/yarl-1.22.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efb07073be061c8f79d03d04139a80ba33cbd390ca8f0297aae9cce6411e4c6b", size = 373562, upload-time = "2025-10-06T14:11:58.783Z" },
-    { url = "https://files.pythonhosted.org/packages/03/1f/c5d94abc91557384719da10ff166b916107c1b45e4d0423a88457071dd88/yarl-1.22.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e51ac5435758ba97ad69617e13233da53908beccc6cfcd6c34bbed8dcbede486", size = 339828, upload-time = "2025-10-06T14:12:00.686Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/97/aa6a143d3afba17b6465733681c70cf175af89f76ec8d9286e08437a7454/yarl-1.22.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33e32a0dd0c8205efa8e83d04fc9f19313772b78522d1bdc7d9aed706bfd6138", size = 347551, upload-time = "2025-10-06T14:12:02.628Z" },
-    { url = "https://files.pythonhosted.org/packages/43/3c/45a2b6d80195959239a7b2a8810506d4eea5487dce61c2a3393e7fc3c52e/yarl-1.22.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:bf4a21e58b9cde0e401e683ebd00f6ed30a06d14e93f7c8fd059f8b6e8f87b6a", size = 334512, upload-time = "2025-10-06T14:12:04.871Z" },
-    { url = "https://files.pythonhosted.org/packages/86/a0/c2ab48d74599c7c84cb104ebd799c5813de252bea0f360ffc29d270c2caa/yarl-1.22.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e4b582bab49ac33c8deb97e058cd67c2c50dac0dd134874106d9c774fd272529", size = 352400, upload-time = "2025-10-06T14:12:06.624Z" },
-    { url = "https://files.pythonhosted.org/packages/32/75/f8919b2eafc929567d3d8411f72bdb1a2109c01caaab4ebfa5f8ffadc15b/yarl-1.22.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0b5bcc1a9c4839e7e30b7b30dd47fe5e7e44fb7054ec29b5bb8d526aa1041093", size = 357140, upload-time = "2025-10-06T14:12:08.362Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/72/6a85bba382f22cf78add705d8c3731748397d986e197e53ecc7835e76de7/yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c", size = 341473, upload-time = "2025-10-06T14:12:10.994Z" },
-    { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/4b/a0a6e5d0ee8a2f3a373ddef8a4097d74ac901ac363eea1440464ccbe0898/yarl-1.23.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:16c6994ac35c3e74fb0ae93323bf8b9c2a9088d55946109489667c510a7d010e", size = 123796, upload-time = "2026-03-01T22:05:41.412Z" },
+    { url = "https://files.pythonhosted.org/packages/67/b6/8925d68af039b835ae876db5838e82e76ec87b9782ecc97e192b809c4831/yarl-1.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4a42e651629dafb64fd5b0286a3580613702b5809ad3f24934ea87595804f2c5", size = 86547, upload-time = "2026-03-01T22:05:42.841Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/50/06d511cc4b8e0360d3c94af051a768e84b755c5eb031b12adaaab6dec6e5/yarl-1.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c6b9461a2a8b47c65eef63bb1c76a4f1c119618ffa99ea79bc5bb1e46c5821b", size = 85854, upload-time = "2026-03-01T22:05:44.85Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/f4/4e30b250927ffdab4db70da08b9b8d2194d7c7b400167b8fbeca1e4701ca/yarl-1.23.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2569b67d616eab450d262ca7cb9f9e19d2f718c70a8b88712859359d0ab17035", size = 98351, upload-time = "2026-03-01T22:05:46.836Z" },
+    { url = "https://files.pythonhosted.org/packages/86/fc/4118c5671ea948208bdb1492d8b76bdf1453d3e73df051f939f563e7dcc5/yarl-1.23.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e9d9a4d06d3481eab79803beb4d9bd6f6a8e781ec078ac70d7ef2dcc29d1bea5", size = 92711, upload-time = "2026-03-01T22:05:48.316Z" },
+    { url = "https://files.pythonhosted.org/packages/56/11/1ed91d42bd9e73c13dc9e7eb0dd92298d75e7ac4dd7f046ad0c472e231cd/yarl-1.23.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f514f6474e04179d3d33175ed3f3e31434d3130d42ec153540d5b157deefd735", size = 106014, upload-time = "2026-03-01T22:05:50.028Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/c9/74e44e056a23fbc33aca71779ef450ca648a5bc472bdad7a82339918f818/yarl-1.23.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fda207c815b253e34f7e1909840fd14299567b1c0eb4908f8c2ce01a41265401", size = 105557, upload-time = "2026-03-01T22:05:51.416Z" },
+    { url = "https://files.pythonhosted.org/packages/66/fe/b1e10b08d287f518994f1e2ff9b6d26f0adeecd8dd7d533b01bab29a3eda/yarl-1.23.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34b6cf500e61c90f305094911f9acc9c86da1a05a7a3f5be9f68817043f486e4", size = 101559, upload-time = "2026-03-01T22:05:52.872Z" },
+    { url = "https://files.pythonhosted.org/packages/72/59/c5b8d94b14e3d3c2a9c20cb100119fd534ab5a14b93673ab4cc4a4141ea5/yarl-1.23.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d7504f2b476d21653e4d143f44a175f7f751cd41233525312696c76aa3dbb23f", size = 100502, upload-time = "2026-03-01T22:05:54.954Z" },
+    { url = "https://files.pythonhosted.org/packages/77/4f/96976cb54cbfc5c9fd73ed4c51804f92f209481d1fb190981c0f8a07a1d7/yarl-1.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:578110dd426f0d209d1509244e6d4a3f1a3e9077655d98c5f22583d63252a08a", size = 98027, upload-time = "2026-03-01T22:05:56.409Z" },
+    { url = "https://files.pythonhosted.org/packages/63/6e/904c4f476471afdbad6b7e5b70362fb5810e35cd7466529a97322b6f5556/yarl-1.23.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:609d3614d78d74ebe35f54953c5bbd2ac647a7ddb9c30a5d877580f5e86b22f2", size = 95369, upload-time = "2026-03-01T22:05:58.141Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/40/acfcdb3b5f9d68ef499e39e04d25e141fe90661f9d54114556cf83be8353/yarl-1.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4966242ec68afc74c122f8459abd597afd7d8a60dc93d695c1334c5fd25f762f", size = 105565, upload-time = "2026-03-01T22:06:00.286Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/c6/31e28f3a6ba2869c43d124f37ea5260cac9c9281df803c354b31f4dd1f3c/yarl-1.23.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e0fd068364a6759bc794459f0a735ab151d11304346332489c7972bacbe9e72b", size = 99813, upload-time = "2026-03-01T22:06:01.712Z" },
+    { url = "https://files.pythonhosted.org/packages/08/1f/6f65f59e72d54aa467119b63fc0b0b1762eff0232db1f4720cd89e2f4a17/yarl-1.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:39004f0ad156da43e86aa71f44e033de68a44e5a31fc53507b36dd253970054a", size = 105632, upload-time = "2026-03-01T22:06:03.188Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/c4/18b178a69935f9e7a338127d5b77d868fdc0f0e49becd286d51b3a18c61d/yarl-1.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e5723c01a56c5028c807c701aa66722916d2747ad737a046853f6c46f4875543", size = 101895, upload-time = "2026-03-01T22:06:04.651Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/54/f5b870b5505663911dba950a8e4776a0dbd51c9c54c0ae88e823e4b874a0/yarl-1.23.0-cp313-cp313-win32.whl", hash = "sha256:1b6b572edd95b4fa8df75de10b04bc81acc87c1c7d16bcdd2035b09d30acc957", size = 82356, upload-time = "2026-03-01T22:06:06.04Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/84/266e8da36879c6edcd37b02b547e2d9ecdfea776be49598e75696e3316e1/yarl-1.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:baaf55442359053c7d62f6f8413a62adba3205119bcb6f49594894d8be47e5e3", size = 87515, upload-time = "2026-03-01T22:06:08.107Z" },
+    { url = "https://files.pythonhosted.org/packages/00/fd/7e1c66efad35e1649114fa13f17485f62881ad58edeeb7f49f8c5e748bf9/yarl-1.23.0-cp313-cp313-win_arm64.whl", hash = "sha256:fb4948814a2a98e3912505f09c9e7493b1506226afb1f881825368d6fb776ee3", size = 81785, upload-time = "2026-03-01T22:06:10.181Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/fc/119dd07004f17ea43bb91e3ece6587759edd7519d6b086d16bfbd3319982/yarl-1.23.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:aecfed0b41aa72b7881712c65cf764e39ce2ec352324f5e0837c7048d9e6daaa", size = 130719, upload-time = "2026-03-01T22:06:11.708Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/0d/9f2348502fbb3af409e8f47730282cd6bc80dec6630c1e06374d882d6eb2/yarl-1.23.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a41bcf68efd19073376eb8cf948b8d9be0af26256403e512bb18f3966f1f9120", size = 89690, upload-time = "2026-03-01T22:06:13.429Z" },
+    { url = "https://files.pythonhosted.org/packages/50/93/e88f3c80971b42cfc83f50a51b9d165a1dbf154b97005f2994a79f212a07/yarl-1.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cde9a2ecd91668bcb7f077c4966d8ceddb60af01b52e6e3e2680e4cf00ad1a59", size = 89851, upload-time = "2026-03-01T22:06:15.53Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/07/61c9dd8ba8f86473263b4036f70fb594c09e99c0d9737a799dfd8bc85651/yarl-1.23.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5023346c4ee7992febc0068e7593de5fa2bf611848c08404b35ebbb76b1b0512", size = 95874, upload-time = "2026-03-01T22:06:17.553Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/e9/f9ff8ceefba599eac6abddcfb0b3bee9b9e636e96dbf54342a8577252379/yarl-1.23.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1009abedb49ae95b136a8904a3f71b342f849ffeced2d3747bf29caeda218c4", size = 88710, upload-time = "2026-03-01T22:06:19.004Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/78/0231bfcc5d4c8eec220bc2f9ef82cb4566192ea867a7c5b4148f44f6cbcd/yarl-1.23.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a8d00f29b42f534cc8aa3931cfe773b13b23e561e10d2b26f27a8d309b0e82a1", size = 101033, upload-time = "2026-03-01T22:06:21.203Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/9b/30ea5239a61786f18fd25797151a17fbb3be176977187a48d541b5447dd4/yarl-1.23.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:95451e6ce06c3e104556d73b559f5da6c34a069b6b62946d3ad66afcd51642ea", size = 100817, upload-time = "2026-03-01T22:06:22.738Z" },
+    { url = "https://files.pythonhosted.org/packages/62/e2/a4980481071791bc83bce2b7a1a1f7adcabfa366007518b4b845e92eeee3/yarl-1.23.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:531ef597132086b6cf96faa7c6c1dcd0361dd5f1694e5cc30375907b9b7d3ea9", size = 97482, upload-time = "2026-03-01T22:06:24.21Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/1e/304a00cf5f6100414c4b5a01fc7ff9ee724b62158a08df2f8170dfc72a2d/yarl-1.23.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:88f9fb0116fbfcefcab70f85cf4b74a2b6ce5d199c41345296f49d974ddb4123", size = 95949, upload-time = "2026-03-01T22:06:25.697Z" },
+    { url = "https://files.pythonhosted.org/packages/68/03/093f4055ed4cae649ac53bca3d180bd37102e9e11d048588e9ab0c0108d0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e7b0460976dc75cb87ad9cc1f9899a4b97751e7d4e77ab840fc9b6d377b8fd24", size = 95839, upload-time = "2026-03-01T22:06:27.309Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/28/4c75ebb108f322aa8f917ae10a8ffa4f07cae10a8a627b64e578617df6a0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:115136c4a426f9da976187d238e84139ff6b51a20839aa6e3720cd1026d768de", size = 90696, upload-time = "2026-03-01T22:06:29.048Z" },
+    { url = "https://files.pythonhosted.org/packages/23/9c/42c2e2dd91c1a570402f51bdf066bfdb1241c2240ba001967bad778e77b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ead11956716a940c1abc816b7df3fa2b84d06eaed8832ca32f5c5e058c65506b", size = 100865, upload-time = "2026-03-01T22:06:30.525Z" },
+    { url = "https://files.pythonhosted.org/packages/74/05/1bcd60a8a0a914d462c305137246b6f9d167628d73568505fce3f1cb2e65/yarl-1.23.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:fe8f8f5e70e6dbdfca9882cd9deaac058729bcf323cf7a58660901e55c9c94f6", size = 96234, upload-time = "2026-03-01T22:06:32.692Z" },
+    { url = "https://files.pythonhosted.org/packages/90/b2/f52381aac396d6778ce516b7bc149c79e65bfc068b5de2857ab69eeea3b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:a0e317df055958a0c1e79e5d2aa5a5eaa4a6d05a20d4b0c9c3f48918139c9fc6", size = 100295, upload-time = "2026-03-01T22:06:34.268Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/e8/638bae5bbf1113a659b2435d8895474598afe38b4a837103764f603aba56/yarl-1.23.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f0fd84de0c957b2d280143522c4f91a73aada1923caee763e24a2b3fda9f8a5", size = 97784, upload-time = "2026-03-01T22:06:35.864Z" },
+    { url = "https://files.pythonhosted.org/packages/80/25/a3892b46182c586c202629fc2159aa13975d3741d52ebd7347fd501d48d5/yarl-1.23.0-cp313-cp313t-win32.whl", hash = "sha256:93a784271881035ab4406a172edb0faecb6e7d00f4b53dc2f55919d6c9688595", size = 88313, upload-time = "2026-03-01T22:06:37.39Z" },
+    { url = "https://files.pythonhosted.org/packages/43/68/8c5b36aa5178900b37387937bc2c2fe0e9505537f713495472dcf6f6fccc/yarl-1.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dd00607bffbf30250fe108065f07453ec124dbf223420f57f5e749b04295e090", size = 94932, upload-time = "2026-03-01T22:06:39.579Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/cc/d79ba8292f51f81f4dc533a8ccfb9fc6992cabf0998ed3245de7589dc07c/yarl-1.23.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ac09d42f48f80c9ee1635b2fcaa819496a44502737660d3c0f2ade7526d29144", size = 84786, upload-time = "2026-03-01T22:06:41.988Z" },
+    { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" },
 ]
 
 [[package]]
@@ -3355,16 +3229,7 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" },
     { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" },
     { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" },
-    { url = "https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" },
-    { url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 6364818, upload-time = "2025-09-14T22:18:03.769Z" },
-    { url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" },
-    { url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" },
 ]