Compare commits

...

65 Commits

Author SHA1 Message Date
Ryuichi Leo Takashige  ddc81385fd  raise exo bench default times  2026-02-03 14:54:54 +00:00
Ryuichi Leo Takashige  694be69f30  single node regression  2026-02-03 11:33:50 +00:00
Ryuichi Leo Takashige  8a7d01641a  fix  2026-02-03 11:17:17 +00:00
Ryuichi Leo Takashige  5aaf8d8003  uv lock  2026-02-03 11:06:05 +00:00
Ryuichi Leo Takashige  8319477913  bring back david changes  2026-02-03 10:57:42 +00:00
Ryuichi Leo Takashige  4a2a2c092e  timeout  2026-02-03 02:07:42 +00:00
Ryuichi Leo Takashige  bc90ac33d7  pyproject.toml  2026-02-03 02:00:05 +00:00
Ryuichi Leo Takashige  42d256d70d  pyproject.toml  2026-02-03 01:59:47 +00:00
Ryuichi Leo Takashige  07a0622d04  revert deepseek changes  2026-02-03 01:58:22 +00:00
Ryuichi Leo Takashige  9868aaaf54  revert deepseek changes  2026-02-03 01:57:59 +00:00
Ryuichi Leo Takashige  90b2c97342  update config  2026-02-03 01:54:20 +00:00
Ryuichi Leo Takashige  af97b836c7  fix deepseek patch  2026-02-03 01:02:29 +00:00
Ryuichi Leo Takashige  fd722f663c  patch deepseek  2026-02-03 00:52:03 +00:00
Ryuichi Leo Takashige  4ec4695f84  ...  2026-02-03 00:37:02 +00:00
Ryuichi Leo Takashige  87caab8647  ...  2026-02-03 00:22:11 +00:00
Ryuichi Leo Takashige  c4b19088da  warmup is broken too??  2026-02-03 00:17:11 +00:00
Ryuichi Leo Takashige  b119a81d33  grr  2026-02-03 00:02:05 +00:00
Ryuichi Leo Takashige  110ef45928  im done with this  2026-02-03 00:01:10 +00:00
Ryuichi Leo Takashige  22fa3c8bb0  jeez say it's done  2026-02-02 23:53:56 +00:00
Ryuichi Leo Takashige  7608a5e7f4  told it to test...  2026-02-02 23:38:34 +00:00
Ryuichi Leo Takashige  04cc92a97f  claude is 100% nerfed to oblivion  2026-02-02 23:07:40 +00:00
Ryuichi Leo Takashige  163bb83195  what a mess  2026-02-02 23:06:49 +00:00
Ryuichi Leo Takashige  f082d284a5  fix recv errors  2026-02-02 22:58:25 +00:00
Ryuichi Leo Takashige  5ec1906a98  fix the race probably  2026-02-02 22:43:50 +00:00
Ryuichi Leo Takashige  5bc35b38b0  kill processes maybe  2026-02-02 22:31:03 +00:00
Ryuichi Leo Takashige  f906db66c5  generate results.json  2026-02-02 22:14:07 +00:00
Ryuichi Leo Takashige  d1ea5f4315  stop downloading shit  2026-02-02 22:04:21 +00:00
Ryuichi Leo Takashige  d9648194ed  dont download every time  2026-02-02 21:58:13 +00:00
Ryuichi Leo Takashige  bc73ef1436  what a failure  2026-02-02 21:44:05 +00:00
Ryuichi Leo Takashige  8aeeb46d2f  failures  2026-02-02 21:33:16 +00:00
Ryuichi Leo Takashige  edb2015607  failures  2026-02-02 21:13:42 +00:00
Ryuichi Leo Takashige  f613ebdc6c  failures  2026-02-02 21:12:34 +00:00
Ryuichi Leo Takashige  e72a1778dd  maybe fix  2026-02-02 20:24:52 +00:00
Ryuichi Leo Takashige  eb4c76e758  log text  2026-02-02 19:27:34 +00:00
Ryuichi Leo Takashige  b890c671b8  use new auto parallel  2026-02-02 19:23:29 +00:00
Ryuichi Leo Takashige  e7f3f47754  jeez that was dumb  2026-02-02 19:14:19 +00:00
Ryuichi Leo Takashige  d935c7a372  maybe fix?  2026-02-02 19:08:32 +00:00
Ryuichi Leo Takashige  bd089b30d7  raise timeouts  2026-02-02 18:50:26 +00:00
Ryuichi Leo Takashige  13b397a3c9  raise max concurrency  2026-02-02 18:45:29 +00:00
Ryuichi Leo Takashige  cf5fddf3f8  oops  2026-02-02 18:40:41 +00:00
Ryuichi Leo Takashige  c9df4ff004  save properly  2026-02-02 18:30:53 +00:00
Ryuichi Leo Takashige  4f7869b91b  cleanup after control c  2026-02-02 18:23:42 +00:00
Ryuichi Leo Takashige  b08ec25ef6  better limit?  2026-02-02 18:22:39 +00:00
Ryuichi Leo Takashige  f235019c28  make control c exit cleanly and add --limit  2026-02-02 18:04:58 +00:00
Ryuichi Leo Takashige  68a77f0910  little confusing pyproject change  2026-02-02 17:47:08 +00:00
Ryuichi Leo Takashige  8456e3f74b  actually fix exo eval  2026-02-02 17:37:37 +00:00
Ryuichi Leo Takashige  83e4725415  add 4bit attention  2026-02-02 17:30:52 +00:00
Ryuichi Leo Takashige  49dc7a8798  livecodebench fix  2026-02-02 17:30:34 +00:00
Ryuichi Leo Takashige  dea52342ca  livecodebench fix  2026-02-02 17:27:59 +00:00
Ryuichi Leo Takashige  aae28d8e8b  livecodebench eval  2026-02-02 17:14:56 +00:00
Ryuichi Leo Takashige  a28def8e45  revert use ssh  2026-02-02 16:06:32 +00:00
Ryuichi Leo Takashige  56a9864e19  use ssh  2026-02-02 15:59:42 +00:00
Ryuichi Leo Takashige  10afd08427  optimizations  2026-02-02 15:46:18 +00:00
Ryuichi Leo Takashige  04a0690746  faster prompt sizer  2026-02-02 14:50:04 +00:00
Ryuichi Leo Takashige  970717f1bb  dont time out pleaseee  2026-02-02 13:49:31 +00:00
Ryuichi Leo Takashige  774eb1756a  fix  2026-02-02 13:31:32 +00:00
Ryuichi Leo Takashige  061e58ce39  add livebench  2026-02-02 13:26:36 +00:00
Ryuichi Leo Takashige  e8b6ec131b  fix exo bench  2026-02-02 13:12:50 +00:00
Ryuichi Leo Takashige  7b4c5d0c6d  relative import  2026-02-02 11:44:14 +00:00
Ryuichi Leo Takashige  fb3d1e887f  relative import  2026-02-02 11:43:56 +00:00
Ryuichi Leo Takashige  2d15e49f4e  tagged model  2026-02-02 11:41:22 +00:00
Ryuichi Leo Takashige  c0f192897c  dumb upstream changes  2026-02-02 11:37:11 +00:00
Ryuichi Leo Takashige  7587cb872c  several fixes from main  2026-02-02 11:35:10 +00:00
Ryuichi Leo Takashige  bcb07782c1  no batch  2026-02-02 11:30:19 +00:00
Ryuichi Leo Takashige  24a6adf022  Add metadata to results.json  2026-01-29 13:02:35 +00:00
19 changed files with 1478 additions and 331 deletions

View File

@@ -2,8 +2,8 @@
# See bench/exo_eval.py for usage
[eval]
# Eval framework type: "lm_eval" | "swe_bench" | "custom"
type = "lm_eval"
# Eval framework type: "lm_eval" | "swe_bench" | "custom" | "livecodebench"
type = "livecodebench"
# Require HuggingFace token (default: true)
# Set to false if using only public datasets
require_hf_token = true
@@ -49,6 +49,44 @@ fewshot_as_multiturn = true
# Output path for results
output_path = "bench/eval_results"
# LiveCodeBench configuration
# Contamination-free code generation benchmark
# See: https://livecodebench.github.io/
[livecodebench]
# Evaluation scenario: "codegeneration" | "selfrepair" | "testoutputprediction" | "codeexecution"
scenario = "codegeneration"
# Dataset release version (newer versions have more problems)
# release_v1: May 2023 - March 2024 (400 problems)
# release_v2: May 2023 - May 2024 (511 problems)
# release_v3: May 2023 - July 2024 (612 problems)
# release_v4: May 2023 - September 2024 (713 problems)
# release_v5: May 2023 - January 2025 (880 problems)
release_version = "release_v5"
# Sampling temperature
# - For non-reasoning models: 0 (deterministic)
# - For reasoning/thinking models (Kimi K2.5, DeepSeek R1): use model-recommended (e.g., 1.0)
temperature = 1.0
# Number of samples per problem (1 for pass@1, matches Artificial Analysis)
n_samples = 1
# Max tokens for generation
# - For non-reasoning models: 16384
# - For reasoning/thinking models: use model-recommended (Kimi K2.5 uses 96k)
max_tokens = 96000
# Use code_generation_lite for faster evaluation (default: true)
# Set to false to use full test suite (slower but more thorough)
fast = true
# Run evaluation after generation (computes pass@1, pass@5)
evaluate = true
# Number of parallel API requests
multiprocess = 8
# Cache generated outputs for resumption (disabled by default to avoid stale results)
use_cache = false
# Timeout in seconds (universal for all operations)
timeout = 100000
openai_timeout = 100000
# Output path for results
output_path = "bench/lcb_results"
# SWE-bench configuration (placeholder)
[swe_bench]
# SWE-bench dataset
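Editor's note: a minimal sketch of consuming the new [livecodebench] table from Python, assuming only the stdlib tomllib (3.11+); the fallback defaults mirror the ones run_livecodebench() uses later in this diff:

import tomllib

with open("bench/eval_config.toml", "rb") as f:
    config = tomllib.load(f)

lcb = config.get("livecodebench", {})
scenario = lcb.get("scenario", "codegeneration")
release = lcb.get("release_version", "release_v5")
temperature = lcb.get("temperature", 0)  # 0 for non-reasoning models
max_tokens = lcb.get("max_tokens", 16384)
print(f"{scenario=} {release=} {temperature=} {max_tokens=}")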

View File

@@ -5,6 +5,7 @@ from __future__ import annotations
import argparse
import contextlib
import http.client
import itertools
import json
import os
import time
@@ -24,7 +25,7 @@ class ExoHttpError(RuntimeError):
class ExoClient:
def __init__(self, host: str, port: int, timeout_s: float = 600.0):
def __init__(self, host: str, port: int, timeout_s: float = 100000.0):
self.host = host
self.port = port
self.timeout_s = timeout_s
@@ -180,14 +181,7 @@ def parse_int_list(values: list[str]) -> list[int]:
part = part.strip()
if part:
items.append(int(part))
seen: set[int] = set()
out: list[int] = []
for x in items:
if x not in seen:
out.append(x)
seen.add(x)
return out
return items
def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]:
@@ -277,12 +271,29 @@ class PromptSizer:
f"Target ({target}) is smaller than template overhead ({self.base_tokens})."
)
content = ""
tok = self.count_fn(content)
# Estimate tokens per atom using a sample
sample_count = 100
sample_content = self.atom * sample_count
sample_tokens = self.count_fn(sample_content) - self.base_tokens
tokens_per_atom = sample_tokens / sample_count
while tok < target:
content += self.atom
tok = self.count_fn(content)
# Estimate starting point
needed_tokens = target - self.base_tokens
estimated_atoms = int(needed_tokens / tokens_per_atom)
# Binary search to find exact atom count
low, high = 0, estimated_atoms * 2 + 100
while low < high:
mid = (low + high) // 2
tok = self.count_fn(self.atom * mid)
if tok < target:
low = mid + 1
else:
high = mid
content = self.atom * low
tok = self.count_fn(content)
logger.info(f"{tok=}")
if tok != target:
raise RuntimeError(
@@ -348,7 +359,7 @@ def main() -> int:
help="Warmup runs per placement (uses first pp/tg).",
)
ap.add_argument(
"--timeout", type=float, default=600.0, help="HTTP timeout (seconds)."
"--timeout", type=float, default=100000.0, help="HTTP timeout (seconds)."
)
ap.add_argument(
"--json-out",
@@ -369,6 +380,14 @@ def main() -> int:
logger.error("--repeat must be >= 1")
return 2
# Log pairing mode
if len(pp_list) == len(tg_list):
logger.info(f"pp/tg mode: tandem (zip) - {len(pp_list)} pairs")
else:
logger.info(
f"pp/tg mode: combinations (product) - {len(pp_list) * len(tg_list)} pairs"
)
client = ExoClient(args.host, args.port, timeout_s=args.timeout)
short_id, full_model_id = resolve_model_short_id(client, args.model)
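Editor's note: to make the new pairing rule concrete, a standalone sketch of the zip-vs-product behavior logged above (list values are made up):

import itertools

def pair_pp_tg(pp_list: list[int], tg_list: list[int]) -> list[tuple[int, int]]:
    if len(pp_list) == len(tg_list):
        return list(zip(pp_list, tg_list))            # tandem (zip) mode
    return list(itertools.product(pp_list, tg_list))  # combinations mode

assert pair_pp_tg([128, 256], [64, 128]) == [(128, 64), (256, 128)]
assert pair_pp_tg([128, 256], [64]) == [(128, 64), (256, 64)]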
@@ -486,60 +505,55 @@ def main() -> int:
)
logger.debug(f" warmup {i + 1}/{args.warmup} done")
for pp in pp_list:
# if (
# pp * n_nodes > 2048
# and "ring" in instance_meta.lower()
# and "tensor" in sharding.lower()
# ):
# model_card = MODEL_CARDS[short_id]
# if model_card.metadata.storage_size > Memory.from_gb(10):
# logger.info(
# f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
# )
# continue
for tg in tg_list:
runs: list[dict[str, Any]] = []
for r in range(args.repeat):
time.sleep(3)
try:
row, actual_pp_tokens = run_one_completion(
client, full_model_id, pp, tg, prompt_sizer
)
except Exception as e:
logger.error(e)
continue
row.update(
{
"model_short_id": short_id,
"model_id": full_model_id,
"placement_sharding": sharding,
"placement_instance_meta": instance_meta,
"placement_nodes": n_nodes,
"instance_id": instance_id,
"pp_tokens": actual_pp_tokens,
"tg": tg,
"repeat_index": r,
}
)
runs.append(row)
all_rows.append(row)
# If pp and tg lists have same length, run in tandem (zip)
# Otherwise, run all combinations (cartesian product)
if len(pp_list) == len(tg_list):
pp_tg_pairs = list(zip(pp_list, tg_list))
else:
pp_tg_pairs = list(itertools.product(pp_list, tg_list))
if runs:
prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
gtok = mean(x["stats"]["generation_tokens"] for x in runs)
peak = mean(
x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
for pp, tg in pp_tg_pairs:
runs: list[dict[str, Any]] = []
for r in range(args.repeat):
time.sleep(3)
try:
row, actual_pp_tokens = run_one_completion(
client, full_model_id, pp, tg, prompt_sizer
)
except Exception as e:
logger.error(e)
continue
row.update(
{
"model_short_id": short_id,
"model_id": full_model_id,
"placement_sharding": sharding,
"placement_instance_meta": instance_meta,
"placement_nodes": n_nodes,
"instance_id": instance_id,
"pp_tokens": actual_pp_tokens,
"tg": tg,
"repeat_index": r,
}
)
runs.append(row)
all_rows.append(row)
logger.info(
f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f} "
f"prompt_tokens={ptok} gen_tokens={gtok} "
f"peak_memory={format_peak_memory(peak)}\n"
)
time.sleep(2)
if runs:
prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
gtok = mean(x["stats"]["generation_tokens"] for x in runs)
peak = mean(
x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
)
logger.info(
f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f} "
f"prompt_tokens={ptok} gen_tokens={gtok} "
f"peak_memory={format_peak_memory(peak)}\n"
)
time.sleep(2)
finally:
try:
client.request_json("DELETE", f"/instance/{instance_id}")
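Editor's note: the PromptSizer change above replaces token-by-token linear growth with a sampled tokens-per-atom estimate plus a binary search. A self-contained sketch of that logic, where count_fn, atom, and base_tokens stand in for the class attributes (count_fn is assumed to include template overhead, as in the diff):

def size_prompt(count_fn, atom: str, base_tokens: int, target: int) -> str:
    # Estimate tokens per atom from a 100-atom sample, then binary-search
    # for the smallest atom count whose token count reaches the target.
    sample_count = 100
    tokens_per_atom = (count_fn(atom * sample_count) - base_tokens) / sample_count
    estimated_atoms = int((target - base_tokens) / tokens_per_atom)
    low, high = 0, estimated_atoms * 2 + 100
    while low < high:
        mid = (low + high) // 2
        if count_fn(atom * mid) < target:
            low = mid + 1
        else:
            high = mid
    return atom * low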

View File

@@ -5,12 +5,17 @@ exo-eval: Evaluation harness for exo inference system.
Supports multiple evaluation frameworks via TOML configuration:
- lm_eval: Language model evaluation using EleutherAI's lm-evaluation-harness
- livecodebench: Code generation benchmark (https://livecodebench.github.io/)
- swe_bench: SWE-bench evaluation (placeholder for future implementation)
- custom: Custom evaluation scripts
Usage:
uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit
uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit --dry-run
# Run LiveCodeBench (requires livecodebench package):
# First: git clone https://github.com/LiveCodeBench/LiveCodeBench && cd LiveCodeBench && uv pip install -e .
# Then set type = "livecodebench" in eval_config.toml
"""
from __future__ import annotations
@@ -47,7 +52,7 @@ from bench.exo_bench import (
wait_for_instance_ready,
)
EvalType = Literal["lm_eval", "swe_bench", "custom"]
EvalType = Literal["lm_eval", "swe_bench", "livecodebench", "custom"]
def load_config(config_path: str) -> dict[str, Any]:
@@ -64,7 +69,7 @@ def get_eval_type(config: dict[str, Any]) -> EvalType:
"""Extract evaluation type from config."""
eval_section = config.get("eval", {})
eval_type = eval_section.get("type", "lm_eval")
if eval_type not in ("lm_eval", "swe_bench", "custom"):
if eval_type not in ("lm_eval", "swe_bench", "livecodebench", "custom"):
raise ValueError(f"Unknown eval type: {eval_type}")
return eval_type
@@ -303,7 +308,7 @@ def run_lm_eval(
output_path: str | None,
limit: int | None,
dry_run: bool,
) -> int:
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run lm_eval evaluation."""
lm_eval_config = config.get("lm_eval", {})
tasks = lm_eval_config.get("tasks", ["mmlu"])
@@ -311,6 +316,7 @@ def run_lm_eval(
tasks = [tasks]
exo_base_url = f"http://{host}:{port}"
effective_output = output_path or lm_eval_config.get("output_path")
# Build args - use native completions or chat completions endpoint directly
args = build_lm_eval_args(
@@ -320,31 +326,53 @@ def run_lm_eval(
if dry_run:
logger.info("[dry-run] Would execute the above command")
return 0
return 0, None, None
try:
result = subprocess.run(args, check=False)
start_time = time.perf_counter()
# Use Popen with process group so we can kill all children on interrupt
proc = subprocess.Popen(args, start_new_session=True)
try:
proc.wait()
except KeyboardInterrupt:
# Kill the entire process group on Ctrl+C
import signal
# Print token usage summary from exo
logger.info("Interrupted - terminating lm_eval processes...")
try:
os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
proc.wait(timeout=5)
except (ProcessLookupError, OSError):
pass # Already dead
except subprocess.TimeoutExpired:
os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
return 130, None, None
elapsed_seconds = time.perf_counter() - start_time
# Fetch and return token usage summary from exo
usage: dict[str, Any] | None = None
try:
import httpx
usage_resp = httpx.get(f"{exo_base_url}/v1/usage", timeout=5)
if usage_resp.status_code == 200:
usage = usage_resp.json()
usage_data: dict[str, Any] = usage_resp.json()
usage = usage_data
logger.info("--- Token Usage (Total) ---")
logger.info(f" Requests: {usage.get('total_requests', 0)}")
logger.info(
f" Prompt tokens: {usage.get('total_prompt_tokens', 0)}"
f" Requests: {usage_data.get('total_requests', 0)}"
)
logger.info(
f" Completion tokens: {usage.get('total_completion_tokens', 0)}"
f" Prompt tokens: {usage_data.get('total_prompt_tokens', 0)}"
)
logger.info(
f" Reasoning tokens: {usage.get('total_reasoning_tokens', 0)}"
f" Completion tokens: {usage_data.get('total_completion_tokens', 0)}"
)
logger.info(f" Total tokens: {usage.get('total_tokens', 0)}")
by_model = usage.get("by_model", {})
logger.info(
f" Reasoning tokens: {usage_data.get('total_reasoning_tokens', 0)}"
)
logger.info(f" Total tokens: {usage_data.get('total_tokens', 0)}")
by_model = usage_data.get("by_model", {})
if by_model:
for model_name, counters in by_model.items():
logger.info(f"--- Token Usage ({model_name}) ---")
@@ -363,10 +391,59 @@ def run_lm_eval(
except Exception:
pass # Usage endpoint not available
return result.returncode
logger.info(f"Evaluation completed in {elapsed_seconds:.2f}s")
# Append token usage to lm_eval's results.json
if effective_output and usage:
_append_token_usage_to_results(effective_output, usage, elapsed_seconds)
return proc.returncode, usage, elapsed_seconds
except FileNotFoundError:
logger.error("lm_eval not found. Install with: uv sync --extra eval")
return 1
return 1, None, None
def _append_token_usage_to_results(
output_path: str, usage: dict[str, Any], elapsed_seconds: float
) -> None:
"""Append token usage data to lm_eval's results.json file."""
output_dir = Path(output_path)
results_file = output_dir / "results.json"
if not results_file.exists():
# lm_eval may put results in a subdirectory named after the model
for subdir in output_dir.iterdir():
if subdir.is_dir():
candidate = subdir / "results.json"
if candidate.exists():
results_file = candidate
break
if not results_file.exists():
logger.warning(f"Could not find results.json in {output_path}")
return
try:
with open(results_file, encoding="utf-8") as f:
results = json.load(f)
# Add token usage to the results
results["token_usage"] = {
"prompt_tokens": usage.get("total_prompt_tokens", 0),
"completion_tokens": usage.get("total_completion_tokens", 0),
"reasoning_tokens": usage.get("total_reasoning_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
"total_requests": usage.get("total_requests", 0),
"by_model": usage.get("by_model"),
}
results["elapsed_seconds"] = elapsed_seconds
with open(results_file, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2, ensure_ascii=False)
logger.info(f"Added token usage to: {results_file}")
except Exception as e:
logger.warning(f"Failed to append token usage to results.json: {e}")
def run_swe_bench(
@@ -376,7 +453,7 @@ def run_swe_bench(
model: str,
output_path: str | None,
dry_run: bool,
) -> int:
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run SWE-bench evaluation (placeholder)."""
swe_config = config.get("swe_bench", {})
@@ -395,13 +472,277 @@ def run_swe_bench(
if dry_run:
logger.info("[dry-run] SWE-bench evaluation would be executed")
return 0
return 0, None, None
logger.warning(
"SWE-bench integration is a placeholder. "
"Implement swebench inference and evaluation logic as needed."
)
return 0
return 0, None, None
def run_livecodebench(
config: dict[str, Any],
host: str,
port: int,
model: str,
output_path: str | None,
limit: int | None,
dry_run: bool,
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run LiveCodeBench evaluation.
LiveCodeBench is a contamination-free benchmark for code generation that
continuously collects new problems from LeetCode, AtCoder, and Codeforces.
See: https://livecodebench.github.io/
"""
lcb_config = config.get("livecodebench", {})
scenario = lcb_config.get("scenario", "codegeneration")
release_version = lcb_config.get("release_version", "release_v5")
# Defaults match Artificial Analysis methodology:
# https://artificialanalysis.ai/methodology/intelligence-benchmarking
temperature = lcb_config.get("temperature", 0) # 0 for non-reasoning models
n_samples = lcb_config.get("n_samples", 1) # pass@1
max_tokens = lcb_config.get("max_tokens", 16384)
use_cache = lcb_config.get("use_cache", False)
fast = lcb_config.get("fast", True) # Use code_generation_lite by default
evaluate = lcb_config.get("evaluate", True)
multiprocess = lcb_config.get("multiprocess", 4)
# Timeouts (high defaults for slow inference)
timeout = lcb_config.get("timeout", 1800) # 30 min per problem
openai_timeout = lcb_config.get("openai_timeout", 3600) # 1 hour per request
exo_base_url = f"http://{host}:{port}/v1"
effective_output = output_path or lcb_config.get("output_path", "bench/lcb_results")
logger.info("LiveCodeBench evaluation configuration:")
logger.info(f" Scenario: {scenario}")
logger.info(f" Release version: {release_version}")
logger.info(f" Model: {model}")
logger.info(f" API endpoint: {exo_base_url}")
logger.info(f" Temperature: {temperature}")
logger.info(f" N samples: {n_samples}")
logger.info(f" Max tokens: {max_tokens}")
logger.info(f" Output path: {effective_output}")
# Build command using our wrapper script that handles:
# 1. Registering custom models in LiveCodeBench's registry
# 2. Patching the OpenAI client to use exo's endpoint
args = [
sys.executable,
"-m",
"bench.livecodebench_runner",
"--base-url",
exo_base_url,
"--model",
model,
"--scenario",
scenario,
"--release_version",
release_version,
"--temperature",
str(temperature),
"--n",
str(n_samples),
"--codegen_n",
str(n_samples),
"--max_tokens",
str(max_tokens),
"--output-dir",
effective_output,
]
if use_cache:
args.append("--use_cache")
if not fast:
args.append("--not_fast")
if evaluate:
args.append("--evaluate")
if multiprocess > 1:
args.extend(["--multiprocess", str(multiprocess)])
# Add timeouts
args.extend(["--timeout", str(timeout)])
args.extend(["--openai_timeout", str(openai_timeout)])
if limit is not None:
args.extend(["--limit", str(limit)])
logger.info(f"LiveCodeBench command: {' '.join(args)}")
if dry_run:
logger.info("[dry-run] Would execute the above command")
return 0, None, None
# Environment is set up by the wrapper script
env = os.environ.copy()
try:
start_time = time.perf_counter()
# Use Popen with process group so we can kill all children on interrupt
proc = subprocess.Popen(args, env=env, start_new_session=True)
try:
proc.wait()
except KeyboardInterrupt:
# Kill the entire process group on Ctrl+C
import signal
logger.info("Interrupted - terminating LiveCodeBench processes...")
try:
os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
proc.wait(timeout=5)
except (ProcessLookupError, OSError):
pass # Already dead
except subprocess.TimeoutExpired:
os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
return 130, None, None
elapsed_seconds = time.perf_counter() - start_time
# Fetch token usage from exo
usage: dict[str, Any] | None = None
try:
import httpx
usage_resp = httpx.get(f"http://{host}:{port}/v1/usage", timeout=5)
if usage_resp.status_code == 200:
usage_data: dict[str, Any] = usage_resp.json()
usage = usage_data
logger.info("--- Token Usage (Total) ---")
logger.info(
f" Requests: {usage_data.get('total_requests', 0)}"
)
logger.info(
f" Prompt tokens: {usage_data.get('total_prompt_tokens', 0)}"
)
logger.info(
f" Completion tokens: {usage_data.get('total_completion_tokens', 0)}"
)
logger.info(f" Total tokens: {usage_data.get('total_tokens', 0)}")
except Exception:
pass # Usage endpoint not available
logger.info(f"LiveCodeBench evaluation completed in {elapsed_seconds:.2f}s")
# Generate results.json from eval files
if proc.returncode == 0:
_generate_livecodebench_results(
effective_output, model, elapsed_seconds, usage, lcb_config
)
return proc.returncode, usage, elapsed_seconds
except FileNotFoundError:
logger.error(
"LiveCodeBench not found. Install with: "
"pip install livecodebench OR "
"git clone https://github.com/LiveCodeBench/LiveCodeBench && "
"cd LiveCodeBench && uv pip install -e ."
)
return 1, None, None
def _generate_livecodebench_results(
output_path: str,
model: str,
elapsed_seconds: float,
usage: dict[str, Any] | None,
lcb_config: dict[str, Any],
) -> None:
"""Generate a results.json file from LiveCodeBench evaluation results."""
output_dir = Path(output_path)
model_dir = output_dir / model
if not model_dir.exists():
logger.warning(f"Model output directory not found: {model_dir}")
return
# Find all eval.json files (not eval_all.json)
eval_files = list(model_dir.glob("*_eval.json"))
eval_files = [f for f in eval_files if "_eval_all.json" not in f.name]
if not eval_files:
logger.warning(f"No eval files found in {model_dir}")
return
# Parse the most recent eval file (by modification time)
eval_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
latest_eval = eval_files[0]
try:
with open(latest_eval, encoding="utf-8") as f:
eval_data = json.load(f)
# Extract pass@k scores from the first element
scores: dict[str, float] = {}
details: dict[str, Any] = {}
if isinstance(eval_data, list) and len(eval_data) > 0:
first_elem = eval_data[0]
if isinstance(first_elem, dict):
# Extract all pass@k scores
for key, value in first_elem.items():
if key.startswith("pass@") and isinstance(value, (int, float)):
scores[key] = float(value)
elif key == "detail":
details = value
# Count problems from the corresponding output file
output_file_name = latest_eval.name.replace("_eval.json", ".json")
output_file = model_dir / output_file_name
num_problems = 0
if output_file.exists():
with open(output_file, encoding="utf-8") as f:
problems_data = json.load(f)
if isinstance(problems_data, list):
num_problems = len(problems_data)
# Build results.json
results: dict[str, Any] = {
"model": model,
"eval_type": "livecodebench",
"scenario": lcb_config.get("scenario", "codegeneration"),
"release_version": lcb_config.get("release_version", "release_v5"),
"timestamp": datetime.now(timezone.utc).isoformat(),
"elapsed_seconds": elapsed_seconds,
"num_problems": num_problems,
"results": scores,
"config": {
"temperature": lcb_config.get("temperature", 0),
"n_samples": lcb_config.get("n_samples", 1),
"max_tokens": lcb_config.get("max_tokens", 16384),
},
}
if details:
results["details"] = details
if usage:
results["token_usage"] = {
"prompt_tokens": usage.get("total_prompt_tokens", 0),
"completion_tokens": usage.get("total_completion_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
"total_requests": usage.get("total_requests", 0),
}
# Write results.json to the model directory
results_file = model_dir / "results.json"
with open(results_file, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2, ensure_ascii=False)
logger.info(f"Generated LiveCodeBench results: {results_file}")
# Also log the key metrics
if scores:
scores_str = ", ".join(f"{k}={v:.2%}" for k, v in sorted(scores.items()))
logger.info(f"LiveCodeBench scores: {scores_str}")
except Exception as e:
logger.warning(f"Failed to generate results.json: {e}")
def run_custom_eval(
@@ -411,19 +752,19 @@ def run_custom_eval(
model: str,
output_path: str | None,
dry_run: bool,
) -> int:
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run custom evaluation script."""
custom_config = config.get("custom", {})
script = custom_config.get("script")
if not script:
logger.error("No script specified in [custom] config section")
return 1
return 1, None, None
script_path = Path(script)
if not script_path.exists():
logger.error(f"Custom script not found: {script}")
return 1
return 1, None, None
script_args = custom_config.get("args", [])
if not isinstance(script_args, list):
@@ -442,10 +783,13 @@ def run_custom_eval(
if dry_run:
logger.info("[dry-run] Would execute the above command")
return 0
return 0, None, None
start_time = time.perf_counter()
result = subprocess.run(cmd, env=env, check=False)
return result.returncode
elapsed_seconds = time.perf_counter() - start_time
logger.info(f"Custom evaluation completed in {elapsed_seconds:.2f}s")
return result.returncode, None, elapsed_seconds
def write_results_metadata(
@@ -457,6 +801,8 @@ def write_results_metadata(
eval_type: EvalType,
return_code: int,
preview: dict[str, Any] | None,
usage: dict[str, Any] | None,
elapsed_seconds: float | None,
) -> None:
"""Write evaluation metadata to a JSON file."""
metadata: dict[str, Any] = {
@@ -468,6 +814,9 @@ def write_results_metadata(
"return_code": return_code,
}
if elapsed_seconds is not None:
metadata["elapsed_seconds"] = elapsed_seconds
if preview:
metadata["placement"] = {
"sharding": preview.get("sharding"),
@@ -477,6 +826,16 @@ def write_results_metadata(
else None,
}
if usage:
metadata["token_usage"] = {
"prompt_tokens": usage.get("total_prompt_tokens", 0),
"completion_tokens": usage.get("total_completion_tokens", 0),
"reasoning_tokens": usage.get("total_reasoning_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
"total_requests": usage.get("total_requests", 0),
"by_model": usage.get("by_model"),
}
output_dir = Path(output_path)
output_dir.mkdir(parents=True, exist_ok=True)
metadata_path = output_dir / "eval_metadata.json"
@@ -621,8 +980,10 @@ def main() -> int:
try:
# Run evaluation
usage: dict[str, Any] | None = None
elapsed_seconds: float | None = None
if eval_type == "lm_eval":
return_code = run_lm_eval(
return_code, usage, elapsed_seconds = run_lm_eval(
config,
args.host,
args.port,
@@ -632,7 +993,7 @@ def main() -> int:
args.dry_run,
)
elif eval_type == "swe_bench":
return_code = run_swe_bench(
return_code, usage, elapsed_seconds = run_swe_bench(
config,
args.host,
args.port,
@@ -640,8 +1001,18 @@ def main() -> int:
args.output,
args.dry_run,
)
elif eval_type == "livecodebench":
return_code, usage, elapsed_seconds = run_livecodebench(
config,
args.host,
args.port,
full_model_id,
args.output,
args.limit,
args.dry_run,
)
elif eval_type == "custom":
return_code = run_custom_eval(
return_code, usage, elapsed_seconds = run_custom_eval(
config,
args.host,
args.port,
@@ -665,6 +1036,8 @@ def main() -> int:
eval_type,
return_code,
preview,
usage,
elapsed_seconds,
)
return return_code
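Editor's note: run_lm_eval and run_livecodebench now share the same interrupt-safe subprocess pattern; isolated, it looks like the sketch below (the ["sleep", "3600"] command is a stand-in):

import os
import signal
import subprocess

proc = subprocess.Popen(["sleep", "3600"], start_new_session=True)  # own process group
try:
    proc.wait()
except KeyboardInterrupt:
    try:
        os.killpg(os.getpgid(proc.pid), signal.SIGTERM)  # signal the whole group
        proc.wait(timeout=5)
    except (ProcessLookupError, OSError):
        pass  # already dead
    except subprocess.TimeoutExpired:
        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)  # escalate if SIGTERM is ignored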

View File

@@ -0,0 +1,287 @@
#!/usr/bin/env python3
# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
"""
LiveCodeBench runner wrapper for exo.
This wrapper allows running LiveCodeBench with custom OpenAI-compatible endpoints
by dynamically registering models and configuring the OpenAI client.
Usage:
python -m bench.livecodebench_runner --model my-model --base-url http://localhost:52415/v1 [lcb args...]
The wrapper:
1. Registers the custom model in LiveCodeBench's model registry
2. Sets up environment variables for the OpenAI client
3. Runs the standard LiveCodeBench runner
Requires LiveCodeBench to be installed:
git clone https://github.com/LiveCodeBench/LiveCodeBench
cd LiveCodeBench && uv pip install -e .
"""
from __future__ import annotations
import argparse
import multiprocessing
import os
import signal
import sys
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, NoReturn
if TYPE_CHECKING:
from typing import Any
def _cleanup_and_exit(exit_code: int = 130) -> NoReturn:
"""Terminate all child processes and exit."""
# Terminate any active multiprocessing pools
for child in multiprocessing.active_children():
child.terminate()
child.join(timeout=1)
if child.is_alive():
child.kill()
# Force exit to avoid hanging on cleanup
os._exit(exit_code)
def _signal_handler(signum: int, frame: object) -> NoReturn:
"""Handle interrupt signals by terminating all child processes."""
_cleanup_and_exit(130)
def get_lcb_directory() -> Path | None:
"""Find the LiveCodeBench installation directory.
LiveCodeBench uses relative paths like 'lcb_runner/prompts/few_shot_examples/...'
which require running from the LiveCodeBench directory.
"""
# Check environment variable first
if env_path := os.environ.get("LIVECODEBENCH_DIR"):
lcb_path = Path(env_path)
if (lcb_path / "lcb_runner" / "prompts" / "few_shot_examples").exists():
return lcb_path
# Use importlib to find package location without executing module code
# This avoids triggering the relative path imports that would fail
try:
import importlib.util
spec = importlib.util.find_spec("lcb_runner")
if spec and spec.origin:
# spec.origin is the __init__.py path, go up two levels
lcb_path = Path(spec.origin).parent.parent
if (lcb_path / "lcb_runner" / "prompts" / "few_shot_examples").exists():
return lcb_path
except (ImportError, ModuleNotFoundError):
pass
# Check common locations relative to this script
script_dir = Path(__file__).parent.parent # exo/
common_locations = [
script_dir / "LiveCodeBench", # exo/LiveCodeBench
script_dir.parent / "LiveCodeBench", # sibling to exo
]
for loc in common_locations:
if (loc / "lcb_runner" / "prompts" / "few_shot_examples").exists():
return loc
return None
def setup_custom_model(model_name: str, base_url: str) -> None:
"""Register a custom model in LiveCodeBench's registry."""
try:
from lcb_runner.lm_styles import ( # pyright: ignore[reportMissingImports]
LanguageModel,
LanguageModelList,
LanguageModelStore,
LMStyle,
)
except ImportError as e:
print(
"Error: LiveCodeBench not installed. Install with:\n"
" git clone https://github.com/LiveCodeBench/LiveCodeBench\n"
" cd LiveCodeBench && uv pip install -e .",
file=sys.stderr,
)
raise SystemExit(1) from e
# Check if model already exists
if model_name in LanguageModelStore:
return
# Create a new model entry using OpenAIChat style
# This will route through the oai_runner which respects OPENAI_BASE_URL
custom_model = LanguageModel(
model_name=model_name,
model_repr=model_name,
model_style=LMStyle.OpenAIChat,
release_date=datetime.now(),
link=base_url,
)
# Add to the model list and store
LanguageModelList.append(custom_model)
LanguageModelStore[model_name] = custom_model
def patch_openai_client(base_url: str) -> None:
"""Patch the OpenAI client to use a custom base URL.
This patches the oai_runner module to use our custom base URL.
"""
try:
from lcb_runner.runner import oai_runner # noqa: I001 # pyright: ignore[reportMissingImports]
except ImportError as e:
print(f"Error importing required modules: {e}", file=sys.stderr)
raise SystemExit(1) from e
# Store original client creation
original_init = oai_runner.OpenAI
def patched_openai(*args: Any, **kwargs: Any) -> Any:
"""Create OpenAI client with custom base_url."""
# Inject base_url if not already set
if "base_url" not in kwargs:
kwargs["base_url"] = base_url
# Use dummy API key if not set (exo doesn't require auth)
if "api_key" not in kwargs and not os.getenv("OPENAI_KEY"):
kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "exo-local")
return original_init(*args, **kwargs)
# Apply the patch
oai_runner.OpenAI = patched_openai
def main() -> int:
"""Main entry point."""
# Set up signal handlers for clean exit
signal.signal(signal.SIGINT, _signal_handler)
signal.signal(signal.SIGTERM, _signal_handler)
parser = argparse.ArgumentParser(
description="LiveCodeBench runner wrapper for exo",
epilog="Additional arguments are passed to lcb_runner.runner.main",
)
parser.add_argument(
"--base-url",
default=os.environ.get("OPENAI_BASE_URL", "http://localhost:52415/v1"),
help="OpenAI-compatible API base URL (default: OPENAI_BASE_URL or localhost:52415/v1)",
)
parser.add_argument(
"--model",
required=True,
help="Model name to use",
)
parser.add_argument(
"--output-dir",
default=None,
help="Output directory for results (maps to LiveCodeBench's --custom_output_save_name)",
)
parser.add_argument(
"--limit",
type=int,
default=None,
help="Limit number of problems to evaluate (for testing)",
)
# Parse known args, pass rest to LiveCodeBench
args, remaining = parser.parse_known_args()
# Set up environment
os.environ["OPENAI_BASE_URL"] = args.base_url
if "OPENAI_API_KEY" not in os.environ and "OPENAI_KEY" not in os.environ:
os.environ["OPENAI_API_KEY"] = "exo-local"
os.environ["OPENAI_KEY"] = "exo-local"
# Save original directory for output path resolution
original_cwd = os.getcwd()
# Change to LiveCodeBench directory before imports that use relative paths
# LiveCodeBench uses paths like 'lcb_runner/prompts/few_shot_examples/...'
lcb_dir = get_lcb_directory()
if lcb_dir:
os.chdir(lcb_dir)
else:
print(
"Warning: Could not find LiveCodeBench directory. "
"Relative path imports may fail.",
file=sys.stderr,
)
# Setup custom model and patch client
setup_custom_model(args.model, args.base_url)
patch_openai_client(args.base_url)
# Build arguments for LiveCodeBench runner
lcb_args = ["--model", args.model]
# Resolve output directory to absolute path (relative to original cwd)
output_base: str | None = None
if args.output_dir:
output_base = str(Path(original_cwd) / args.output_dir)
lcb_args.extend(remaining)
# Run LiveCodeBench
try:
from lcb_runner.runner import main as lcb_main_module # noqa: I001 # pyright: ignore[reportMissingImports]
from lcb_runner.utils import path_utils # noqa: I001 # pyright: ignore[reportMissingImports]
# Patch output path to use our output directory
if output_base:
original_get_output_path = path_utils.get_output_path
def patched_get_output_path(model_repr: str, runner_args: Any) -> str:
# Get the original path and replace 'output/' with our base
original_path = original_get_output_path(model_repr, runner_args)
# Replace 'output/' prefix with our custom base
if original_path.startswith("output/"):
new_path = str(
Path(output_base) / original_path[7:]
) # Skip 'output/'
else:
new_path = str(Path(output_base) / original_path)
path_utils.ensure_dir(new_path)
print(f"Saving results to: {new_path}")
return new_path
path_utils.get_output_path = patched_get_output_path
# Also patch in main module since it may have imported directly
if hasattr(lcb_main_module, "get_output_path"):
lcb_main_module.get_output_path = patched_get_output_path
# Patch benchmark loading to support --limit
# Must patch in the main module since it imports the function directly
if args.limit is not None:
original_build = lcb_main_module.build_prompt_benchmark
def limited_build(*a: Any, **kw: Any) -> Any:
benchmark, format_prompt = original_build(*a, **kw)
if args.limit and len(benchmark) > args.limit:
print(
f"Limiting benchmark from {len(benchmark)} to {args.limit} problems"
)
benchmark = benchmark[: args.limit]
return benchmark, format_prompt
lcb_main_module.build_prompt_benchmark = limited_build
# Patch sys.argv for argparse in lcb_main
sys.argv = [sys.argv[0], *lcb_args]
lcb_main_module.main()
return 0
except KeyboardInterrupt:
print("\nInterrupted by user", file=sys.stderr)
_cleanup_and_exit(130)
except SystemExit as e:
return e.code if isinstance(e.code, int) else 1
except Exception as e:
print(f"Error running LiveCodeBench: {e}", file=sys.stderr)
return 1
if __name__ == "__main__":
raise SystemExit(main())
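Editor's note: a quick illustration of the flag split the wrapper relies on; parse_known_args() keeps wrapper-owned flags and forwards the remainder verbatim to lcb_runner (argument values here are made up):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model", required=True)
parser.add_argument("--base-url", default="http://localhost:52415/v1")
args, remaining = parser.parse_known_args(
    ["--model", "my-model", "--scenario", "codegeneration", "--n", "1"]
)
assert args.model == "my-model"
assert remaining == ["--scenario", "codegeneration", "--n", "1"]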

View File

@@ -18,9 +18,9 @@ dependencies = [
"loguru>=0.7.3",
"exo_pyo3_bindings", # rust bindings
"anyio==4.11.0",
"mlx==0.30.3; sys_platform == 'darwin'",
"mlx[cpu]==0.30.3; sys_platform == 'linux'",
"mlx-lm==0.30.5",
"mlx==0.30.4; sys_platform == 'darwin'",
"mlx[cpu]==0.30.4; sys_platform == 'linux'",
"mlx-lm",
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
@@ -55,6 +55,11 @@ dev = [
# ]
eval = [
"lm_eval[api]",
# LiveCodeBench dependencies (livecodebench itself must be installed manually due to packaging issues)
# Install with: git clone https://github.com/LiveCodeBench/LiveCodeBench && cd LiveCodeBench && uv pip install -e .
"openai>=1.59.6",
"datasets>=2.14.0,<4.0", # LiveCodeBench requires <4.0 due to dataset script deprecation
"pebble>=5.1.0",
]
###
@@ -68,6 +73,7 @@ members = [
[tool.uv.sources]
exo_pyo3_bindings = { workspace = true }
mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm.git", branch = "main" }
# Uncomment to use local mlx/mlx-lm development versions:
# mlx = { path = "/Users/Shared/mlx", editable=true }
# mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }

View File

@@ -155,13 +155,23 @@ async def seed_models(seed_dir: str | Path):
async def fetch_file_list_with_cache(
model_id: ModelId, revision: str = "main", recursive: bool = False
model_id: ModelId,
revision: str = "main",
recursive: bool = False,
cache_ttl_seconds: int = 3600,
) -> list[FileListEntry]:
target_dir = (await ensure_models_dir()) / "caches" / model_id.normalize()
await aios.makedirs(target_dir, exist_ok=True)
cache_file = target_dir / f"{model_id.normalize()}--{revision}--file_list.json"
# Always try fresh first
# Use cache if it exists and is fresh (< TTL seconds old)
if await aios.path.exists(cache_file):
cache_age = time.time() - (await aios.stat(cache_file)).st_mtime
if cache_age < cache_ttl_seconds:
async with aiofiles.open(cache_file, "r") as f:
return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
# Cache missing or stale - fetch fresh
try:
file_list = await fetch_file_list_with_retry(
model_id, revision, recursive=recursive
@@ -173,7 +183,7 @@ async def fetch_file_list_with_cache(
)
return file_list
except Exception as e:
# Fetch failed - try cache fallback
# Fetch failed - try cache fallback (even if stale)
if await aios.path.exists(cache_file):
logger.warning(
f"Failed to fetch file list for {model_id}, using cached data: {e}"

View File

@@ -21,7 +21,7 @@ def exo_shard_downloader(max_parallel_downloads: int = 8) -> ShardDownloader:
async def build_base_shard(model_id: ModelId) -> ShardMetadata:
model_card = await ModelCard.from_hf(model_id)
model_card = await ModelCard.load(model_id)
return PipelineShardMetadata(
model_card=model_card,
device_rank=0,

View File

@@ -267,6 +267,11 @@ def main():
os.environ["EXO_FAST_SYNCH"] = "off"
logger.info("FAST_SYNCH forced OFF")
# Set EXO_NO_BATCH env var for runner subprocesses
if args.no_batch:
os.environ["EXO_NO_BATCH"] = "1"
logger.info("Batch inference disabled (serial mode)")
node = anyio.run(Node.create, args)
anyio.run(node.run)
logger.info("EXO Shutdown complete")
@@ -282,6 +287,7 @@ class Args(CamelCaseModel):
no_worker: bool = False
no_downloads: bool = False
fast_synch: bool | None = None # None = auto, True = force on, False = force off
no_batch: bool = False
@classmethod
def parse(cls) -> Self:
@@ -342,6 +348,11 @@ class Args(CamelCaseModel):
dest="fast_synch",
help="Force MLX FAST_SYNCH off",
)
parser.add_argument(
"--no-batch",
action="store_true",
help="Disable batch inference (use serial processing for benchmarking)",
)
args = parser.parse_args()
return cls(**vars(args)) # pyright: ignore[reportAny] - We are intentionally validating here, we can't do it statically

View File

@@ -1,5 +1,5 @@
from enum import Enum
from typing import Annotated
from typing import Annotated, Any
import aiofiles
import aiofiles.os as aios
@@ -7,7 +7,14 @@ import tomlkit
from anyio import Path, open_file
from huggingface_hub import model_info
from loguru import logger
from pydantic import BaseModel, Field, PositiveInt, field_validator
from pydantic import (
AliasChoices,
BaseModel,
Field,
PositiveInt,
field_validator,
model_validator,
)
from exo.shared.constants import EXO_ENABLE_IMAGE_MODELS
from exo.shared.types.common import ModelId
@@ -121,6 +128,22 @@ MODEL_CARDS: dict[str, ModelCard] = {
supports_tensor=True,
tasks=[ModelTask.TextGeneration],
),
"kimi-k2.5": ModelCard(
model_id=ModelId("mlx-community/Kimi-K2.5"),
storage_size=Memory.from_gb(617),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
tasks=[ModelTask.TextGeneration],
),
"kimi-k2.5-4bit": ModelCard(
model_id=ModelId("mlx-community/Kimi-K2.5-4bit"),
storage_size=Memory.from_gb(606),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
tasks=[ModelTask.TextGeneration],
),
# llama-3.1
"llama-3.1-8b": ModelCard(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"),
@@ -703,15 +726,18 @@ if EXO_ENABLE_IMAGE_MODELS:
class ConfigData(BaseModel):
model_config = {"extra": "ignore"} # Allow unknown fields
# Common field names for number of layers across different architectures
num_hidden_layers: Annotated[int, Field(ge=0)] | None = None
num_layers: Annotated[int, Field(ge=0)] | None = None
n_layer: Annotated[int, Field(ge=0)] | None = None
n_layers: Annotated[int, Field(ge=0)] | None = None # Sometimes used
num_decoder_layers: Annotated[int, Field(ge=0)] | None = None # Transformer models
decoder_layers: Annotated[int, Field(ge=0)] | None = None # Some architectures
hidden_size: Annotated[int, Field(ge=0)] | None = None
architectures: list[str] | None = None
hidden_size: Annotated[int, Field(ge=0)] | None = None
layer_count: int = Field(
validation_alias=AliasChoices(
"num_hidden_layers",
"num_layers",
"n_layer",
"n_layers",
"num_decoder_layers",
"decoder_layers",
)
)
@property
def supports_tensor(self) -> bool:
@@ -726,25 +752,27 @@ class ConfigData(BaseModel):
["GptOssForCausalLM"],
]
@property
def layer_count(self) -> int:
# Check common field names for layer count
layer_fields = [
self.num_hidden_layers,
self.num_layers,
self.n_layer,
self.n_layers,
self.num_decoder_layers,
self.decoder_layers,
]
@model_validator(mode="before")
@classmethod
def defer_to_text_config(cls, data: dict[str, Any]):
text_config = data.get("text_config")
if text_config is None:
return data
for layer_count in layer_fields:
if layer_count is not None:
return layer_count
for field in [
"architectures",
"hidden_size",
"num_hidden_layers",
"num_layers",
"n_layer",
"n_layers",
"num_decoder_layers",
"decoder_layers",
]:
if (val := text_config.get(field)) is not None: # pyright: ignore[reportAny]
data[field] = val
raise ValueError(
f"No layer count found in config.json: {self.model_dump_json()}"
)
return data
async def get_config_data(model_id: ModelId) -> ConfigData:

View File

@@ -239,10 +239,12 @@ class ChatCompletionTaskParams(BaseModel):
tool_choice: str | dict[str, Any] | None = None
parallel_tool_calls: bool | None = None
user: str | None = None
# Internal flag for benchmark mode - set by API, preserved through serialization
bench: bool = False
class BenchChatCompletionTaskParams(ChatCompletionTaskParams):
pass
bench: bool = True
class PlaceInstanceParams(BaseModel):
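Editor's note: the point of moving bench onto the base class is that a plain field survives serialization, whereas subclass identity does not. A minimal sketch with abbreviated class names:

from pydantic import BaseModel

class TaskParams(BaseModel):        # stands in for ChatCompletionTaskParams
    bench: bool = False

class BenchTaskParams(TaskParams):  # stands in for BenchChatCompletionTaskParams
    bench: bool = True

# After a serialize/deserialize round-trip the subclass is gone, but the
# bench flag is preserved; an isinstance() check would have returned False.
restored = TaskParams.model_validate(BenchTaskParams().model_dump())
assert restored.bench is True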

View File

@@ -26,6 +26,7 @@ from mlx_lm.models.glm4_moe_lite import Glm4MoeLiteDecoderLayer, Glm4MoeLiteMLP
from mlx_lm.models.glm4_moe_lite import Model as GLM4MoeLiteModel
from mlx_lm.models.gpt_oss import GptOssMoeModel
from mlx_lm.models.gpt_oss import Model as GptOssModel
from mlx_lm.models.kimi_k25 import Model as KimiK25Model
from mlx_lm.models.llama import Model as LlamaModel
from mlx_lm.models.minimax import Model as MiniMaxModel
from mlx_lm.models.ministral3 import Model as Ministral3Model
@@ -216,6 +217,9 @@ def pipeline_auto_parallel(
device_rank, world_size = model_shard_meta.device_rank, model_shard_meta.world_size
layers = layers[start_layer:end_layer]
for layer in layers:
mx.eval(layer) # type: ignore
layers[0] = PipelineFirstLayer(layers[0], device_rank, group=group)
# Wrap intermediate layers with eval checkpoints to prevent GPU timeout
for i in range(1, len(layers) - 1):
@@ -367,7 +371,7 @@ def tensor_auto_parallel(
all_to_sharded_linear_in_place,
sharded_to_all_linear_in_place,
)
elif isinstance(model, (DeepseekV3Model, DeepseekV32Model)):
elif isinstance(model, (DeepseekV3Model, DeepseekV32Model, KimiK25Model)):
tensor_parallel_sharding_strategy = DeepSeekShardingStrategy(
group,
all_to_sharded_linear,
@@ -476,7 +480,7 @@ def _set_layers(model: nn.Module, layers: list[_LayerCallable]) -> None:
# Update DeepSeek V3 specific parameters when layers are shrunk
if isinstance(
model, (DeepseekV3Model, DeepseekV32Model, Glm4MoeModel)
model, (DeepseekV3Model, DeepseekV32Model, Glm4MoeModel, KimiK25Model)
) and hasattr(inner_model_instance, "num_layers"):
logger.info(
f"Setting num_layers to {len(layers)} for model {model.model.__class__.__name__}"
@@ -495,6 +499,66 @@ def _set_layers(model: nn.Module, layers: list[_LayerCallable]) -> None:
raise ValueError("Model must have either a 'layers' or 'h' attribute")
def _patch_deepseek_for_batching(model: nn.Module) -> None:
"""Patch DeepseekV3Model to handle batched total_context in __call__.
The upstream mlx-lm DeepseekV3Model has a bug where total_context becomes
an array (one value per batch item) when batching, but the comparison
`total_context >= self._mla_crossover` expects a scalar.
This patch fixes it by temporarily replacing the cache offset with a scalar
(max across batch) before calling the original __call__, then restoring it.
"""
# Get the inner model (DeepseekV3Model)
inner_model: Any = getattr(model, "model", None)
if inner_model is None:
inner_model = getattr(model, "language_model", None)
if inner_model is not None:
inner_model = getattr(inner_model, "model", None) # pyright: ignore[reportAny]
if inner_model is None:
return
# Get the inner model's class and patch __call__
inner_cls: Any = inner_model.__class__ # pyright: ignore[reportAny]
if hasattr(inner_cls, "_batching_patched"): # pyright: ignore[reportAny]
return # Already patched
original_call: Any = inner_cls.__call__ # pyright: ignore[reportAny]
def patched_inner_call(
self: Any, # pyright: ignore[reportAny]
x: mx.array,
cache: Any = None, # pyright: ignore[reportAny]
) -> mx.array:
# Fix the batching bug where cache[0].offset is an array but the
# comparison `total_context >= self._mla_crossover` expects a scalar.
# We temporarily replace the offset with a scalar (max across batch)
# for the crossover check, then restore it after.
if cache is not None and len(cache) > 0 and hasattr(self, "_mla_crossover"): # pyright: ignore[reportAny]
first_cache = cache[0]
original_offset: Any = first_cache.offset # pyright: ignore[reportAny]
# Check if offset is an array (batched) and needs fixing
if hasattr(original_offset, "shape") and original_offset.shape: # pyright: ignore[reportAny]
# Use max offset for the crossover decision (conservative choice)
scalar_offset = int(mx.max(original_offset).item()) # pyright: ignore[reportAny]
first_cache.offset = scalar_offset
try:
result: Any = original_call(self, x, cache) # pyright: ignore[reportAny]
finally:
# Restore original array offset
first_cache.offset = original_offset
return result # pyright: ignore[reportAny]
return original_call(self, x, cache) # pyright: ignore[reportAny]
inner_cls.__call__ = patched_inner_call
inner_cls._batching_patched = True
logger.info("Patched DeepseekV3Model for batched inference")
class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
def shard_model(
self,
@@ -520,6 +584,9 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
layer.self_attn.kv_b_proj
)
layer.self_attn.o_proj = self.sharded_to_all_linear(layer.self_attn.o_proj)
# Store pre-shard head count and group for context parallelism
layer.self_attn.context_parallel_total_heads = layer.self_attn.num_heads
layer.self_attn._cp_group = self.group
layer.self_attn.num_heads //= self.N
# Shard the MLP
@@ -542,6 +609,10 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
mx.eval(layer)
# Store group for context parallelism
if hasattr(model, "model"):
model.model._cp_group = self.group
return model
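Editor's note: the bug the DeepSeek patch works around, in miniature: with batching, the cache offset is a per-item array, so comparing it against the scalar _mla_crossover no longer yields a usable scalar truth value. A small illustration (the crossover value is made up; the mx calls are standard MLX):

import mlx.core as mx

offsets = mx.array([100, 900])  # per-batch-item cache offsets
crossover = 512                 # stand-in for self._mla_crossover
# offsets >= crossover is array([False, True]), ambiguous as an `if` condition.
# The patch collapses it to a scalar (max across the batch) before the check:
scalar_offset = int(mx.max(offsets).item())
use_mla = scalar_offset >= crossover  # conservative: True if any item crossed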

View File

@@ -8,7 +8,6 @@ from mlx_lm.sample_utils import make_sampler
from mlx_lm.tokenizer_utils import TokenizerWrapper
from exo.shared.types.api import (
BenchChatCompletionTaskParams,
ChatCompletionMessage,
FinishReason,
GenerationStats,
@@ -368,7 +367,7 @@ def mlx_generate(
) -> Generator[GenerationResponse]:
# Ensure that generation stats only contains peak memory for this generation
mx.reset_peak_memory()
is_bench: bool = isinstance(task, BenchChatCompletionTaskParams)
is_bench: bool = task.bench
# Currently we support chat-completion tasks only.
logger.debug(f"task_params: {task}")
@@ -417,6 +416,7 @@ def mlx_generate(
max_tokens = task.max_tokens or MAX_TOKENS
generated_text_parts: list[str] = []
generation_start_time = time.perf_counter()
total_prompt_tokens = len(prompt_tokens) + prefix_hit_length
for out in stream_generate(
model=model,
tokenizer=tokenizer,
@@ -438,7 +438,7 @@ def mlx_generate(
stats = GenerationStats(
prompt_tps=float(prefill_tps or out.prompt_tps),
generation_tps=float(out.generation_tps),
prompt_tokens=int(out.prompt_tokens),
prompt_tokens=total_prompt_tokens,
generation_tokens=int(out.generation_tokens),
peak_memory_usage=Memory.from_gb(out.peak_memory),
)

View File

@@ -165,12 +165,11 @@ def mlx_distributed_init(
jaccl_coordinator = jaccl_coordinators[bound_instance.bound_node_id]
# TODO: update once upstream fixes
logger.info(
f"rank {rank} MLX_JACCL_DEVICES: {coordination_file} with devices: {jaccl_devices_json}"
f"rank {rank} MLX_IBV_DEVICES: {coordination_file} with devices: {jaccl_devices_json}"
)
logger.info(f"rank {rank} MLX_JACCL_COORDINATOR: {jaccl_coordinator}")
os.environ["MLX_JACCL_DEVICES"] = coordination_file
os.environ["MLX_IBV_DEVICES"] = coordination_file
os.environ["MLX_RANK"] = str(rank)
os.environ["MLX_JACCL_COORDINATOR"] = jaccl_coordinator
group = mx.distributed.init(backend="jaccl", strict=True)
@@ -259,10 +258,10 @@ def shard_and_load(
logger.info(f"Group size: {group.size()}, group rank: {group.rank()}")
# Estimate timeout based on model size
base_timeout = float(os.environ.get("EXO_MODEL_LOAD_TIMEOUT", "60"))
# Estimate timeout based on model size (5x default for large queued workloads)
base_timeout = float(os.environ.get("EXO_MODEL_LOAD_TIMEOUT", "300"))
model_size_gb = get_weights_size(shard_metadata).in_bytes / (1024**3)
timeout_seconds = base_timeout + model_size_gb / 5
timeout_seconds = base_timeout + model_size_gb
logger.info(
f"Evaluating model parameters with timeout of {timeout_seconds:.0f}s "
f"(model size: {model_size_gb:.1f}GB)"
@@ -339,8 +338,35 @@ def load_tokenizer_for_model_id(
# Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer
if "kimi-k2" in model_id_lower:
import importlib.util
import types
sys.path.insert(0, str(model_path))
from tokenization_kimi import TikTokenTokenizer # type: ignore[import-not-found] # noqa: I001
# Load tool_declaration_ts first (tokenization_kimi imports it with relative import)
tool_decl_path = model_path / "tool_declaration_ts.py"
if tool_decl_path.exists():
spec = importlib.util.spec_from_file_location(
"tool_declaration_ts", tool_decl_path
)
if spec and spec.loader:
tool_decl_module = importlib.util.module_from_spec(spec)
sys.modules["tool_declaration_ts"] = tool_decl_module
spec.loader.exec_module(tool_decl_module)
# Load tokenization_kimi with patched source (convert relative to absolute import)
tok_path = model_path / "tokenization_kimi.py"
source = tok_path.read_text()
source = source.replace("from .tool_declaration_ts", "from tool_declaration_ts")
spec = importlib.util.spec_from_file_location("tokenization_kimi", tok_path)
if spec:
tok_module = types.ModuleType("tokenization_kimi")
tok_module.__file__ = str(tok_path)
sys.modules["tokenization_kimi"] = tok_module
exec(compile(source, tok_path, "exec"), tok_module.__dict__) # noqa: S102
TikTokenTokenizer = tok_module.TikTokenTokenizer # type: ignore[attr-defined] # noqa: N806
else:
from tokenization_kimi import TikTokenTokenizer # type: ignore[import-not-found] # noqa: I001
hf_tokenizer: Any = TikTokenTokenizer.from_pretrained(model_path) # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType]
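Editor's note: the tokenizer workaround above boils down to loading a module from a file after rewriting its package-relative import. A generic sketch of that trick (function name and arguments are illustrative):

import sys
import types
from pathlib import Path

def load_with_rewritten_import(name: str, path: Path, old: str, new: str):
    source = path.read_text().replace(old, new)  # e.g. "from .x" -> "from x"
    module = types.ModuleType(name)
    module.__file__ = str(path)
    sys.modules[name] = module  # register before exec so self-imports resolve
    exec(compile(source, str(path), "exec"), module.__dict__)
    return module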

View File

@@ -275,9 +275,10 @@ class Worker:
case ChatCompletion():
# Don't wait for acknowledgment for batchable inference tasks
# This allows multiple tasks to reach the runner for batching
await self.runners[self._task_to_runner_id(task)].start_task(
task, wait_for_ack=False
)
# For tensor parallel: all nodes send tasks to their runner
# so non-coordinator can participate in collective ops
runner_id = self._task_to_runner_id(task)
await self.runners[runner_id].start_task(task, wait_for_ack=False)
case task:
await self.runners[self._task_to_runner_id(task)].start_task(task)

View File

@@ -255,8 +255,12 @@ def _ready_to_warmup(
)
# Rank = 0
# For tensor parallel, warmup is skipped so other ranks go directly
# to RunnerReady. We need to accept both WarmingUp and Ready states.
connecting_rank_ready = device_rank == 0 and all(
isinstance(all_runners.get(global_runner_id, None), RunnerWarmingUp)
isinstance(
all_runners.get(global_runner_id, None), (RunnerWarmingUp, RunnerReady)
)
for global_runner_id in shard_assignments.runner_to_shard
if global_runner_id != runner_id
)

View File

@@ -81,6 +81,8 @@ class BatchedInferenceHandler:
device_rank: int,
world_size: int = 1,
max_batch_size: int = 32,
tensor_parallel_group: mx.distributed.Group | None = None,
is_coordinator: bool = True,
):
self.model = model
self.tokenizer = tokenizer
@@ -88,6 +90,8 @@ class BatchedInferenceHandler:
self.device_rank = device_rank
self.world_size = world_size
self.max_batch_size = max_batch_size
self.tensor_parallel_group = tensor_parallel_group
self.is_coordinator = is_coordinator
# Model-specific thinking/reasoning detection
self.is_gpt_oss = isinstance(model, GptOssModel)
@@ -112,6 +116,9 @@ class BatchedInferenceHandler:
# Pending requests waiting to be batched
self.pending: list[PendingRequest] = []
# Track active count for non-coordinators (they don't have uid_to_request)
self._non_coordinator_active_count: int = 0
# Active batch generator and request tracking
self.batch_generator: BatchGenerator | None = None
self.pipelined_generator: PipelinedGenerator | None = None
@@ -138,7 +145,12 @@ class BatchedInferenceHandler:
self.pipelined_generator is not None
and self.pipelined_generator.has_active
)
return self.batch_generator is not None and len(self.uid_to_request) > 0
if self.batch_generator is None:
return False
# For non-coordinators, use internal counter (they don't track uid_to_request)
if not self.is_coordinator:
return self._non_coordinator_active_count > 0
return len(self.uid_to_request) > 0
@property
def has_pending(self) -> bool:
@@ -187,28 +199,130 @@ class BatchedInferenceHandler:
f"Added request to batch queue (pending={len(self.pending)}, active={self.current_batch_size})"
)
def _broadcast_int(self, value: int) -> int:
"""Broadcast an integer from rank 0 to all ranks."""
if self.tensor_parallel_group is None:
return value
arr = mx.array([value if self.is_coordinator else 0], dtype=mx.int32)
synced = mx.distributed.all_sum(arr, group=self.tensor_parallel_group)
mx.eval(synced)
return int(synced.item())
def _broadcast_tokens(self, tokens_list: list[list[int]]) -> list[list[int]]:
"""Broadcast tokenized prompts from rank 0 to all ranks."""
if self.tensor_parallel_group is None:
return tokens_list
# Step 1: Broadcast number of sequences
num_seqs = self._broadcast_int(len(tokens_list))
if num_seqs == 0:
return []
# Step 2: Broadcast length of each sequence
lengths: list[int] = []
for i in range(num_seqs):
length = self._broadcast_int(
len(tokens_list[i])
if self.is_coordinator and i < len(tokens_list)
else 0
)
lengths.append(length)
# Step 3: Broadcast flattened tokens
total_tokens = sum(lengths)
if self.is_coordinator:
flat: list[int] = []
for seq in tokens_list:
flat.extend(seq)
flat_arr = mx.array(flat, dtype=mx.int32)
else:
flat_arr = mx.zeros((total_tokens,), dtype=mx.int32)
# Broadcast via all_sum (rank 0 contributes, others contribute zeros)
synced_flat = mx.distributed.all_sum(flat_arr, group=self.tensor_parallel_group)
mx.eval(synced_flat)
# Unflatten
result: list[list[int]] = []
offset = 0
for length in lengths:
seq_arr = synced_flat[offset : offset + length]
seq: list[int] = [int(x) for x in seq_arr.tolist()] # type: ignore[union-attr]
result.append(seq)
offset += length
return result
def flush(self) -> None:
"""Start processing pending requests by adding them to the batch/pipelined generator."""
if not self.has_pending:
return
# Declare variables with types
tokenized_prompts: list[list[int]]
max_tokens_list: list[int]
samplers: list[Callable[[mx.array], mx.array]]
prompt_token_counts: list[int]
requests_to_flush: list[PendingRequest]
# Determine how many requests to flush (up to available slots)
available_slots = self.max_batch_size - self.current_batch_size
requests_to_flush = self.pending[:available_slots]
self.pending = self.pending[available_slots:]
# For tensor parallel: rank 0 broadcasts batch info, others receive and sync
if self.tensor_parallel_group is not None:
# Broadcast how many to flush
available_slots = self.max_batch_size - self.current_batch_size
num_to_flush = self._broadcast_int(
min(len(self.pending), available_slots) if self.is_coordinator else 0
)
# Prepare batch data - tokenize prompts
tokenized_prompts: list[list[int]] = []
max_tokens_list: list[int] = []
samplers: list[Callable[[mx.array], mx.array]] = []
prompt_token_counts: list[int] = []
if num_to_flush == 0:
return
for req in requests_to_flush:
tokens = self.tokenizer.encode(req.prompt)
tokenized_prompts.append(tokens)
max_tokens_list.append(req.max_tokens)
samplers.append(req.sampler)
prompt_token_counts.append(len(tokens))
# Get requests and tokenize on rank 0
if self.is_coordinator:
requests_to_flush = self.pending[:num_to_flush]
self.pending = self.pending[num_to_flush:]
tokenized_prompts = [
self.tokenizer.encode(req.prompt) for req in requests_to_flush
]
max_tokens_list = [req.max_tokens for req in requests_to_flush]
else:
requests_to_flush = []
tokenized_prompts = []
max_tokens_list = []
# Broadcast tokenized prompts to all ranks
tokenized_prompts = self._broadcast_tokens(tokenized_prompts)
# Broadcast max_tokens
synced_max_tokens: list[int] = []
for i in range(num_to_flush):
mt = self._broadcast_int(
max_tokens_list[i]
if self.is_coordinator and i < len(max_tokens_list)
else 0
)
synced_max_tokens.append(mt)
max_tokens_list = synced_max_tokens
            # Create samplers on every rank (identical everywhere since we typically use temp=0)
samplers = [make_sampler(temp=0.0) for _ in range(num_to_flush)]
prompt_token_counts = [len(t) for t in tokenized_prompts]
else:
if not self.has_pending:
return
available_slots = self.max_batch_size - self.current_batch_size
requests_to_flush = self.pending[:available_slots]
self.pending = self.pending[available_slots:]
# Prepare batch data - tokenize prompts
tokenized_prompts = []
max_tokens_list = []
samplers = []
prompt_token_counts = []
for req in requests_to_flush:
tokens = self.tokenizer.encode(req.prompt)
tokenized_prompts.append(tokens)
max_tokens_list.append(req.max_tokens)
samplers.append(req.sampler)
prompt_token_counts.append(len(tokens))
if self.use_pipelined:
self._flush_pipelined(
@@ -314,28 +428,48 @@ class BatchedInferenceHandler:
samplers=samplers, # pyright: ignore[reportCallIssue]
)
for uid, req, prompt_tokens, tokens in zip(
uids, requests_to_flush, prompt_token_counts, tokenized_prompts, strict=True
): # pyright: ignore[reportUnknownArgumentType]
parser = None
if self.is_gpt_oss and self._harmony_encoding is not None:
parser = StreamableParser(self._harmony_encoding, role=Role.ASSISTANT) # pyright: ignore[reportAny]
# Check if prompt contains <think> token - if so, model is already in thinking mode
starts_in_thinking = (
self._think_start_token is not None
and self._think_start_token in tokens
)
self.uid_to_request[uid] = ActiveRequest(
command_id=req.task.command_id,
should_extract_logprobs=req.should_extract_logprobs,
top_k=req.top_k,
prompt_tokens=prompt_tokens,
harmony_parser=parser,
in_thinking=starts_in_thinking,
)
# Only coordinator tracks requests (non-coordinators don't have request objects)
if self.is_coordinator:
for uid, req, prompt_tokens, tokens in zip(
uids, # pyright: ignore[reportUnknownArgumentType]
requests_to_flush,
prompt_token_counts,
tokenized_prompts,
strict=True,
):
parser = None
if self.is_gpt_oss and self._harmony_encoding is not None:
parser = StreamableParser(
self._harmony_encoding, # pyright: ignore[reportAny]
role=Role.ASSISTANT,
)
# Check if prompt contains <think> token - if so, model is already in thinking mode
starts_in_thinking = (
self._think_start_token is not None
and self._think_start_token in tokens
)
self.uid_to_request[uid] = ActiveRequest(
command_id=req.task.command_id,
should_extract_logprobs=req.should_extract_logprobs,
top_k=req.top_k,
prompt_tokens=prompt_tokens,
harmony_parser=parser,
in_thinking=starts_in_thinking,
)
else:
# Non-coordinator: INCREMENT active count (not set) to track all active requests
# across multiple flushes. This ensures is_active remains True when new requests
# are added while existing ones are still generating.
self._non_coordinator_active_count += len(tokenized_prompts)
# Log the actual active count (different tracking for coordinator vs non-coordinator)
actual_active = (
self.current_batch_size
if self.is_coordinator
else self._non_coordinator_active_count
)
logger.info(
f"Flushed {len(requests_to_flush)} requests into batch (active={self.current_batch_size}, uids={list(self.uid_to_request.keys())})"
f"Flushed {len(tokenized_prompts)} requests into batch (active={actual_active}, is_coordinator={self.is_coordinator})"
)
def step(self) -> Generator[Event, None, None]:
@@ -348,7 +482,20 @@ class BatchedInferenceHandler:
yield from self._step_pipelined()
return
if self.batch_generator is None or not self.uid_to_request:
if self.batch_generator is None:
return
# Non-coordinators still need to call next() for model sync but don't emit events
if not self.is_coordinator:
if self._non_coordinator_active_count > 0:
nc_responses: list[Any] = self.batch_generator.next() # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
# Track completions to update active count
for nc_resp in nc_responses: # pyright: ignore[reportUnknownVariableType]
if nc_resp.finish_reason is not None: # pyright: ignore[reportUnknownMemberType]
self._non_coordinator_active_count -= 1
return
if not self.uid_to_request:
return
# Get next tokens for all active requests
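
The broadcast helpers above rely on a standard trick: MLX's distributed API exposes all_sum, and summing rank 0's payload against zeros contributed by every other rank reproduces rank 0's data everywhere. A self-contained sketch under the assumption of an initialized mx.distributed group (e.g. a job started with mlx.launch):

import mlx.core as mx

group = mx.distributed.init()
is_coordinator = group.rank() == 0

def broadcast_tokens(tokens: list[int]) -> list[int]:
    # Step 1: agree on the length. Rank 0 contributes the real value, others
    # contribute zero, so the all_sum equals rank 0's length on every rank.
    n = int(
        mx.distributed.all_sum(
            mx.array([len(tokens) if is_coordinator else 0], dtype=mx.int32),
            group=group,
        ).item()
    )
    # Step 2: sum the real payload against zeros to replicate it everywhere.
    payload = (
        mx.array(tokens, dtype=mx.int32)
        if is_coordinator
        else mx.zeros((n,), dtype=mx.int32)
    )
    synced = mx.distributed.all_sum(payload, group=group)
    mx.eval(synced)
    return [int(x) for x in synced.tolist()]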

View File

@@ -1,5 +1,6 @@
import base64
import json
import os
import time
from collections.abc import Generator
from functools import cache
@@ -71,7 +72,6 @@ from exo.worker.engines.image import (
warmup_image_generator,
)
from exo.worker.engines.mlx import Model
from exo.worker.engines.mlx.cache import KVPrefixCache
from exo.worker.engines.mlx.generator.generate import (
mlx_generate,
warmup_inference,
@@ -87,7 +87,7 @@ from exo.worker.runner.batched_handler import BatchedInferenceHandler
from exo.worker.runner.bootstrap import logger
# Batching configuration
BATCH_ENABLED = True
BATCH_ENABLED = os.environ.get("EXO_NO_BATCH") != "1"
BATCH_MAX_SIZE = 32
@@ -217,6 +217,11 @@ def main(
bound_instance.bound_shard,
)
device_rank = shard_metadata.device_rank
# Determine if this node is the coordinator for tensor parallel
# Use sorted node ordering for consistency with main.py
node_id = bound_instance.bound_node_id
sorted_nodes = sorted(instance.shard_assignments.node_to_runner.keys())
is_tp_coordinator = node_id == sorted_nodes[0]
logger.info("hello from the runner")
if getattr(shard_metadata, "immediate_exception", False):
raise Exception("Fake exception - runner failed to spin up.")
@@ -228,8 +233,8 @@ def main(
model: Model | DistributedImageModel | None = None
tokenizer: TokenizerWrapper | None = None
group = None
kv_prefix_cache: KVPrefixCache | None = None
batch_handler: BatchedInferenceHandler | None = None
is_tensor_parallel = False
current_status: RunnerStatus = RunnerIdle()
logger.info("runner created")
@@ -242,7 +247,13 @@ def main(
Process a single task. Returns True if the runner should continue,
False if it should shut down.
"""
nonlocal current_status, model, tokenizer, group, batch_handler
nonlocal \
current_status, \
model, \
tokenizer, \
group, \
batch_handler, \
is_tensor_parallel
event_sender.send(
TaskStatusUpdated(task_id=task.task_id, task_status=TaskStatus.Running)
)
@@ -297,26 +308,25 @@ def main(
logger.info(f"model has_tool_calling={tokenizer.has_tool_calling}")
# Initialize batch handler for text generation models
is_tensor_parallel = isinstance(shard_metadata, TensorShardMetadata)
if BATCH_ENABLED:
# For tensor parallelism, distributed ops are handled inside model layers
# so batch handler should use world_size=1 (no pipelining)
batch_world_size = (
1
if isinstance(shard_metadata, TensorShardMetadata)
else shard_metadata.world_size
)
batch_handler = BatchedInferenceHandler(
model=model,
tokenizer=tokenizer,
model_id=shard_metadata.model_card.model_id,
device_rank=device_rank,
world_size=batch_world_size,
world_size=1
if is_tensor_parallel
else shard_metadata.world_size,
max_batch_size=BATCH_MAX_SIZE,
tensor_parallel_group=group if is_tensor_parallel else None,
is_coordinator=is_tp_coordinator
if is_tensor_parallel
else True,
)
logger.info(
f"Batch handler initialized (max_batch_size={BATCH_MAX_SIZE}, world_size={batch_world_size})"
f"Batch handler initialized (max_batch_size={BATCH_MAX_SIZE}, tensor_parallel={is_tensor_parallel})"
)
kv_prefix_cache = KVPrefixCache(tokenizer)
elif (
ModelTask.TextToImage in shard_metadata.model_card.tasks
@@ -347,12 +357,22 @@ def main(
assert not isinstance(model, DistributedImageModel)
assert tokenizer
toks = warmup_inference(
model=model,
tokenizer=tokenizer,
# kv_prefix_cache=kv_prefix_cache, # supply for warmup-time prefix caching
)
logger.info(f"warmed up by generating {toks} tokens")
# For tensor parallel with batch handler, skip explicit warmup.
# The batch handler synchronizes all ranks via all_sum in flush(),
# so the first real request warms up the model on all ranks simultaneously.
# Without a batch handler, warmup must run normally to avoid GPU locks
# from mismatched send/recv in serial processing.
if is_tensor_parallel and batch_handler is not None:
logger.info(
"Tensor parallel: skipping warmup (first request will warm up through batch handler)"
)
toks = 0
else:
toks = warmup_inference(
model=model,
tokenizer=tokenizer,
)
logger.info(f"warmed up by generating {toks} tokens")
logger.info(
f"runner initialized in {time.time() - setup_start_time} seconds"
)
@@ -425,6 +445,12 @@ def main(
assert batch_handler is not None
try:
_check_for_debug_prompts(task_params.messages[0].content)
# Non-coordinator TP: don't add to batch handler.
# The batch handler syncs via all_sum in flush();
# non-coordinator participates through that, not through add_request.
if is_tensor_parallel and not is_tp_coordinator:
event_sender.send(TaskAcknowledged(task_id=task.task_id))
return True
batch_handler.add_request(task)
# Update status to running if not already
@@ -599,11 +625,16 @@ def main(
with task_receiver as tasks:
while True:
# Check if batch handler is active and needs processing
# For tensor parallel: both coordinator and non-coordinator go through
# the same loop, but only coordinator receives tasks. This ensures
# flush() all_sum calls are synchronized.
if batch_handler is not None and (
batch_handler.is_active or batch_handler.has_pending
batch_handler.is_active
or batch_handler.has_pending
or is_tensor_parallel
):
# Drain all available tasks before stepping
# Non-coordinator won't receive any (main.py doesn't send to it)
should_break = False
while True:
try:
@@ -627,11 +658,13 @@ def main(
if should_break:
break
# Flush all pending requests before stepping
if batch_handler.has_pending:
logger.info(
f"Flushing batch (pending={len(batch_handler.pending)}, active={batch_handler.current_batch_size})"
)
# Flush: for tensor parallel, always call so all ranks sync via all_sum
# For non-TP, only call when has_pending
if batch_handler.has_pending or is_tensor_parallel:
if batch_handler.has_pending:
logger.info(
f"Flushing batch (pending={len(batch_handler.pending)}, active={batch_handler.current_batch_size})"
)
batch_handler.flush()
# Step generation and emit events
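
The loop change encodes a collective-call invariant: mx.distributed.all_sum blocks until every rank in the group calls it, so if the coordinator enters flush() while a non-coordinator skips it, both sides hang. Keeping every tensor-parallel rank on the same flush()/step() cadence, work or not, is what keeps the collectives paired. A reduced sketch of that invariant (drain_incoming_tasks and emit are placeholders, not the runner's actual helpers):

def runner_loop(handler, is_tensor_parallel: bool) -> None:
    while True:
        # Only the coordinator actually receives tasks; peers drain nothing.
        drain_incoming_tasks(handler)
        # TP invariant: flush() performs all_sum calls, so all ranks must enter
        # it in lockstep even when they have nothing pending.
        if handler.has_pending or is_tensor_parallel:
            handler.flush()
        # step() likewise advances the model on every rank; non-coordinators
        # run it for synchronization but emit no events.
        for event in handler.step():
            emit(event)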

View File

@@ -5,6 +5,7 @@ from exo.shared.types.worker.runners import (
RunnerIdle,
RunnerLoaded,
RunnerLoading,
RunnerReady,
RunnerWarmingUp,
)
from exo.worker.tests.constants import (

uv.lock generated
View File

@@ -413,36 +413,46 @@ wheels = [
[[package]]
name = "datasets"
version = "4.5.0"
version = "2.21.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "fsspec", extra = ["http"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", version = "3.0.0rc2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
{ name = "pandas", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
{ name = "pyarrow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/55/bf/bb927bde63d649296c83e883171ae77074717c1b80fe2868b328bd0dbcbb/datasets-4.5.0.tar.gz", hash = "sha256:00c698ce1c2452e646cc5fad47fef39d3fe78dd650a8a6eb205bb45eb63cd500", size = 588384, upload-time = "2026-01-14T18:27:54.297Z" }
sdist = { url = "https://files.pythonhosted.org/packages/e5/a5/38719e5cff7aa0537a6be37d21cc1fdd7096e9565e8fce2d46a822e10b5b/datasets-2.21.0.tar.gz", hash = "sha256:998f85a8460f1bd982e5bd058f8a0808eef424249e3df1e8cdd594ccd0dc8ba2", size = 2215317, upload-time = "2024-08-14T06:40:44.314Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/d5/0d563ea3c205eee226dc8053cf7682a8ac588db8acecd0eda2b587987a0b/datasets-4.5.0-py3-none-any.whl", hash = "sha256:b5d7e08096ffa407dd69e58b1c0271c9b2506140839b8d99af07375ad31b6726", size = 515196, upload-time = "2026-01-14T18:27:52.419Z" },
{ url = "https://files.pythonhosted.org/packages/72/b3/33c4ad44fa020e3757e9b2fad8a5de53d9079b501e6bbc45bdd18f82f893/datasets-2.21.0-py3-none-any.whl", hash = "sha256:25e4e097110ce28824b746a107727ada94024cba11db8bc588d468414692b65a", size = 527251, upload-time = "2024-08-14T06:40:39.612Z" },
]
[[package]]
name = "dill"
version = "0.4.0"
version = "0.3.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847, upload-time = "2024-01-27T23:42:16.145Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
{ url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252, upload-time = "2024-01-27T23:42:14.239Z" },
]
[[package]]
name = "distro"
version = "1.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
]
[[package]]
@@ -466,7 +476,8 @@ dependencies = [
{ name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", version = "3.0.0rc2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
{ name = "pandas", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -509,7 +520,10 @@ dependencies = [
[package.optional-dependencies]
eval = [
{ name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "lm-eval", extra = ["api"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pebble", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
[package.dev-dependencies]
@@ -527,6 +541,7 @@ requires-dist = [
{ name = "aiofiles", specifier = ">=24.1.0" },
{ name = "aiohttp", specifier = ">=3.12.14" },
{ name = "anyio", specifier = "==4.11.0" },
{ name = "datasets", marker = "extra == 'eval'", specifier = ">=2.14.0,<4.0" },
{ name = "exo-pyo3-bindings", editable = "rust/exo_pyo3_bindings" },
{ name = "fastapi", specifier = ">=0.116.1" },
{ name = "filelock", specifier = ">=3.18.0" },
@@ -536,10 +551,12 @@ requires-dist = [
{ name = "lm-eval", extras = ["api"], marker = "extra == 'eval'" },
{ name = "loguru", specifier = ">=0.7.3" },
{ name = "mflux", specifier = "==0.15.4" },
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.3" },
{ name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.3" },
{ name = "mlx-lm", specifier = "==0.30.5" },
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.4" },
{ name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.4" },
{ name = "mlx-lm", git = "https://github.com/davidmcc73/mlx-lm.git?branch=main" },
{ name = "openai", marker = "extra == 'eval'", specifier = ">=1.59.6" },
{ name = "openai-harmony", specifier = ">=0.0.8" },
{ name = "pebble", marker = "extra == 'eval'", specifier = ">=5.1.0" },
{ name = "pillow", specifier = ">=11.0,<12.0" },
{ name = "psutil", specifier = ">=7.0.0" },
{ name = "pydantic", specifier = ">=2.11.7" },
@@ -697,11 +714,11 @@ wheels = [
[[package]]
name = "fsspec"
version = "2025.10.0"
version = "2024.6.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
sdist = { url = "https://files.pythonhosted.org/packages/90/b6/eba5024a9889fcfff396db543a34bef0ab9d002278f163129f9f01005960/fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49", size = 284584, upload-time = "2024-06-27T14:35:45.467Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
{ url = "https://files.pythonhosted.org/packages/5e/44/73bea497ac69bafde2ee4269292fa3b41f1198f4bb7bbaaabde30ad29d4a/fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e", size = 177561, upload-time = "2024-06-27T14:35:42.023Z" },
]
[package.optional-dependencies]
@@ -926,6 +943,46 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
]
[[package]]
name = "jiter"
version = "0.13.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/9c/7ee5a6ff4b9991e1a45263bfc46731634c4a2bde27dfda6c8251df2d958c/jiter-0.13.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf", size = 306897, upload-time = "2026-02-02T12:36:16.748Z" },
{ url = "https://files.pythonhosted.org/packages/7c/02/be5b870d1d2be5dd6a91bdfb90f248fbb7dcbd21338f092c6b89817c3dbf/jiter-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a", size = 317507, upload-time = "2026-02-02T12:36:18.351Z" },
{ url = "https://files.pythonhosted.org/packages/da/92/b25d2ec333615f5f284f3a4024f7ce68cfa0604c322c6808b2344c7f5d2b/jiter-0.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb", size = 350560, upload-time = "2026-02-02T12:36:19.746Z" },
{ url = "https://files.pythonhosted.org/packages/be/ec/74dcb99fef0aca9fbe56b303bf79f6bd839010cb18ad41000bf6cc71eec0/jiter-0.13.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2", size = 363232, upload-time = "2026-02-02T12:36:21.243Z" },
{ url = "https://files.pythonhosted.org/packages/1b/37/f17375e0bb2f6a812d4dd92d7616e41917f740f3e71343627da9db2824ce/jiter-0.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f", size = 483727, upload-time = "2026-02-02T12:36:22.688Z" },
{ url = "https://files.pythonhosted.org/packages/77/d2/a71160a5ae1a1e66c1395b37ef77da67513b0adba73b993a27fbe47eb048/jiter-0.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159", size = 370799, upload-time = "2026-02-02T12:36:24.106Z" },
{ url = "https://files.pythonhosted.org/packages/01/99/ed5e478ff0eb4e8aa5fd998f9d69603c9fd3f32de3bd16c2b1194f68361c/jiter-0.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663", size = 359120, upload-time = "2026-02-02T12:36:25.519Z" },
{ url = "https://files.pythonhosted.org/packages/16/be/7ffd08203277a813f732ba897352797fa9493faf8dc7995b31f3d9cb9488/jiter-0.13.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa", size = 390664, upload-time = "2026-02-02T12:36:26.866Z" },
{ url = "https://files.pythonhosted.org/packages/d1/84/e0787856196d6d346264d6dcccb01f741e5f0bd014c1d9a2ebe149caf4f3/jiter-0.13.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820", size = 513543, upload-time = "2026-02-02T12:36:28.217Z" },
{ url = "https://files.pythonhosted.org/packages/65/50/ecbd258181c4313cf79bca6c88fb63207d04d5bf5e4f65174114d072aa55/jiter-0.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68", size = 547262, upload-time = "2026-02-02T12:36:29.678Z" },
{ url = "https://files.pythonhosted.org/packages/49/19/a929ec002ad3228bc97ca01dbb14f7632fffdc84a95ec92ceaf4145688ae/jiter-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10", size = 316616, upload-time = "2026-02-02T12:36:36.579Z" },
{ url = "https://files.pythonhosted.org/packages/52/56/d19a9a194afa37c1728831e5fb81b7722c3de18a3109e8f282bfc23e587a/jiter-0.13.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef", size = 346850, upload-time = "2026-02-02T12:36:38.058Z" },
{ url = "https://files.pythonhosted.org/packages/36/4a/94e831c6bf287754a8a019cb966ed39ff8be6ab78cadecf08df3bb02d505/jiter-0.13.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6", size = 358551, upload-time = "2026-02-02T12:36:39.417Z" },
{ url = "https://files.pythonhosted.org/packages/6e/f5/f1997e987211f6f9bd71b8083047b316208b4aca0b529bb5f8c96c89ef3e/jiter-0.13.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0", size = 308804, upload-time = "2026-02-02T12:36:43.496Z" },
{ url = "https://files.pythonhosted.org/packages/cd/8f/5482a7677731fd44881f0204981ce2d7175db271f82cba2085dd2212e095/jiter-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91", size = 318787, upload-time = "2026-02-02T12:36:45.071Z" },
{ url = "https://files.pythonhosted.org/packages/f3/b9/7257ac59778f1cd025b26a23c5520a36a424f7f1b068f2442a5b499b7464/jiter-0.13.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09", size = 353880, upload-time = "2026-02-02T12:36:47.365Z" },
{ url = "https://files.pythonhosted.org/packages/c3/87/719eec4a3f0841dad99e3d3604ee4cba36af4419a76f3cb0b8e2e691ad67/jiter-0.13.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607", size = 366702, upload-time = "2026-02-02T12:36:48.871Z" },
{ url = "https://files.pythonhosted.org/packages/d2/65/415f0a75cf6921e43365a1bc227c565cb949caca8b7532776e430cbaa530/jiter-0.13.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66", size = 486319, upload-time = "2026-02-02T12:36:53.006Z" },
{ url = "https://files.pythonhosted.org/packages/54/a2/9e12b48e82c6bbc6081fd81abf915e1443add1b13d8fc586e1d90bb02bb8/jiter-0.13.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2", size = 372289, upload-time = "2026-02-02T12:36:54.593Z" },
{ url = "https://files.pythonhosted.org/packages/4e/c1/e4693f107a1789a239c759a432e9afc592366f04e901470c2af89cfd28e1/jiter-0.13.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad", size = 360165, upload-time = "2026-02-02T12:36:56.112Z" },
{ url = "https://files.pythonhosted.org/packages/17/08/91b9ea976c1c758240614bd88442681a87672eebc3d9a6dde476874e706b/jiter-0.13.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d", size = 389634, upload-time = "2026-02-02T12:36:57.495Z" },
{ url = "https://files.pythonhosted.org/packages/18/23/58325ef99390d6d40427ed6005bf1ad54f2577866594bcf13ce55675f87d/jiter-0.13.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df", size = 514933, upload-time = "2026-02-02T12:36:58.909Z" },
{ url = "https://files.pythonhosted.org/packages/5b/25/69f1120c7c395fd276c3996bb8adefa9c6b84c12bb7111e5c6ccdcd8526d/jiter-0.13.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d", size = 548842, upload-time = "2026-02-02T12:37:00.433Z" },
{ url = "https://files.pythonhosted.org/packages/62/92/1661d8b9fd6a3d7a2d89831db26fe3c1509a287d83ad7838831c7b7a5c7e/jiter-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0", size = 318423, upload-time = "2026-02-02T12:37:05.806Z" },
{ url = "https://files.pythonhosted.org/packages/4f/3b/f77d342a54d4ebcd128e520fc58ec2f5b30a423b0fd26acdfc0c6fef8e26/jiter-0.13.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40", size = 351438, upload-time = "2026-02-02T12:37:07.189Z" },
{ url = "https://files.pythonhosted.org/packages/76/b3/ba9a69f0e4209bd3331470c723c2f5509e6f0482e416b612431a5061ed71/jiter-0.13.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202", size = 364774, upload-time = "2026-02-02T12:37:08.579Z" },
{ url = "https://files.pythonhosted.org/packages/b3/16/6cdb31fa342932602458dbb631bfbd47f601e03d2e4950740e0b2100b570/jiter-0.13.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0", size = 487238, upload-time = "2026-02-02T12:37:10.066Z" },
{ url = "https://files.pythonhosted.org/packages/ed/b1/956cc7abaca8d95c13aa8d6c9b3f3797241c246cd6e792934cc4c8b250d2/jiter-0.13.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95", size = 372892, upload-time = "2026-02-02T12:37:11.656Z" },
{ url = "https://files.pythonhosted.org/packages/26/c4/97ecde8b1e74f67b8598c57c6fccf6df86ea7861ed29da84629cdbba76c4/jiter-0.13.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59", size = 360309, upload-time = "2026-02-02T12:37:13.244Z" },
{ url = "https://files.pythonhosted.org/packages/4b/d7/eabe3cf46715854ccc80be2cd78dd4c36aedeb30751dbf85a1d08c14373c/jiter-0.13.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe", size = 389607, upload-time = "2026-02-02T12:37:14.881Z" },
{ url = "https://files.pythonhosted.org/packages/df/2d/03963fc0804e6109b82decfb9974eb92df3797fe7222428cae12f8ccaa0c/jiter-0.13.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939", size = 514986, upload-time = "2026-02-02T12:37:16.326Z" },
{ url = "https://files.pythonhosted.org/packages/f6/6c/8c83b45eb3eb1c1e18d841fe30b4b5bc5619d781267ca9bc03e005d8fd0a/jiter-0.13.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9", size = 548756, upload-time = "2026-02-02T12:37:17.736Z" },
]
[[package]]
name = "joblib"
version = "1.5.3"
@@ -1275,22 +1332,22 @@ wheels = [
[[package]]
name = "mlx"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mlx-metal", marker = "sys_platform == 'darwin'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/22/42935d593fe82d3b98eb9d60e4620ed99703886635106f89d407c68f33bc/mlx-0.30.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:743fac1e4f9e8e46c8262943c643a31139c255cdb256c99ad496958215ccac1e", size = 569344, upload-time = "2026-01-14T01:16:54.847Z" },
{ url = "https://files.pythonhosted.org/packages/7d/27/f2e7a5236289d45315d0215e8553b4dd7e2faaba3bcb5025b34b25d5ab66/mlx-0.30.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:3b04ae81655aa0e63a6e8f2c749de3bbce64cf5b168ae10f39ed086dfa99e7f8", size = 569345, upload-time = "2026-01-14T01:16:56.564Z" },
{ url = "https://files.pythonhosted.org/packages/01/41/06b042457f51952456e9bb46b2c6e205ab3a28fc52d6751b5787fdb762b2/mlx-0.30.3-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:ba9b5bdb1e929cc130af72efd7f73508c0f4e526d224489af7ec1c6419564659", size = 569213, upload-time = "2026-01-14T05:52:10.86Z" },
{ url = "https://files.pythonhosted.org/packages/ec/1e/f62c98fc0d2d878ee4235671f9d406b13cc9240493ba6fcfde2f72c2ff83/mlx-0.30.3-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:dfe5c5b64e55398a22100804abbf9681996b03129e720e36b1727ed704db12b5", size = 617309, upload-time = "2026-01-14T01:16:57.58Z" },
{ url = "https://files.pythonhosted.org/packages/e9/62/811f064693449de740350d27793ce39343a460305ec8d878c318b80921d0/mlx-0.30.3-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:a3364924610929936e6aaf13c71106161258e5a5d3f7813a64c07cc2435f9f55", size = 659521, upload-time = "2026-01-14T01:16:58.719Z" },
{ url = "https://files.pythonhosted.org/packages/82/e2/6e551bd48fb350fbf0ee4cc5cd09485437d260b8f4937f22d8623e14687a/mlx-0.30.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2c27fd8daaae14ca6cf407fcd236006a6e968f7708c8f61a2709116f2e754852", size = 571920, upload-time = "2026-01-14T01:16:59.683Z" },
{ url = "https://files.pythonhosted.org/packages/82/c0/561d1c9d3d12830b0e7fdcbd807585ef20909e398d4bcdbf25e4367543eb/mlx-0.30.3-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:b755fd4ed4b6a2ae4dee3766b5a2ea52fcbe83ebd1cf018458e18b74139409f3", size = 571921, upload-time = "2026-01-14T01:17:00.868Z" },
{ url = "https://files.pythonhosted.org/packages/42/1a/fb573fc2edc22a777fa254ff5c0c886ffd2c88aeb1f21c45778ef170f990/mlx-0.30.3-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:7e352c0369a2f7e54d4f317b434eab3333918ea9edde1c43c61d36386b6f76bf", size = 571732, upload-time = "2026-01-14T05:52:11.893Z" },
{ url = "https://files.pythonhosted.org/packages/9e/db/d0083e8f2205b3b2dcd9670eb6f0d6c1b7cbfea6b01a1f8bff39142edf44/mlx-0.30.3-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:00ac867f3d003c1477a66a579442c2040ba7ea43ce3c174490d1f8bf379606bd", size = 619635, upload-time = "2026-01-14T01:17:01.812Z" },
{ url = "https://files.pythonhosted.org/packages/ab/90/ab0b93ff0e76da4fe0e878722c76a308cfb950b044a4676e9617276d8ccd/mlx-0.30.3-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:5be7d0329036f09c6ed003ea3e307e97e3144f20a3e4711b01810d7d5013cf2c", size = 659652, upload-time = "2026-01-14T01:17:02.915Z" },
{ url = "https://files.pythonhosted.org/packages/eb/59/b6d138f5598bcd13d8e1d029a207cb8b18b14d5ded43533aef16d2e3852b/mlx-0.30.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e4b1ff6584ddcadcadbd7236f3ec6fe30abd918bcd75e51dd7693c113ab7d5f6", size = 572585, upload-time = "2026-01-27T22:53:26.236Z" },
{ url = "https://files.pythonhosted.org/packages/10/57/72604531d02471c54dd1c71caeb77479297f37ab6aaa1125b457edfce9ee/mlx-0.30.4-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:1f367534078b10dcb660393a554f97732c194977ac8318bb389a76a6307757f8", size = 572587, upload-time = "2026-01-27T22:53:27.828Z" },
{ url = "https://files.pythonhosted.org/packages/e9/5c/1a340ccc5051d222ceb58aa00c42ea5d11f4ae0bd0fc97673bef5d6ff24b/mlx-0.30.4-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:5344d195ac60dcdb871afb3ebb15c22112408f54c91ef507bd16e3928dfff38d", size = 572571, upload-time = "2026-01-27T22:53:29.268Z" },
{ url = "https://files.pythonhosted.org/packages/f3/18/538c13fa6821459d8d2b6db1ac96f60679ef995f373c68be1d743055ba47/mlx-0.30.4-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:6879c7262c8f8f7a1a9ee6f27cbf5fe174d0863189a7672c9eb71cd8611bbaa7", size = 621260, upload-time = "2026-01-27T22:53:30.696Z" },
{ url = "https://files.pythonhosted.org/packages/16/2c/e8aa0847ec97436443a78e87cc3fb95c94a2fe8b4b6ebb65cbaa67b6306c/mlx-0.30.4-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:367ba287ceb5b93a624b560ce8ce02378c03d1d60cc630b57efaf38061596d9b", size = 662522, upload-time = "2026-01-27T22:53:32.975Z" },
{ url = "https://files.pythonhosted.org/packages/98/ab/d0a6303bf0f978e394036841089d58d2c8c305e3efbcce9e4351724b6f5c/mlx-0.30.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f026f3a30013e16034419caef0b0293ba84e69252fc1676d5d8becc92bb5a304", size = 574119, upload-time = "2026-01-27T22:53:34.304Z" },
{ url = "https://files.pythonhosted.org/packages/1e/58/f5ac415a1781877b21e88f9257c7071e48ee91c34ca461e880b74677758a/mlx-0.30.4-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:96ad421cfe62a6fe7fc98521f8af9a530d7d7b6ded402ba6f4eb81a4a3087d1f", size = 574120, upload-time = "2026-01-27T22:53:36.161Z" },
{ url = "https://files.pythonhosted.org/packages/bf/12/9eb62ebf0ca7989efa6dec92e79630ef70e54202b756523bdeadf3c009eb/mlx-0.30.4-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:dfafd24144d91f6b4bd5ef6711458c566fdf507aee6417567fc2da0469619878", size = 574112, upload-time = "2026-01-27T22:53:37.831Z" },
{ url = "https://files.pythonhosted.org/packages/b7/f3/ada2b2126fc7a2634bd30c07418c6ae9657530d4534249c6949dbcc0013d/mlx-0.30.4-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:f016e16ff43dff6240ee91a8ba32226db1d55797a81a64d7af84e0e4409852ba", size = 622977, upload-time = "2026-01-27T22:53:39.885Z" },
{ url = "https://files.pythonhosted.org/packages/c1/8d/fc498b847f9ed8459ee89fb5b06f7237541192a9e6cd965bed9f61114f5c/mlx-0.30.4-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:962f99d637a99058b7d7659b66570f988815f26f2ae9af52c4cd0359fab928e2", size = 662314, upload-time = "2026-01-27T22:53:41.415Z" },
]
[package.optional-dependencies]
@@ -1303,16 +1360,16 @@ cuda13 = [
[[package]]
name = "mlx-cpu"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/8c/65bdb167141572ae0fa5d2da445368f454af52c0b93d8440b75df6472eef/mlx_cpu-0.30.3-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:85ba5601153e034d3ddb18b929147396737e74798c8453d7e078f53f35ae1512", size = 9000768, upload-time = "2026-01-14T01:16:14.144Z" },
{ url = "https://files.pythonhosted.org/packages/24/eb/e31b1edf9bb907add8fc2e386c6683004908acc71a0394e22856f139a9f1/mlx_cpu-0.30.3-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:5363249de88ab0f54af6ac00cf62d40c64f8e2ecd867598195d9baa9de8f6edb", size = 10569189, upload-time = "2026-01-14T01:16:15.995Z" },
{ url = "https://files.pythonhosted.org/packages/7f/71/8a2f3598d628c6e5fd6ca4c58e080311dc39c558561d8f7fb2d91865f0e6/mlx_cpu-0.30.4-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:d3de6152e38f8a884d7cadb5e633bcf5fb346434867195709b4f6db8450e3f91", size = 8684835, upload-time = "2026-01-27T22:40:39.919Z" },
{ url = "https://files.pythonhosted.org/packages/ab/c4/eae335cf6859c4a45be52888b754bfceb0ad5363bd05ae0ce3e67fac1dec/mlx_cpu-0.30.4-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:1b7f076587d1bd028a6f8197fe35721a39b8202e36b05e3aba89d29d79ab6764", size = 10257054, upload-time = "2026-01-27T22:40:42.689Z" },
]
[[package]]
name = "mlx-cuda-13"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cublas", marker = "sys_platform == 'linux'" },
@@ -1321,14 +1378,14 @@ dependencies = [
{ name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/63/a0ffea24ee9b1f8967c8ea685c1c7d943112130aad633a3bd780bb381966/mlx_cuda_13-0.30.3-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:eaecb79cc8ce4c0ddbc802b8f244ecdee38ba1de32aa8105477df9936f35bc7e", size = 65780796, upload-time = "2026-01-14T01:16:27.699Z" },
{ url = "https://files.pythonhosted.org/packages/07/3c/fa9baab3c148fe0811c7f1d9d10966036e596480f4c54e9863cfdfd41970/mlx_cuda_13-0.30.3-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:9d02d95435bda50ad52572d4a3873479eb3072895c98053ee3efa7f476e64cb6", size = 68127545, upload-time = "2026-01-14T01:16:31.641Z" },
{ url = "https://files.pythonhosted.org/packages/2b/79/e0aec1bf713eb6f6cbda69e1f4d145429e0477c3087aa41755078caafcb7/mlx_cuda_13-0.30.4-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:4edc42cb2e00a7e51621afd4c0b43154f7e2a15a3c6516878207cda9f85ee133", size = 66771153, upload-time = "2026-01-27T23:38:44.61Z" },
{ url = "https://files.pythonhosted.org/packages/f6/ff/1793ec5ec7f486bc44356ac5d355a42577bf5c4c72c42feb9b237bc00838/mlx_cuda_13-0.30.4-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:23a4b617e8bcd5581e6d257ac09fee85f0195114523a863ba84118cfac4abb26", size = 69665061, upload-time = "2026-01-27T23:38:48.653Z" },
]
[[package]]
name = "mlx-lm"
version = "0.30.5"
source = { registry = "https://pypi.org/simple" }
version = "0.30.6"
source = { git = "https://github.com/davidmcc73/mlx-lm.git?branch=main#9b4c6b7aff64cf64d2ff6ce027b35b56a68c4182" }
dependencies = [
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx", marker = "sys_platform == 'darwin'" },
@@ -1338,19 +1395,15 @@ dependencies = [
{ name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0b/90/4469d9f75f196e6255f59a89441abe0079925d30a001462e1c1c4bc4e6a1/mlx_lm-0.30.5.tar.gz", hash = "sha256:9e6cb258c65b766c6af25cb90958aef40acab67139f05839eef19864cb3154f6", size = 262367, upload-time = "2026-01-25T15:29:30.125Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/89/ba/66db6e1e5f1ef506655b562932f6bd8f72600116d5f31f92d71c1f200b3f/mlx_lm-0.30.5-py3-none-any.whl", hash = "sha256:a80bc8e3efdebe81813b0f6eb403fb66a7a15071e256f4e7102ada986acb75bb", size = 366716, upload-time = "2026-01-25T15:29:28.29Z" },
]
[[package]]
name = "mlx-metal"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/63/4d8f6fefb507c028df4454dabfe8d8e0ad2961bb06510b6aca23d2d5b2be/mlx_metal-0.30.3-py3-none-macosx_14_0_arm64.whl", hash = "sha256:6276312b02353714c7c6515169569fe1c4bebe3229c8ecf1fdb375a13e78c966", size = 37716245, upload-time = "2026-01-14T01:16:34.838Z" },
{ url = "https://files.pythonhosted.org/packages/35/91/1d452e48a4bb4958844fd3bb28ae31b8de110549c009ebec5024ce27ebf3/mlx_metal-0.30.3-py3-none-macosx_15_0_arm64.whl", hash = "sha256:c096c0a3428f3f96a06220f97a36f9528b18bc05173f821eb05bc8458e723fa8", size = 37712125, upload-time = "2026-01-14T01:16:38.619Z" },
{ url = "https://files.pythonhosted.org/packages/fe/36/7a3cbca85542b5ca4faf871e35927f43aa0e3fc830ae5b699780fe723677/mlx_metal-0.30.3-py3-none-macosx_26_0_arm64.whl", hash = "sha256:69068533bd1ee8b0379ce5de57ed5fd313577a10ecab58e1332fd1ff7248a75e", size = 46488962, upload-time = "2026-01-14T05:52:04.523Z" },
{ url = "https://files.pythonhosted.org/packages/95/b1/a50b84aaa76a60605606df49196456f31871148485ede7cbe3267a25a51e/mlx_metal-0.30.4-py3-none-macosx_14_0_arm64.whl", hash = "sha256:10c417f86778ac5529ecd2180f90de35f2d3a0fcad4d5176d211d651504c4922", size = 38260996, upload-time = "2026-01-27T22:52:50.172Z" },
{ url = "https://files.pythonhosted.org/packages/b6/f0/6cce9e0ea545f61d0fa27dc6cd30ffa0e44f17bf859e5d75a34a9ba0da56/mlx_metal-0.30.4-py3-none-macosx_15_0_arm64.whl", hash = "sha256:f48f52490f0fcb2be924312d50c3a12625249d396a2a119ce4f7b0d388543ca9", size = 38255657, upload-time = "2026-01-27T22:52:53.683Z" },
{ url = "https://files.pythonhosted.org/packages/07/fc/345f627bb88479cb53c3f37ad1947f865830060a3d792eec05954f53384d/mlx_metal-0.30.4-py3-none-macosx_26_0_arm64.whl", hash = "sha256:9a9fb6f9169eeb38a7f78389fe78306a1b5167fa489096bc50f9ca72074d7a95", size = 47541040, upload-time = "2026-01-27T22:52:57.059Z" },
]
[[package]]
@@ -1442,19 +1495,18 @@ wheels = [
[[package]]
name = "multiprocess"
version = "0.70.18"
version = "0.70.16"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603, upload-time = "2024-01-28T18:52:34.85Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
{ url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
{ url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
{ url = "https://files.pythonhosted.org/packages/ee/25/7d7e78e750bc1aecfaf0efbf826c69a791d2eeaf29cf20cba93ff4cced78/multiprocess-0.70.18-py313-none-any.whl", hash = "sha256:871743755f43ef57d7910a38433cfe41319e72be1bbd90b79c7a5ac523eb9334", size = 151917, upload-time = "2025-04-17T03:11:24.044Z" },
{ url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
{ url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
{ url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824, upload-time = "2024-01-28T18:52:26.062Z" },
{ url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519, upload-time = "2024-01-28T18:52:28.115Z" },
{ url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload-time = "2024-01-28T18:52:29.395Z" },
{ url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload-time = "2024-01-28T18:52:30.853Z" },
{ url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" },
]
[[package]]
@@ -1559,40 +1611,26 @@ wheels = [
[[package]]
name = "numpy"
version = "2.4.1"
version = "2.2.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload-time = "2026-01-10T06:44:59.619Z" }
sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495, upload-time = "2026-01-10T06:43:06.283Z" },
{ url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657, upload-time = "2026-01-10T06:43:09.094Z" },
{ url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256, upload-time = "2026-01-10T06:43:13.634Z" },
{ url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212, upload-time = "2026-01-10T06:43:15.661Z" },
{ url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871, upload-time = "2026-01-10T06:43:17.324Z" },
{ url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305, upload-time = "2026-01-10T06:43:19.376Z" },
{ url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909, upload-time = "2026-01-10T06:43:21.808Z" },
{ url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380, upload-time = "2026-01-10T06:43:23.957Z" },
{ url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156, upload-time = "2026-01-10T06:43:34.237Z" },
{ url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663, upload-time = "2026-01-10T06:43:36.211Z" },
{ url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224, upload-time = "2026-01-10T06:43:37.884Z" },
{ url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352, upload-time = "2026-01-10T06:43:39.479Z" },
{ url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279, upload-time = "2026-01-10T06:43:41.93Z" },
{ url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316, upload-time = "2026-01-10T06:43:44.121Z" },
{ url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884, upload-time = "2026-01-10T06:43:46.613Z" },
{ url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046, upload-time = "2026-01-10T06:43:54.797Z" },
{ url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858, upload-time = "2026-01-10T06:43:57.099Z" },
{ url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417, upload-time = "2026-01-10T06:43:59.037Z" },
{ url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643, upload-time = "2026-01-10T06:44:01.852Z" },
{ url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963, upload-time = "2026-01-10T06:44:04.047Z" },
{ url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811, upload-time = "2026-01-10T06:44:06.207Z" },
{ url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643, upload-time = "2026-01-10T06:44:08.33Z" },
{ url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601, upload-time = "2026-01-10T06:44:10.841Z" },
{ url = "https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774, upload-time = "2026-01-10T06:44:19.467Z" },
{ url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274, upload-time = "2026-01-10T06:44:23.189Z" },
{ url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306, upload-time = "2026-01-10T06:44:25.012Z" },
{ url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653, upload-time = "2026-01-10T06:44:26.706Z" },
{ url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144, upload-time = "2026-01-10T06:44:29.378Z" },
{ url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425, upload-time = "2026-01-10T06:44:31.721Z" },
{ url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053, upload-time = "2026-01-10T06:44:34.617Z" },
{ url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" },
{ url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" },
{ url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" },
{ url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" },
{ url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" },
{ url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" },
{ url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" },
{ url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" },
{ url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" },
{ url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" },
{ url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" },
{ url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" },
{ url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" },
{ url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" },
{ url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" },
{ url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" },
]
[[package]]
@@ -1729,10 +1767,10 @@ wheels = [
[[package]]
name = "nvidia-nccl-cu12"
version = "2.27.5"
version = "2.27.3"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" },
{ url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" },
]
[[package]]
@@ -1752,14 +1790,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" },
]
-[[package]]
-name = "nvidia-nvshmem-cu12"
-version = "3.3.20"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-{ url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" },
-]
[[package]]
name = "nvidia-nvtx-cu12"
version = "12.8.90"
@@ -1768,6 +1798,25 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" },
]
+[[package]]
+name = "openai"
+version = "2.16.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+{ name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "distro", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "jiter", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "sniffio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/6c/e4c964fcf1d527fdf4739e7cc940c60075a4114d50d03871d5d5b1e13a88/openai-2.16.0.tar.gz", hash = "sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12", size = 629649, upload-time = "2026-01-27T23:28:02.579Z" }
+wheels = [
+{ url = "https://files.pythonhosted.org/packages/16/83/0315bf2cfd75a2ce8a7e54188e9456c60cec6c0cf66728ed07bd9859ff26/openai-2.16.0-py3-none-any.whl", hash = "sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b", size = 1068612, upload-time = "2026-01-27T23:28:00.356Z" },
+]
[[package]]
name = "openai-harmony"
version = "0.0.8"
@@ -1814,13 +1863,57 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/35/ddf3a6e8fc754fb939e2ea36fde96c28189184d6115afcf60011bb438ae5/packaging-26.0rc1-py3-none-any.whl", hash = "sha256:ecf921b33c620e357b1eed2ac3bc6313b1582874b0282d0773b6797b79cb0786", size = 74021, upload-time = "2026-01-09T17:41:17.134Z" },
]
+[[package]]
+name = "pandas"
+version = "3.0.0rc2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+"python_full_version >= '3.14' and sys_platform == 'darwin'",
+"python_full_version >= '3.14' and sys_platform == 'linux'",
+]
+dependencies = [
+{ name = "numpy", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
+{ name = "python-dateutil", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/11/1f/08274a71f6198b47fa6abb92c99ab87bfa088c3fca71b467a874c7a8db47/pandas-3.0.0rc2.tar.gz", hash = "sha256:729cd21b8387d9cba48ab357d2eea72215af6b631440fa00b2cb8ea5f83c7dbc", size = 4611940, upload-time = "2026-01-14T22:43:44.022Z" }
+wheels = [
+{ url = "https://files.pythonhosted.org/packages/09/b7/9d31c2e0ef9950e6ca6238683a265867264ddc2ed25cd1643a102fe6488f/pandas-3.0.0rc2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:683ecc5e0f1080bdf2f5cb8d707b1dd506bdd77b199394151d7864f5e9da0a0b", size = 10276423, upload-time = "2026-01-14T22:42:33.95Z" },
+{ url = "https://files.pythonhosted.org/packages/08/8f/435edcea6bec50778ccccf0bd271f7d0eebccca6d04384e1f8e413b2c9f8/pandas-3.0.0rc2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:673e4f7550a61f994a8f1835f71187657953e970077d74fb0723d1fcc98d4b80", size = 9820608, upload-time = "2026-01-14T22:42:35.865Z" },
+{ url = "https://files.pythonhosted.org/packages/e4/43/5cd6162ad4b311dc8340442d7d71d79c4ff0e1b009e349790099a10f98fd/pandas-3.0.0rc2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55adad848bb154cdca4a6501c5c961979a1788c4e8e2aa14bd42ea748264ee0e", size = 10323489, upload-time = "2026-01-14T22:42:38.13Z" },
+{ url = "https://files.pythonhosted.org/packages/ab/5a/f4370b79eb4212abadea8905e3df058268043260f9fa97d86c0df54aa500/pandas-3.0.0rc2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:358185abd60c12e78ecec4affaa86f7775e847b77547bffff5f15f6992149d99", size = 10831839, upload-time = "2026-01-14T22:42:40.484Z" },
+{ url = "https://files.pythonhosted.org/packages/f5/11/c953c00093be4ea3d2b1846a8c1f2f260bcae90ff61c3898da5406fa7ea2/pandas-3.0.0rc2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:361f57da30ad16486d51410e957033864171930316e7f28caf8ded2fd00eea79", size = 11330676, upload-time = "2026-01-14T22:42:42.933Z" },
+{ url = "https://files.pythonhosted.org/packages/65/96/e4bca78cfee5b8b7b921b609673768f8752a282bdbde669a43591a27e2b8/pandas-3.0.0rc2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d1408423ab06a278ae30aabdc3339cadff3dfe1b4d6ce77cf34e17810c00907c", size = 11883758, upload-time = "2026-01-14T22:42:45.444Z" },
+{ url = "https://files.pythonhosted.org/packages/9c/b0/1bf38be28e25be6d07ceedeccf9b2cf9d7e6553598915f2e5e96a123fbf9/pandas-3.0.0rc2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e5ba7f285c4ca98f6d2f136657239a57dee7f992e1401776287ff61232c43e4d", size = 10711017, upload-time = "2026-01-14T22:42:51.954Z" },
+{ url = "https://files.pythonhosted.org/packages/4d/c2/fb57d65a4583fb7cadd75040f02ceeea8922dc1c0ae3b9ba9f713d27f518/pandas-3.0.0rc2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:629d2050db9427ca875ad9b36569deff91625c8ae8440ab8aa31d0648cc735d8", size = 10362629, upload-time = "2026-01-14T22:42:54.231Z" },
+{ url = "https://files.pythonhosted.org/packages/d1/64/44bdc4a30dd7a00510ffeb13fbb1dfd66df4761b4abd88805aff53b50cab/pandas-3.0.0rc2-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f765354b411f9b1a2fb376d3efc03b9bd09034d55a6ec154969ca765c32713ad", size = 10283691, upload-time = "2026-01-14T22:42:56.292Z" },
+{ url = "https://files.pythonhosted.org/packages/26/fb/4ac312b87a08eb5f8bba574fa09cb4d599c90bcf15e8331d0ad496d1023f/pandas-3.0.0rc2-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e9dc4fa215c768b6e92b1c589458dd0b071b8b362d276f189d491666f79e97e1", size = 10671727, upload-time = "2026-01-14T22:42:58.241Z" },
+{ url = "https://files.pythonhosted.org/packages/70/95/095b4fd1fbfea415b0a3728e94876ed1c4e5887bbc0e76817f96de641403/pandas-3.0.0rc2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d74eb4e76cf84ef0ce8225d02ebdc41c432b286f2ce46e482193108579230a78", size = 11296898, upload-time = "2026-01-14T22:43:00.575Z" },
+{ url = "https://files.pythonhosted.org/packages/ca/32/c843f435b4b2873d1f9ad413019f755390fb823ebfcbff297fc546ecd189/pandas-3.0.0rc2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d669ff45857a6b7db9ba8020ec4443d4a93b919da87f67b8bd71938848495dcb", size = 11743350, upload-time = "2026-01-14T22:43:03.348Z" },
+{ url = "https://files.pythonhosted.org/packages/78/54/dcd99e9856271a0f0546c96a564c43d1445344a8e8f1ae3e7aa11b941007/pandas-3.0.0rc2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9e156705991758d892a6d008a76e2638d0d4f076deb2753a38034f64b7f4597f", size = 10280879, upload-time = "2026-01-14T22:43:07.574Z" },
+{ url = "https://files.pythonhosted.org/packages/d4/a0/52ff58f8f4f210c41e2b7b777da1b6f754006576c34d130843147b663a44/pandas-3.0.0rc2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c26d22710d5b5f65e0e36ed1a73d08e96d8866b58bd56b45a93901c8ceffa152", size = 9870217, upload-time = "2026-01-14T22:43:10.306Z" },
+{ url = "https://files.pythonhosted.org/packages/2a/9b/4a472aae89afd5c93d2ccb6e74ac94eef8b37c5fb2c56c647b1fd3e08813/pandas-3.0.0rc2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5000fcee5565e41053ff5d59e0d3d1b0784b492aa0f33b89972648de8615baee", size = 10399840, upload-time = "2026-01-14T22:43:12.702Z" },
+{ url = "https://files.pythonhosted.org/packages/a0/2f/81c647c3f905023d06826762d86d76cc27435fe66396e1cebcddede5ea83/pandas-3.0.0rc2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddd8ac07a9b30039a38512c8b917a52fb8b87f3bec110dd349801ebc5c128661", size = 10854606, upload-time = "2026-01-14T22:43:14.751Z" },
+{ url = "https://files.pythonhosted.org/packages/b9/e2/2eabf98e251e0becaf58c97f6b4e4150d1ba013814059583688a281ecacb/pandas-3.0.0rc2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:650a812ad0164548c91181306b3ebd8c41acc44b8f03bc447725c6ec84b61751", size = 11402310, upload-time = "2026-01-14T22:43:16.735Z" },
+{ url = "https://files.pythonhosted.org/packages/5b/7c/c8c224a49de3f5a6e82af813c19055b551abb3456757bed825f8ebf3be62/pandas-3.0.0rc2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c7ac831248102b92f4decafb5970fe0eed0c42d788b599485361aa225b26f2b4", size = 11922515, upload-time = "2026-01-14T22:43:18.861Z" },
+{ url = "https://files.pythonhosted.org/packages/a8/9e/ea3f5c1ccd292fd1d31b92c5fc3ab03364e63b54e3f24ba9a27dc4350253/pandas-3.0.0rc2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b31021fa98cdffe6cd6745edc53fece4b0f853290fd4a28f41bdf6a0dc644aa6", size = 10731009, upload-time = "2026-01-14T22:43:27.253Z" },
+{ url = "https://files.pythonhosted.org/packages/f7/8c/9f11cb31727589ada2d333a1c174fbe92b32de935d2efc354e0fa63184ff/pandas-3.0.0rc2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:59c707013d019efba0af7cf731179d788f4d7309f367dc495082d61d201ad487", size = 10379214, upload-time = "2026-01-14T22:43:29.399Z" },
+{ url = "https://files.pythonhosted.org/packages/53/43/dfd47f186323205197204616f21bb833b18cbbdb817f32d63222fe249639/pandas-3.0.0rc2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8376665f907f6ee142987ebcd3f2e78f40833a7b03cd9a7c9a72774f9f0d4759", size = 10301411, upload-time = "2026-01-14T22:43:31.627Z" },
+{ url = "https://files.pythonhosted.org/packages/99/10/28cb0417e80dddc82b69c8bc3e5d3b64c62102a50e4cbe8716ddaeb62b23/pandas-3.0.0rc2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c22c471eec0a7c3bbfdbfe2a5cac0a2dc74ea24711534e0d69e0c308b3ad35ce", size = 10701696, upload-time = "2026-01-14T22:43:33.842Z" },
+{ url = "https://files.pythonhosted.org/packages/62/7d/51edfecfbe95a83b472acd0ca676690dba7bdc86936c82ab485a6d6a0d47/pandas-3.0.0rc2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4ac9bf1a92277599eb3f6fa2f30028c26861d48c736a77b6c098c56b46e53b92", size = 11318395, upload-time = "2026-01-14T22:43:35.802Z" },
+{ url = "https://files.pythonhosted.org/packages/5b/f7/b962099d9706f14afcdc4aafe61f7710a37577eae6ac2098838e9fd3a52f/pandas-3.0.0rc2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:47966fb3d7b236a91e4f26bc6fb1a27ed09c95ead6d147958052c350abd545e3", size = 11772875, upload-time = "2026-01-14T22:43:37.949Z" },
+]
[[package]]
name = "pandas"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+"python_full_version < '3.14' and sys_platform == 'darwin'",
+"python_full_version < '3.14' and sys_platform == 'linux'",
+]
dependencies = [
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
{ name = "python-dateutil", marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/de/da/b1dc0481ab8d55d0f46e343cfe67d4551a0e14fcee52bd38ca1bd73258d8/pandas-3.0.0.tar.gz", hash = "sha256:0facf7e87d38f721f0af46fe70d97373a37701b1c09f7ed7aeeb292ade5c050f", size = 4633005, upload-time = "2026-01-21T15:52:04.726Z" }
wheels = [
@@ -1859,6 +1952,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" },
]
+[[package]]
+name = "pebble"
+version = "5.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/3b/7debef984e227a70798963cf2e5ea90882f62bca659b33cbd421a453abd1/pebble-5.2.0.tar.gz", hash = "sha256:8e0a5f6a1cfdd0ac1bfc4a789e20d2b4b895de976e547d23b7de23b71ef39b34", size = 39811, upload-time = "2026-01-25T12:05:11.422Z" }
+wheels = [
+{ url = "https://files.pythonhosted.org/packages/b5/de/1cce5274efcb921484998864820f2ba41679ea472daef748a7bc03fc0bb7/pebble-5.2.0-py3-none-any.whl", hash = "sha256:6237a792a78524648857ec6d2dae069c91a45bdef18daf957078a56e2dd8e0a8", size = 34881, upload-time = "2026-01-25T12:05:09.714Z" },
+]
[[package]]
name = "peft"
version = "0.18.1"
@@ -2662,11 +2764,11 @@ wheels = [
[[package]]
name = "setuptools"
version = "80.9.0"
version = "79.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
sdist = { url = "https://files.pythonhosted.org/packages/bb/71/b6365e6325b3290e14957b2c3a804a529968c77a049b2ed40c095f749707/setuptools-79.0.1.tar.gz", hash = "sha256:128ce7b8f33c3079fd1b067ecbb4051a66e8526e7b65f6cec075dfc650ddfa88", size = 1367909, upload-time = "2025-04-23T22:20:59.241Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
{ url = "https://files.pythonhosted.org/packages/0d/6d/b4752b044bf94cb802d88a888dc7d288baaf77d7910b7dedda74b5ceea0c/setuptools-79.0.1-py3-none-any.whl", hash = "sha256:e147c0549f27767ba362f9da434eab9c5dc0045d5304feb602a0af001089fc51", size = 1256281, upload-time = "2025-04-23T22:20:56.768Z" },
]
[[package]]
@@ -2854,7 +2956,7 @@ wheels = [
[[package]]
name = "torch"
version = "2.9.1"
version = "2.8.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2874,7 +2976,6 @@ dependencies = [
{ name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2882,18 +2983,12 @@ dependencies = [
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/60/8fc5e828d050bddfab469b3fe78e5ab9a7e53dda9c3bdc6a43d17ce99e63/torch-2.9.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c29455d2b910b98738131990394da3e50eea8291dfeb4b12de71ecf1fdeb21cb", size = 104135743, upload-time = "2025-11-12T15:21:34.936Z" },
{ url = "https://files.pythonhosted.org/packages/f2/b7/6d3f80e6918213babddb2a37b46dbb14c15b14c5f473e347869a51f40e1f/torch-2.9.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:524de44cd13931208ba2c4bde9ec7741fd4ae6bfd06409a604fc32f6520c2bc9", size = 899749493, upload-time = "2025-11-12T15:24:36.356Z" },
{ url = "https://files.pythonhosted.org/packages/28/0e/2a37247957e72c12151b33a01e4df651d9d155dd74d8cfcbfad15a79b44a/torch-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5be4bf7496f1e3ffb1dd44b672adb1ac3f081f204c5ca81eba6442f5f634df8e", size = 74830751, upload-time = "2025-11-12T15:21:43.792Z" },
{ url = "https://files.pythonhosted.org/packages/4b/f7/7a18745edcd7b9ca2381aa03353647bca8aace91683c4975f19ac233809d/torch-2.9.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:30a3e170a84894f3652434b56d59a64a2c11366b0ed5776fab33c2439396bf9a", size = 104142929, upload-time = "2025-11-12T15:21:48.319Z" },
{ url = "https://files.pythonhosted.org/packages/f4/dd/f1c0d879f2863ef209e18823a988dc7a1bf40470750e3ebe927efdb9407f/torch-2.9.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8301a7b431e51764629208d0edaa4f9e4c33e6df0f2f90b90e261d623df6a4e2", size = 899748978, upload-time = "2025-11-12T15:23:04.568Z" },
{ url = "https://files.pythonhosted.org/packages/40/60/71c698b466dd01e65d0e9514b5405faae200c52a76901baf6906856f17e4/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:2c14b3da5df416cf9cb5efab83aa3056f5b8cd8620b8fde81b4987ecab730587", size = 74480347, upload-time = "2025-11-12T15:21:57.648Z" },
{ url = "https://files.pythonhosted.org/packages/48/50/c4b5112546d0d13cc9eaa1c732b823d676a9f49ae8b6f97772f795874a03/torch-2.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1edee27a7c9897f4e0b7c14cfc2f3008c571921134522d5b9b5ec4ebbc69041a", size = 74433245, upload-time = "2025-11-12T15:22:39.027Z" },
{ url = "https://files.pythonhosted.org/packages/81/c9/2628f408f0518b3bae49c95f5af3728b6ab498c8624ab1e03a43dd53d650/torch-2.9.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:19d144d6b3e29921f1fc70503e9f2fc572cde6a5115c0c0de2f7ca8b1483e8b6", size = 104134804, upload-time = "2025-11-12T15:22:35.222Z" },
{ url = "https://files.pythonhosted.org/packages/28/fc/5bc91d6d831ae41bf6e9e6da6468f25330522e92347c9156eb3f1cb95956/torch-2.9.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:c432d04376f6d9767a9852ea0def7b47a7bbc8e7af3b16ac9cf9ce02b12851c9", size = 899747132, upload-time = "2025-11-12T15:23:36.068Z" },
{ url = "https://files.pythonhosted.org/packages/bd/b2/2d15a52516b2ea3f414643b8de68fa4cb220d3877ac8b1028c83dc8ca1c4/torch-2.9.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cb10896a1f7fedaddbccc2017ce6ca9ecaaf990f0973bdfcf405439750118d2c", size = 74823558, upload-time = "2025-11-12T15:22:43.392Z" },
{ url = "https://files.pythonhosted.org/packages/86/5c/5b2e5d84f5b9850cd1e71af07524d8cbb74cba19379800f1f9f7c997fc70/torch-2.9.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0a2bd769944991c74acf0c4ef23603b9c777fdf7637f115605a4b2d8023110c7", size = 104145788, upload-time = "2025-11-12T15:23:52.109Z" },
{ url = "https://files.pythonhosted.org/packages/a9/8c/3da60787bcf70add986c4ad485993026ac0ca74f2fc21410bc4eb1bb7695/torch-2.9.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:07c8a9660bc9414c39cac530ac83b1fb1b679d7155824144a40a54f4a47bfa73", size = 899735500, upload-time = "2025-11-12T15:24:08.788Z" },
{ url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload-time = "2025-08-06T14:54:01.526Z" },
{ url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload-time = "2025-08-06T14:55:50.78Z" },
{ url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload-time = "2025-08-06T14:54:34.769Z" },
{ url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload-time = "2025-08-06T14:54:39.047Z" },
{ url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload-time = "2025-08-06T14:56:18.286Z" },
{ url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload-time = "2025-08-06T14:55:28.645Z" },
]
[[package]]
@@ -2920,7 +3015,7 @@ wheels = [
[[package]]
name = "transformers"
version = "5.0.0rc3"
version = "5.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2929,26 +3024,26 @@ dependencies = [
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typer-slim", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3f/a3/7c116a8d85f69ea7749cf4c2df79e64c35d028e5fc7ea0168f299d03b8c7/transformers-5.0.0rc3.tar.gz", hash = "sha256:a0315b92b7e087617ade42ec9e6e92ee7620541cc5d6a3331886c52cbe306f5c", size = 8388520, upload-time = "2026-01-14T16:49:02.952Z" }
sdist = { url = "https://files.pythonhosted.org/packages/bc/79/845941711811789c85fb7e2599cea425a14a07eda40f50896b9d3fda7492/transformers-5.0.0.tar.gz", hash = "sha256:5f5634efed6cf76ad068cc5834c7adbc32db78bbd6211fb70df2325a9c37dec8", size = 8424830, upload-time = "2026-01-26T10:46:46.813Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/f2/ae2b8968764253bdf38a48dee3c299b8d0bedf7c8ffbe3449fca9bd95338/transformers-5.0.0rc3-py3-none-any.whl", hash = "sha256:383fad27f4f73092d330e45fae384681e5c8521e1dc1cf6cb1a297780e68bf2d", size = 10107087, upload-time = "2026-01-14T16:48:59.393Z" },
{ url = "https://files.pythonhosted.org/packages/52/f3/ac976fa8e305c9e49772527e09fbdc27cc6831b8a2f6b6063406626be5dd/transformers-5.0.0-py3-none-any.whl", hash = "sha256:587086f249ce64c817213cf36afdb318d087f790723e9b3d4500b97832afd52d", size = 10142091, upload-time = "2026-01-26T10:46:43.88Z" },
]
[[package]]
name = "triton"
version = "3.5.1"
version = "3.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "setuptools", marker = "sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/27/46/8c3bbb5b0a19313f50edcaa363b599e5a1a5ac9683ead82b9b80fe497c8d/triton-3.5.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3f4346b6ebbd4fad18773f5ba839114f4826037c9f2f34e0148894cd5dd3dba", size = 170470410, upload-time = "2025-11-11T17:41:06.319Z" },
{ url = "https://files.pythonhosted.org/packages/37/92/e97fcc6b2c27cdb87ce5ee063d77f8f26f19f06916aa680464c8104ef0f6/triton-3.5.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0b4d2c70127fca6a23e247f9348b8adde979d2e7a20391bfbabaac6aebc7e6a8", size = 170579924, upload-time = "2025-11-11T17:41:12.455Z" },
{ url = "https://files.pythonhosted.org/packages/a4/e6/c595c35e5c50c4bc56a7bac96493dad321e9e29b953b526bbbe20f9911d0/triton-3.5.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0637b1efb1db599a8e9dc960d53ab6e4637db7d4ab6630a0974705d77b14b60", size = 170480488, upload-time = "2025-11-11T17:41:18.222Z" },
{ url = "https://files.pythonhosted.org/packages/16/b5/b0d3d8b901b6a04ca38df5e24c27e53afb15b93624d7fd7d658c7cd9352a/triton-3.5.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bac7f7d959ad0f48c0e97d6643a1cc0fd5786fe61cb1f83b537c6b2d54776478", size = 170582192, upload-time = "2025-11-11T17:41:23.963Z" },
{ url = "https://files.pythonhosted.org/packages/30/7b/0a685684ed5322d2af0bddefed7906674f67974aa88b0fae6e82e3b766f6/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb", size = 155569223, upload-time = "2025-07-30T19:58:44.017Z" },
{ url = "https://files.pythonhosted.org/packages/20/63/8cb444ad5cdb25d999b7d647abac25af0ee37d292afc009940c05b82dda0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d", size = 155659780, upload-time = "2025-07-30T19:58:51.171Z" },
]
[[package]]