Mirror of https://github.com/exo-explore/exo.git (synced 2026-01-27 07:20:14 -05:00)

Compare commits: improve-di...leo/add-lo (19 commits)
| SHA1 |
|---|
| 022a09b6d9 |
| 0aa708fac4 |
| eb89c2e4b9 |
| 72a5eec3f7 |
| a25892e8d5 |
| 8798ab52ee |
| 457debc338 |
| 0cfaea41bc |
| 18c82443ba |
| b9ec8b0a44 |
| 00442b3cfd |
| aa41da8541 |
| 86e5d7b101 |
| d9ddf90575 |
| 4591301767 |
| 8b0b5e1b88 |
| bd6287727a |
| eb53611210 |
| 71bbe5f25b |
bench/__init__.py (new file, 0 lines)
bench/completions_proxy.py (new file, 451 lines)
@@ -0,0 +1,451 @@
# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
"""
Proxy that converts /v1/completions requests to /v1/chat/completions.

Used by exo_eval to support lm_eval tasks that require the completions API.
"""

from __future__ import annotations

import asyncio
import json
import socket
from contextlib import asynccontextmanager, contextmanager
from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator

import httpx
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from hypercorn.asyncio import serve
from hypercorn.config import Config
from loguru import logger

if TYPE_CHECKING:
    from collections.abc import AsyncIterator

# Tasks that require the completions API (loglikelihood-based)
# These cannot work with chat completions because they need prompt token logprobs
COMPLETIONS_REQUIRED_TASKS: set[str] = {
    # Multiple choice / loglikelihood tasks
    "arc_challenge",
    "arc_easy",
    "hellaswag",
    "mmlu",
    "openbookqa",
    "piqa",
    "sciq",
    "siqa",
    "truthfulqa_mc1",
    "truthfulqa_mc2",
    "winogrande",
    "boolq",
    "lambada",
    "lambada_openai",
    "logiqa",
    "logiqa2",
    # Add more as needed
}

# Task prefixes that indicate completions are required
COMPLETIONS_REQUIRED_PREFIXES: tuple[str, ...] = (
    "mmlu_",  # mmlu subtasks (but NOT mmlu_pro, mmlu_generative, etc.)
    "arc_",  # arc subtasks
    "hellaswag_",
    "winogrande_",
)

# Generation-based tasks that happen to match completions prefixes above.
# These use generate_until (not loglikelihood) and must go through chat completions.
GENERATION_BASED_EXCEPTIONS: set[str] = {
    "mmlu_pro",
    "mmlu_generative",
    "mmlu_flan_cot_fewshot",
    "mmlu_flan_cot_zeroshot",
}


def tasks_require_completions(tasks: list[str]) -> bool:
    """Check if any of the tasks require the completions API."""
    for task in tasks:
        task_lower = task.lower()
        if task_lower in GENERATION_BASED_EXCEPTIONS:
            continue
        if task_lower in COMPLETIONS_REQUIRED_TASKS:
            return True
        for prefix in COMPLETIONS_REQUIRED_PREFIXES:
            if task_lower.startswith(prefix):
                return True
    return False


def find_free_port() -> int:
    """Find a free port to use for the proxy."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("", 0))
        return s.getsockname()[1]


def create_proxy_app(upstream_url: str) -> FastAPI:
    """Create a FastAPI app that proxies completions to chat completions."""

    app = FastAPI()

    def convert_completions_to_chat_request(
        completions_req: dict[str, Any],
    ) -> dict[str, Any]:
        """Convert a /v1/completions request to /v1/chat/completions format."""
        prompt = completions_req.get("prompt", "")

        # Handle prompt as string or list of strings
        if isinstance(prompt, list):
            prompt = prompt[0] if prompt else ""

        chat_req: dict[str, Any] = {
            "model": completions_req.get("model", ""),
            "messages": [{"role": "user", "content": prompt}],
            "stream": completions_req.get("stream", False),
        }

        # Map common parameters
        for param in (
            "max_tokens",
            "temperature",
            "top_p",
            "stop",
            "seed",
            "presence_penalty",
            "frequency_penalty",
        ):
            if param in completions_req:
                chat_req[param] = completions_req[param]

        # Handle logprobs - completions uses int, chat uses bool + top_logprobs
        logprobs = completions_req.get("logprobs")
        if logprobs is not None and logprobs > 0:
            chat_req["logprobs"] = True
            chat_req["top_logprobs"] = logprobs
        elif logprobs is not None:
            chat_req["logprobs"] = True

        return chat_req

    def convert_chat_to_completions_response(
        chat_resp: dict[str, Any],
        echo: bool = False,
        prompt: str = "",
    ) -> dict[str, Any]:
        """Convert a /v1/chat/completions response to /v1/completions format."""
        choices = []

        for chat_choice in chat_resp.get("choices", []):
            message = chat_choice.get("message", {})
            text = message.get("content", "") or ""

            # Build logprobs in completions format
            logprobs_data = None
            chat_logprobs = chat_choice.get("logprobs")

            if chat_logprobs and chat_logprobs.get("content"):
                tokens: list[str] = []
                token_logprobs: list[float] = []
                top_logprobs: list[dict[str, float]] = []
                text_offset: list[int] = []

                offset = 0
                for item in chat_logprobs["content"]:
                    tokens.append(item["token"])
                    token_logprobs.append(item["logprob"])

                    # Convert top_logprobs list to dict format
                    top_lp_dict: dict[str, float] = {}
                    for top_item in item.get("top_logprobs", []):
                        top_lp_dict[top_item["token"]] = top_item["logprob"]
                    top_logprobs.append(top_lp_dict)

                    text_offset.append(offset)
                    offset += len(item["token"])

                logprobs_data = {
                    "tokens": tokens,
                    "token_logprobs": token_logprobs,
                    "top_logprobs": top_logprobs,
                    "text_offset": text_offset,
                }

            # If echo was requested, prepend prompt to text
            if echo:
                text = prompt + text

            choices.append(
                {
                    "text": text,
                    "index": chat_choice.get("index", 0),
                    "logprobs": logprobs_data,
                    "finish_reason": chat_choice.get("finish_reason"),
                }
            )

        return {
            "id": chat_resp.get("id", ""),
            "object": "text_completion",
            "created": chat_resp.get("created", 0),
            "model": chat_resp.get("model", ""),
            "choices": choices,
            "usage": chat_resp.get("usage"),
        }

    def convert_chat_stream_chunk_to_completions(
        chunk: dict[str, Any],
        echo: bool = False,
        prompt: str = "",
        is_first: bool = False,
    ) -> dict[str, Any]:
        """Convert a streaming chat completion chunk to completions format."""
        choices = []

        for chat_choice in chunk.get("choices", []):
            delta = chat_choice.get("delta", {})
            text = delta.get("content", "") or ""

            # If echo and first chunk, prepend prompt
            if echo and is_first:
                text = prompt + text

            # Build logprobs in completions format
            logprobs_data = None
            chat_logprobs = chat_choice.get("logprobs")

            if chat_logprobs and chat_logprobs.get("content"):
                tokens: list[str] = []
                token_logprobs: list[float] = []
                top_logprobs: list[dict[str, float]] = []

                for item in chat_logprobs["content"]:
                    tokens.append(item["token"])
                    token_logprobs.append(item["logprob"])

                    top_lp_dict: dict[str, float] = {}
                    for top_item in item.get("top_logprobs", []):
                        top_lp_dict[top_item["token"]] = top_item["logprob"]
                    top_logprobs.append(top_lp_dict)

                logprobs_data = {
                    "tokens": tokens,
                    "token_logprobs": token_logprobs,
                    "top_logprobs": top_logprobs,
                }

            choices.append(
                {
                    "text": text,
                    "index": chat_choice.get("index", 0),
                    "logprobs": logprobs_data,
                    "finish_reason": chat_choice.get("finish_reason"),
                }
            )

        return {
            "id": chunk.get("id", ""),
            "object": "text_completion",
            "created": chunk.get("created", 0),
            "model": chunk.get("model", ""),
            "choices": choices,
        }

    @app.post("/v1/completions", response_model=None)
    async def completions(request: Request):
        body = await request.json()

        prompt = body.get("prompt", "")
        if isinstance(prompt, list):
            prompt = prompt[0] if prompt else ""

        echo = body.get("echo", False)
        stream = body.get("stream", False)

        chat_request = convert_completions_to_chat_request(body)
        logger.debug(f"Proxying to {upstream_url}/v1/chat/completions")

        if stream:

            async def generate() -> AsyncGenerator[str, None]:
                is_first = True
                # NOTE: the client must be opened inside the generator. The
                # handler returns before the StreamingResponse body is
                # consumed, so a client opened in the handler's scope would
                # already be closed by the time streaming starts.
                async with httpx.AsyncClient(timeout=300.0, http2=False) as client:
                    async with client.stream(
                        "POST",
                        f"{upstream_url}/v1/chat/completions",
                        json=chat_request,
                    ) as response:
                        async for line in response.aiter_lines():
                            if line.startswith("data: "):
                                data = line[6:]
                                if data == "[DONE]":
                                    yield "data: [DONE]\n\n"
                                    break

                                try:
                                    chunk = json.loads(data)
                                    converted = (
                                        convert_chat_stream_chunk_to_completions(
                                            chunk,
                                            echo=echo,
                                            prompt=prompt,
                                            is_first=is_first,
                                        )
                                    )
                                    is_first = False
                                    yield f"data: {json.dumps(converted)}\n\n"
                                except json.JSONDecodeError:
                                    continue

            return StreamingResponse(generate(), media_type="text/event-stream")

        async with httpx.AsyncClient(timeout=300.0, http2=False) as client:
            response = await client.post(
                f"{upstream_url}/v1/chat/completions",
                json=chat_request,
            )
            chat_response = response.json()

            if "error" in chat_response:
                return JSONResponse(chat_response, status_code=response.status_code)

            completions_response = convert_chat_to_completions_response(
                chat_response, echo=echo, prompt=prompt
            )
            return JSONResponse(completions_response)

    @app.get("/v1/models", response_model=None)
    async def models():
        async with httpx.AsyncClient() as client:
            response = await client.get(f"{upstream_url}/v1/models")
            return JSONResponse(response.json())

    return app


class CompletionsProxy:
    """Manages a completions proxy server lifecycle."""

    def __init__(self, upstream_host: str, upstream_port: int):
        self.upstream_url = f"http://{upstream_host}:{upstream_port}"
        self.port = find_free_port()
        self.host = "127.0.0.1"
        self._task: asyncio.Task[None] | None = None
        self._shutdown_event: asyncio.Event | None = None

    @property
    def base_url(self) -> str:
        return f"http://{self.host}:{self.port}"

    async def start(self) -> None:
        """Start the proxy server in the background."""
        app = create_proxy_app(self.upstream_url)
        config = Config()
        config.bind = [f"{self.host}:{self.port}"]
        config.accesslog = None  # Suppress access logs

        self._shutdown_event = asyncio.Event()

        async def run_server() -> None:
            await serve(app, config, shutdown_trigger=self._shutdown_event.wait)  # type: ignore[arg-type]

        self._task = asyncio.create_task(run_server())

        # Wait a bit for server to start
        await asyncio.sleep(0.5)
        logger.info(f"Completions proxy started on {self.base_url}")

    async def stop(self) -> None:
        """Stop the proxy server."""
        if self._shutdown_event:
            self._shutdown_event.set()
        if self._task:
            try:
                await asyncio.wait_for(self._task, timeout=5.0)
            except asyncio.TimeoutError:
                self._task.cancel()
                try:
                    await self._task
                except asyncio.CancelledError:
                    pass
        logger.info("Completions proxy stopped")


@asynccontextmanager
async def completions_proxy_context(
    upstream_host: str, upstream_port: int
) -> AsyncIterator[CompletionsProxy]:
    """Context manager for running the completions proxy."""
    proxy = CompletionsProxy(upstream_host, upstream_port)
    await proxy.start()
    try:
        yield proxy
    finally:
        await proxy.stop()


@contextmanager
def run_completions_proxy(
    upstream_host: str, upstream_port: int
) -> Generator[CompletionsProxy, None, None]:
    """Synchronous context manager that runs the proxy in a subprocess."""
    import subprocess
    import sys
    import time

    port = find_free_port()
    upstream_url = f"http://{upstream_host}:{upstream_port}"

    # Start proxy as subprocess
    proc = subprocess.Popen(
        [
            sys.executable,
            "-c",
            f"""
import asyncio
import sys
from bench.completions_proxy import create_proxy_app
from hypercorn.asyncio import serve
from hypercorn.config import Config

async def main():
    print(f"Proxy starting: 127.0.0.1:{port} -> {upstream_url}", file=sys.stderr, flush=True)
    app = create_proxy_app("{upstream_url}")
    config = Config()
    config.bind = ["127.0.0.1:{port}"]
    config.accesslog = "-"  # Log to stderr
    config.errorlog = "-"
    await serve(app, config)

asyncio.run(main())
""",
        ],
        stdout=None,  # Inherit stdout
        stderr=None,  # Inherit stderr
    )

    # Create a proxy object with the right base_url
    class ProxyInfo:
        def __init__(self, host: str, port: int):
            self.host = host
            self.port = port

        @property
        def base_url(self) -> str:
            return f"http://{self.host}:{self.port}"

    proxy = ProxyInfo("127.0.0.1", port)

    # Wait for server to start
    time.sleep(1.0)
    logger.info(f"Completions proxy started on {proxy.base_url} -> {upstream_url}")

    try:
        yield proxy  # type: ignore[misc]
    finally:
        proc.terminate()
        try:
            proc.wait(timeout=5.0)
        except subprocess.TimeoutExpired:
            proc.kill()
        logger.info("Completions proxy stopped")
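As a quick orientation, here is a minimal sketch of driving the proxy end to end. It assumes an exo API is already serving /v1/chat/completions on localhost:52415 (the default port used by exo_eval), and "my-model" is a placeholder for whatever model id is actually loaded:

import asyncio

import httpx

from bench.completions_proxy import completions_proxy_context


async def main() -> None:
    # Start the in-process proxy pointed at the (assumed) local exo API.
    async with completions_proxy_context("localhost", 52415) as proxy:
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(
                f"{proxy.base_url}/v1/completions",
                json={
                    "model": "my-model",  # placeholder: a model loaded in exo
                    "prompt": "The capital of France is",
                    "max_tokens": 4,
                    # An int here becomes logprobs=True, top_logprobs=5 upstream.
                    "logprobs": 5,
                },
            )
            print(resp.json()["choices"][0]["text"])


asyncio.run(main())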
bench/eval_config.toml (new file, 66 lines)
@@ -0,0 +1,66 @@
# exo-eval configuration file
# See bench/exo_eval.py for usage

[eval]
# Eval framework type: "lm_eval" | "swe_bench" | "custom"
type = "lm_eval"
# Require HuggingFace token (default: true)
# Set to false if using only public datasets
require_hf_token = true

# Instance/placement configuration
# Controls how exo sets up the model instance before running evals
[instance]
# Placement strategy: "ring" | "jaccl" | "both"
instance_meta = "jaccl"
# Sharding strategy: "pipeline" | "tensor" | "both"
sharding = "tensor"
# Node constraints
min_nodes = 2
max_nodes = 2

# lm_eval configuration (EleutherAI's lm-evaluation-harness)
[lm_eval]
# Tasks to run (list of task names)
# NOTE: Chat completions API only supports generation-based tasks.
# Loglikelihood tasks (mmlu, hellaswag, arc) require the /v1/completions endpoint.
#
# Generation-based tasks (work with chat completions):
# - mmlu_pro, mmlu_generative, mmlu_flan_cot_fewshot, mmlu_flan_cot_zeroshot
# - gsm8k, gsm8k_cot, gsm8k_cot_zeroshot
# - truthfulqa (uses generate_until for some subtasks)
# - humaneval, mbpp (code generation)
#
# Run `lm_eval --tasks list` to see all available tasks
tasks = ["mmlu_pro"]
# Number of few-shot examples (5 is standard for mmlu_pro CoT)
num_fewshot = 5
# Batch size (use 1 for API models, "auto" doesn't work)
batch_size = 1
# Number of concurrent requests (set > 1 to enable parallelism)
# Higher values enable better batching throughput
num_concurrent = 64
# Apply chat template for instruct/chat models (default: true)
apply_chat_template = true
# Use fewshot examples as conversation turns (better for chat models)
fewshot_as_multiturn = true
# Optional: limit samples per task (omit or comment out for no limit)
# limit = 100
# Output path for results
output_path = "bench/eval_results"

# SWE-bench configuration (placeholder)
[swe_bench]
# SWE-bench dataset
dataset = "princeton-nlp/SWE-bench_Lite"
# Maximum workers for parallel execution
max_workers = 8
# Path for prediction outputs
predictions_path = "bench/predictions"

# Custom evaluation script configuration
[custom]
# Path to custom evaluation script
script = "path/to/eval_script.py"
# Arguments to pass to the script
args = ["--arg1", "value1"]
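How a task list from this config gets routed can be checked directly against tasks_require_completions from bench/completions_proxy.py; a small sketch:

from bench.completions_proxy import tasks_require_completions

# mmlu_pro is generation-based (listed in GENERATION_BASED_EXCEPTIONS),
# so it goes through /v1/chat/completions:
assert tasks_require_completions(["mmlu_pro"]) is False

# Plain mmlu is loglikelihood-based, so it needs /v1/completions:
assert tasks_require_completions(["mmlu"]) is True

# One completions-requiring task is enough to switch the whole run:
assert tasks_require_completions(["gsm8k", "hellaswag"]) is True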
bench/exo_eval.py (new file, 666 lines)
@@ -0,0 +1,666 @@
#!/usr/bin/env python3
# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
"""
exo-eval: Evaluation harness for exo inference system.

Supports multiple evaluation frameworks via TOML configuration:
- lm_eval: Language model evaluation using EleutherAI's lm-evaluation-harness
- swe_bench: SWE-bench evaluation (placeholder for future implementation)
- custom: Custom evaluation scripts

Usage:
    uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit
    uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit --dry-run
"""

from __future__ import annotations

import argparse
import contextlib
import json
import os
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal

# Add parent directory to path for direct script execution
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

import tomlkit
from huggingface_hub import get_token as get_hf_token
from loguru import logger
from tomlkit.exceptions import TOMLKitError

from bench.completions_proxy import tasks_require_completions
from bench.exo_bench import (
    ExoClient,
    ExoHttpError,
    instance_id_from_instance,
    nodes_used_in_instance,
    placement_filter,
    resolve_model_short_id,
    sharding_filter,
    wait_for_instance_gone,
    wait_for_instance_ready,
)

EvalType = Literal["lm_eval", "swe_bench", "custom"]


def load_config(config_path: str) -> dict[str, Any]:
    """Load and parse TOML configuration file."""
    path = Path(config_path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")

    with open(path, encoding="utf-8") as f:
        return dict(tomlkit.load(f))


def get_eval_type(config: dict[str, Any]) -> EvalType:
    """Extract evaluation type from config."""
    eval_section = config.get("eval", {})
    eval_type = eval_section.get("type", "lm_eval")
    if eval_type not in ("lm_eval", "swe_bench", "custom"):
        raise ValueError(f"Unknown eval type: {eval_type}")
    return eval_type


def check_hf_token(config: dict[str, Any]) -> bool:
    """Check if HuggingFace token is available when required.

    Returns True if token is available or not required, False otherwise.
    """
    eval_section = config.get("eval", {})
    require_hf_token = eval_section.get("require_hf_token", True)

    if not require_hf_token:
        return True

    token = get_hf_token()
    if token is None:
        logger.error(
            "HuggingFace token not found. "
            "Set HF_TOKEN environment variable or run 'huggingface-cli login'. "
            "To disable this check, set require_hf_token = false in [eval] config."
        )
        return False

    logger.info("HuggingFace token found")
    return True


def select_placement(
    client: ExoClient,
    full_model_id: str,
    config: dict[str, Any],
) -> dict[str, Any] | None:
    """Select a placement based on config preferences."""
    instance_config = config.get("instance", {})

    # If explicit instance is provided, use it directly
    if "instance" in instance_config:
        return instance_config["instance"]

    # Otherwise, select from previews based on preferences
    instance_meta_pref = instance_config.get("instance_meta", "ring")
    sharding_pref = instance_config.get("sharding", "pipeline")
    max_nodes = instance_config.get("max_nodes", 4)
    min_nodes = instance_config.get("min_nodes", 1)

    previews_resp = client.request_json(
        "GET", "/instance/previews", params={"model_id": full_model_id}
    )
    previews = previews_resp.get("previews") or []

    selected: list[dict[str, Any]] = []
    for p in previews:
        if p.get("error") is not None:
            continue
        if not placement_filter(str(p.get("instance_meta", "")), instance_meta_pref):
            continue
        if not sharding_filter(str(p.get("sharding", "")), sharding_pref):
            continue

        instance = p.get("instance")
        if not isinstance(instance, dict):
            continue

        n = nodes_used_in_instance(instance)
        if min_nodes <= n <= max_nodes:
            selected.append(p)

    if not selected:
        return None

    # Sort by preference: exact match on sharding/meta, then by node count (descending)
    def sort_key(p: dict[str, Any]) -> tuple[int, int, int]:
        meta_match = (
            1 if instance_meta_pref in str(p.get("instance_meta", "")).lower() else 0
        )
        sharding_match = 1 if sharding_pref in str(p.get("sharding", "")).lower() else 0
        n_nodes = nodes_used_in_instance(p["instance"])
        return (meta_match, sharding_match, n_nodes)

    selected.sort(key=sort_key, reverse=True)
    return selected[0]


def setup_instance(
    client: ExoClient,
    full_model_id: str,
    config: dict[str, Any],
    dry_run: bool,
) -> tuple[str | None, dict[str, Any] | None]:
    """Create and wait for an instance to be ready. Returns (instance_id, preview)."""
    preview = select_placement(client, full_model_id, config)

    if preview is None:
        logger.error("No valid placement found matching config preferences")
        return None, None

    instance_data = preview.get("instance")
    instance: dict[str, Any] = (
        instance_data if isinstance(instance_data, dict) else preview
    )
    instance_id = instance_id_from_instance(instance)

    sharding = str(preview.get("sharding", "unknown"))
    instance_meta = str(preview.get("instance_meta", "unknown"))
    n_nodes = nodes_used_in_instance(instance)

    logger.info(f"Selected placement: {sharding} / {instance_meta} / nodes={n_nodes}")
    logger.info(f"Instance ID: {instance_id}")

    if dry_run:
        logger.info("[dry-run] Would create instance and wait for ready")
        return instance_id, preview

    # Create instance
    client.request_json("POST", "/instance", body={"instance": instance})

    try:
        wait_for_instance_ready(client, instance_id)
        logger.info("Instance is ready")
        time.sleep(1)  # Brief pause after ready
        return instance_id, preview
    except (RuntimeError, TimeoutError) as e:
        logger.error(f"Failed to initialize instance: {e}")
        with contextlib.suppress(ExoHttpError):
            client.request_json("DELETE", f"/instance/{instance_id}")
        return None, None


def teardown_instance(client: ExoClient, instance_id: str) -> None:
    """Delete an instance and wait for it to be gone."""
    try:
        client.request_json("DELETE", f"/instance/{instance_id}")
    except ExoHttpError as e:
        if e.status != 404:
            raise
    except (ConnectionRefusedError, OSError):
        logger.warning(f"Could not connect to exo to delete instance {instance_id} (server may be down)")
        return
    try:
        wait_for_instance_gone(client, instance_id)
    except (ConnectionRefusedError, OSError, TimeoutError):
        logger.warning("Could not verify instance deletion (server may be down)")
        return
    logger.info(f"Instance {instance_id} deleted")


def build_lm_eval_args(
    config: dict[str, Any],
    base_url: str,
    model: str,
    output_path: str | None,
    limit: int | None,
    use_completions: bool,
) -> list[str]:
    """Build command-line arguments for lm_eval."""
    lm_eval_config = config.get("lm_eval", {})

    # Choose model type based on whether tasks need completions API
    if use_completions:
        model_type = "local-completions"
        endpoint_url = f"{base_url}/v1/completions"
    else:
        model_type = "local-chat-completions"
        endpoint_url = f"{base_url}/v1/chat/completions"

    # Build model_args string with num_concurrent if specified
    model_args_parts = [f"model={model}", f"base_url={endpoint_url}"]
    num_concurrent = lm_eval_config.get("num_concurrent")
    if num_concurrent is not None and num_concurrent > 1:
        model_args_parts.append(f"num_concurrent={num_concurrent}")
    model_args = ",".join(model_args_parts)

    args = [
        sys.executable, "-m", "bench.lm_eval_patched",
        "--model",
        model_type,
        "--model_args",
        model_args,
        "--verbosity",
        "WARNING",
    ]

    # Tasks
    tasks = lm_eval_config.get("tasks", ["mmlu"])
    tasks_str = ",".join(tasks) if isinstance(tasks, list) else str(tasks)
    args.extend(["--tasks", tasks_str])

    # Few-shot
    num_fewshot = lm_eval_config.get("num_fewshot")
    if num_fewshot is not None:
        args.extend(["--num_fewshot", str(num_fewshot)])

    # Batch size (default to 1 for API models, "auto" doesn't work)
    batch_size = lm_eval_config.get("batch_size", 1)
    args.extend(["--batch_size", str(batch_size)])

    # Apply chat template for instruct/chat models (default: true)
    # Only applies to chat completions, but doesn't hurt to include
    apply_chat_template = lm_eval_config.get("apply_chat_template", True)
    if apply_chat_template and not use_completions:
        args.append("--apply_chat_template")

    # Fewshot as multiturn (optional, works with chat template)
    fewshot_as_multiturn = lm_eval_config.get("fewshot_as_multiturn", False)
    if fewshot_as_multiturn and not use_completions:
        args.append("--fewshot_as_multiturn")

    # Limit (command line overrides config)
    effective_limit = limit if limit is not None else lm_eval_config.get("limit")
    if effective_limit is not None:
        args.extend(["--limit", str(effective_limit)])

    # Output path
    effective_output = output_path or lm_eval_config.get("output_path")
    if effective_output:
        args.extend(["--output_path", effective_output])
        # Log model responses for post-hoc analysis when output is saved
        args.append("--log_samples")

    return args


def run_lm_eval(
    config: dict[str, Any],
    host: str,
    port: int,
    model: str,
    output_path: str | None,
    limit: int | None,
    dry_run: bool,
) -> int:
    """Run lm_eval evaluation."""
    lm_eval_config = config.get("lm_eval", {})
    tasks = lm_eval_config.get("tasks", ["mmlu"])
    if isinstance(tasks, str):
        tasks = [tasks]

    # Check if tasks require the completions API
    use_completions = tasks_require_completions(tasks)

    if use_completions:
        logger.info(
            "Tasks require completions API - using native /v1/completions endpoint"
        )

    exo_base_url = f"http://{host}:{port}"

    # Build args - use native completions or chat completions endpoint directly
    args = build_lm_eval_args(
        config, exo_base_url, model, output_path, limit, use_completions=use_completions
    )
    logger.info(f"lm_eval command: {' '.join(args)}")

    if dry_run:
        logger.info("[dry-run] Would execute the above command")
        return 0

    try:
        result = subprocess.run(args, check=False)

        # Print token usage summary from exo
        try:
            import httpx

            usage_resp = httpx.get(f"{exo_base_url}/v1/usage", timeout=5)
            if usage_resp.status_code == 200:
                usage = usage_resp.json()
                logger.info("--- Token Usage (Total) ---")
                logger.info(f"  Requests: {usage.get('total_requests', 0)}")
                logger.info(f"  Prompt tokens: {usage.get('total_prompt_tokens', 0)}")
                logger.info(f"  Completion tokens: {usage.get('total_completion_tokens', 0)}")
                logger.info(f"  Reasoning tokens: {usage.get('total_reasoning_tokens', 0)}")
                logger.info(f"  Total tokens: {usage.get('total_tokens', 0)}")
                by_model = usage.get("by_model", {})
                if by_model:
                    for model_name, counters in by_model.items():
                        logger.info(f"--- Token Usage ({model_name}) ---")
                        logger.info(f"  Requests: {counters.get('requests', 0)}")
                        logger.info(f"  Prompt tokens: {counters.get('prompt_tokens', 0)}")
                        logger.info(f"  Completion tokens: {counters.get('completion_tokens', 0)}")
                        logger.info(f"  Reasoning tokens: {counters.get('reasoning_tokens', 0)}")
        except Exception:
            pass  # Usage endpoint not available

        return result.returncode
    except FileNotFoundError:
        logger.error("lm_eval not found. Install with: uv sync --extra eval")
        return 1


def run_swe_bench(
    config: dict[str, Any],
    host: str,
    port: int,
    model: str,
    output_path: str | None,
    dry_run: bool,
) -> int:
    """Run SWE-bench evaluation (placeholder)."""
    swe_config = config.get("swe_bench", {})

    dataset = swe_config.get("dataset", "princeton-nlp/SWE-bench_Lite")
    max_workers = swe_config.get("max_workers", 8)
    predictions_path = output_path or swe_config.get(
        "predictions_path", "bench/predictions"
    )

    logger.info("SWE-bench evaluation configuration:")
    logger.info(f"  Dataset: {dataset}")
    logger.info(f"  Model: {model}")
    logger.info(f"  API endpoint: http://{host}:{port}/v1")
    logger.info(f"  Max workers: {max_workers}")
    logger.info(f"  Predictions path: {predictions_path}")

    if dry_run:
        logger.info("[dry-run] SWE-bench evaluation would be executed")
        return 0

    logger.warning(
        "SWE-bench integration is a placeholder. "
        "Implement swebench inference and evaluation logic as needed."
    )
    return 0


def run_custom_eval(
    config: dict[str, Any],
    host: str,
    port: int,
    model: str,
    output_path: str | None,
    dry_run: bool,
) -> int:
    """Run custom evaluation script."""
    custom_config = config.get("custom", {})

    script = custom_config.get("script")
    if not script:
        logger.error("No script specified in [custom] config section")
        return 1

    script_path = Path(script)
    if not script_path.exists():
        logger.error(f"Custom script not found: {script}")
        return 1

    script_args = custom_config.get("args", [])
    if not isinstance(script_args, list):
        script_args = [str(script_args)]

    # Build environment with exo connection info
    env = os.environ.copy()
    env["EXO_HOST"] = host
    env["EXO_PORT"] = str(port)
    env["EXO_MODEL"] = model
    if output_path:
        env["EXO_OUTPUT_PATH"] = output_path

    cmd = [sys.executable, str(script_path), *script_args]
    logger.info(f"Custom eval command: {' '.join(cmd)}")

    if dry_run:
        logger.info("[dry-run] Would execute the above command")
        return 0

    result = subprocess.run(cmd, env=env, check=False)
    return result.returncode


def write_results_metadata(
    output_path: str,
    config: dict[str, Any],
    host: str,
    port: int,
    model: str,
    eval_type: EvalType,
    return_code: int,
    preview: dict[str, Any] | None,
) -> None:
    """Write evaluation metadata to a JSON file."""
    metadata: dict[str, Any] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "eval_type": eval_type,
        "model": model,
        "api_endpoint": f"http://{host}:{port}/v1",
        "config": config,
        "return_code": return_code,
    }

    if preview:
        metadata["placement"] = {
            "sharding": preview.get("sharding"),
            "instance_meta": preview.get("instance_meta"),
            "instance_id": instance_id_from_instance(preview["instance"])
            if "instance" in preview
            else None,
        }

    output_dir = Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)
    metadata_path = output_dir / "eval_metadata.json"

    with open(metadata_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False, default=str)

    logger.info(f"Wrote evaluation metadata to: {metadata_path}")


def main() -> int:
    """Main entry point for exo-eval."""
    ap = argparse.ArgumentParser(
        prog="exo-eval",
        description="Evaluation harness for exo inference system.",
    )
    ap.add_argument(
        "--config",
        required=True,
        help="Path to TOML configuration file",
    )
    ap.add_argument(
        "--host",
        default=os.environ.get("EXO_HOST", "localhost"),
        help="exo API host (default: localhost or EXO_HOST env var)",
    )
    ap.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("EXO_PORT", "52415")),
        help="exo API port (default: 52415 or EXO_PORT env var)",
    )
    ap.add_argument(
        "--model",
        required=True,
        help="Model name/ID to evaluate",
    )
    ap.add_argument(
        "--output",
        default=None,
        help="Output path for results (overrides config)",
    )
    ap.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Limit samples per task (overrides config, lm_eval only)",
    )
    ap.add_argument(
        "--timeout",
        type=float,
        default=600.0,
        help="HTTP timeout in seconds (default: 600)",
    )
    ap.add_argument(
        "--skip-instance-setup",
        action="store_true",
        help="Skip instance creation (assume instance already running)",
    )
    ap.add_argument(
        "--pipeline",
        type=int,
        default=None,
        metavar="N",
        help="Use pipeline sharding with exactly N nodes (overrides config)",
    )
    ap.add_argument(
        "--instance-meta",
        choices=["ring", "jaccl", "both"],
        default=None,
        help="Instance meta preference (overrides config)",
    )
    ap.add_argument(
        "--dry-run",
        action="store_true",
        help="Print commands without executing",
    )
    args = ap.parse_args()

    logger.info(f"exo-eval starting with config: {args.config}")

    try:
        config = load_config(args.config)
    except FileNotFoundError as e:
        logger.error(str(e))
        return 1
    except TOMLKitError as e:
        logger.error(f"Failed to parse config: {e}")
        return 1

    eval_type = get_eval_type(config)
    logger.info(f"Evaluation type: {eval_type}")
    logger.info(f"Model: {args.model}")
    logger.info(f"API endpoint: http://{args.host}:{args.port}/v1")

    # Apply CLI overrides to instance config
    if args.pipeline is not None or args.instance_meta is not None:
        instance_config = config.setdefault("instance", {})
        if args.pipeline is not None:
            instance_config["sharding"] = "pipeline"
            instance_config["min_nodes"] = args.pipeline
            instance_config["max_nodes"] = args.pipeline
            logger.info(f"CLI override: pipeline={args.pipeline} nodes")
            # Limit concurrency for pipeline to avoid GPU timeouts
            if args.pipeline >= 2:
                lm_eval_config = config.setdefault("lm_eval", {})
                lm_eval_config["num_concurrent"] = 4
                logger.info("CLI override: num_concurrent=4 (pipeline>=2)")
        if args.instance_meta is not None:
            instance_config["instance_meta"] = args.instance_meta
            logger.info(f"CLI override: instance_meta={args.instance_meta}")

    # Check HuggingFace token if required
    if not check_hf_token(config):
        return 1

    # Setup instance and resolve model
    instance_id: str | None = None
    preview: dict[str, Any] | None = None
    client: ExoClient | None = None

    if args.skip_instance_setup:
        # Use model name as-is when skipping instance setup
        full_model_id = args.model
        logger.info(f"Using model: {full_model_id} (instance setup skipped)")
    else:
        client = ExoClient(args.host, args.port, timeout_s=args.timeout)

        # Resolve model
        try:
            short_id, full_model_id = resolve_model_short_id(client, args.model)
            logger.info(f"Resolved model: {short_id} -> {full_model_id}")
        except Exception as e:
            logger.error(f"Failed to resolve model: {e}")
            return 1

        instance_id, preview = setup_instance(
            client, full_model_id, config, args.dry_run
        )
        if instance_id is None and not args.dry_run:
            return 1

    try:
        # Run evaluation
        if eval_type == "lm_eval":
            return_code = run_lm_eval(
                config,
                args.host,
                args.port,
                full_model_id,
                args.output,
                args.limit,
                args.dry_run,
            )
        elif eval_type == "swe_bench":
            return_code = run_swe_bench(
                config,
                args.host,
                args.port,
                full_model_id,
                args.output,
                args.dry_run,
            )
        elif eval_type == "custom":
            return_code = run_custom_eval(
                config,
                args.host,
                args.port,
                full_model_id,
                args.output,
                args.dry_run,
            )
        else:
            logger.error(f"Unknown eval type: {eval_type}")
            return 1

        # Write metadata if output path specified and not dry-run
        output_path = args.output or config.get(eval_type, {}).get("output_path")
        if output_path and not args.dry_run:
            write_results_metadata(
                output_path,
                config,
                args.host,
                args.port,
                full_model_id,
                eval_type,
                return_code,
                preview,
            )

        return return_code

    finally:
        # Teardown instance
        if instance_id and client and not args.skip_instance_setup and not args.dry_run:
            teardown_instance(client, instance_id)


if __name__ == "__main__":
    raise SystemExit(main())
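As a concrete illustration of build_lm_eval_args, here is a sketch that feeds it the bundled config; the model name is a placeholder, and the argv shown in the comment is roughly what the mmlu_pro defaults resolve to:

from bench.exo_eval import build_lm_eval_args, load_config

config = load_config("bench/eval_config.toml")
argv = build_lm_eval_args(
    config,
    base_url="http://localhost:52415",
    model="Llama-3.2-1b-Instruct-4bit",  # placeholder model id
    output_path=None,
    limit=None,
    use_completions=False,  # mmlu_pro is generation-based
)
# Roughly: [sys.executable, "-m", "bench.lm_eval_patched",
#   "--model", "local-chat-completions",
#   "--model_args", "model=...,base_url=http://localhost:52415/v1/chat/completions,num_concurrent=64",
#   "--verbosity", "WARNING", "--tasks", "mmlu_pro", "--num_fewshot", "5",
#   "--batch_size", "1", "--apply_chat_template", "--fewshot_as_multiturn",
#   "--output_path", "bench/eval_results", "--log_samples"]
print(argv)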
bench/lm_eval_patched.py (new file, 41 lines)
@@ -0,0 +1,41 @@
"""Patched lm_eval runner that fixes bugs in the upstream library.

Fixes:
- UnboundLocalError on `outputs` in TemplateAPI.amodel_call when API returns error
- Prevents eval crash on transient API failures (returns empty results instead of raising)

Usage: python -m bench.lm_eval_patched [lm_eval args...]
"""

# pyright: reportMissingTypeStubs=false, reportUnknownVariableType=false
# pyright: reportUnknownMemberType=false, reportAny=false
# ruff: noqa: I001

import functools
from typing import Any


def _patch_amodel_call() -> None:
    """Monkey-patch TemplateAPI.amodel_call to handle the unbound `outputs` variable bug."""
    from lm_eval.models.api_models import TemplateAPI

    original: Any = TemplateAPI.amodel_call

    @functools.wraps(original)
    async def patched_amodel_call(self: Any, *args: Any, **kwargs: Any) -> Any:
        try:
            return await original(self, *args, **kwargs)
        except Exception:  # includes the UnboundLocalError this patch targets
            # Return one empty-string result per request in the batch so the
            # reorderer doesn't assert on missing coverage.
            messages = kwargs.get("messages") or (args[2] if len(args) > 2 else [])
            return [""] * max(len(messages), 1)

    TemplateAPI.amodel_call = patched_amodel_call


if __name__ == "__main__":
    _patch_amodel_call()
    from lm_eval.__main__ import cli_evaluate

    cli_evaluate()
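For readers unfamiliar with this wrap-and-fallback pattern, here is a self-contained sketch with a toy class standing in for lm_eval's TemplateAPI; all names below are hypothetical:

import functools
from typing import Any


class ToyAPI:
    async def amodel_call(self, session: Any, model: str, messages: list[str]) -> list[str]:
        raise UnboundLocalError("outputs")  # simulate the upstream bug


original = ToyAPI.amodel_call


@functools.wraps(original)
async def patched(self: Any, *args: Any, **kwargs: Any) -> Any:
    try:
        return await original(self, *args, **kwargs)
    except Exception:
        # Pad with one empty result per request so downstream bookkeeping holds.
        messages = kwargs.get("messages") or (args[2] if len(args) > 2 else [])
        return [""] * max(len(messages), 1)


ToyAPI.amodel_call = patched  # type: ignore[method-assign]

# asyncio.run(ToyAPI().amodel_call(None, "m", ["q1", "q2"])) -> ["", ""]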
bench/stats_dashboard.html (new file, 290 lines)
@@ -0,0 +1,290 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>exo Usage Stats</title>
<style>
  * { margin: 0; padding: 0; box-sizing: border-box; }
  body {
    font-family: -apple-system, BlinkMacSystemFont, 'SF Mono', 'Menlo', monospace;
    background: #1a1a2e;
    color: #e0e0e0;
    padding: 24px;
    min-height: 100vh;
  }
  .header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 24px;
    padding-bottom: 16px;
    border-bottom: 1px solid #333;
  }
  .header h1 {
    font-size: 20px;
    font-weight: 600;
    color: #fff;
  }
  .status {
    display: flex;
    align-items: center;
    gap: 8px;
    font-size: 13px;
    color: #888;
  }
  .status-dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
    background: #666;
  }
  .status-dot.connected { background: #4caf50; }
  .status-dot.error { background: #f44336; }
  .config {
    margin-bottom: 24px;
    display: flex;
    align-items: center;
    gap: 8px;
  }
  .config label {
    font-size: 12px;
    color: #888;
  }
  .config input {
    background: #252540;
    border: 1px solid #444;
    border-radius: 4px;
    color: #e0e0e0;
    padding: 4px 8px;
    font-size: 13px;
    font-family: inherit;
    width: 280px;
  }
  .section {
    background: #252540;
    border-radius: 8px;
    padding: 20px;
    margin-bottom: 16px;
  }
  .section h2 {
    font-size: 14px;
    font-weight: 600;
    color: #aaa;
    text-transform: uppercase;
    letter-spacing: 0.5px;
    margin-bottom: 16px;
  }
  .stat-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 16px;
  }
  .stat-card {
    background: #1a1a2e;
    border-radius: 6px;
    padding: 16px;
  }
  .stat-label {
    font-size: 11px;
    color: #888;
    text-transform: uppercase;
    letter-spacing: 0.5px;
    margin-bottom: 4px;
  }
  .stat-value {
    font-size: 28px;
    font-weight: 700;
    color: #fff;
  }
  .stat-rate {
    font-size: 12px;
    color: #4caf50;
    margin-top: 4px;
  }
  table {
    width: 100%;
    border-collapse: collapse;
    font-size: 13px;
  }
  th {
    text-align: left;
    padding: 8px 12px;
    color: #888;
    font-weight: 500;
    border-bottom: 1px solid #333;
    font-size: 11px;
    text-transform: uppercase;
    letter-spacing: 0.5px;
  }
  td {
    padding: 8px 12px;
    border-bottom: 1px solid #2a2a45;
  }
  td.num {
    text-align: right;
    font-variant-numeric: tabular-nums;
  }
  .model-name {
    color: #7c9eff;
    max-width: 300px;
    overflow: hidden;
    text-overflow: ellipsis;
    white-space: nowrap;
  }
  .empty-state {
    color: #666;
    font-style: italic;
    padding: 16px 0;
  }
</style>
</head>
<body>
<div class="header">
  <h1>exo Usage Stats</h1>
  <div class="status">
    <div class="status-dot" id="statusDot"></div>
    <span id="statusText">connecting...</span>
  </div>
</div>

<div class="config">
  <label for="baseUrl">Base URL:</label>
  <input type="text" id="baseUrl" value="http://mac8-1:52415">
</div>

<div class="section">
  <h2>Totals</h2>
  <div class="stat-grid">
    <div class="stat-card">
      <div class="stat-label">Requests</div>
      <div class="stat-value" id="totalRequests">0</div>
    </div>
    <div class="stat-card">
      <div class="stat-label">Prompt Tokens</div>
      <div class="stat-value" id="totalPrompt">0</div>
      <div class="stat-rate" id="promptRate"></div>
    </div>
    <div class="stat-card">
      <div class="stat-label">Completion Tokens</div>
      <div class="stat-value" id="totalCompletion">0</div>
      <div class="stat-rate" id="completionRate"></div>
    </div>
    <div class="stat-card">
      <div class="stat-label">Reasoning Tokens</div>
      <div class="stat-value" id="totalReasoning">0</div>
    </div>
    <div class="stat-card">
      <div class="stat-label">Total Tokens</div>
      <div class="stat-value" id="totalTokens">0</div>
      <div class="stat-rate" id="totalRate"></div>
    </div>
  </div>
</div>

<div class="section">
  <h2>Per-Model Breakdown</h2>
  <div id="modelTable">
    <div class="empty-state">No data yet</div>
  </div>
</div>

<script>
function fmt(n) {
  return n.toLocaleString();
}

// Track first non-zero timestamp for overall average rate
let firstSeenTime = null;
let firstSeenTokens = { prompt: 0, completion: 0, total: 0 };

function setRate(id, currentTokens, tokenType) {
  const el = document.getElementById(id);
  if (firstSeenTime === null || currentTokens <= firstSeenTokens[tokenType]) {
    el.textContent = '';
    return;
  }
  const elapsed = (performance.now() / 1000) - firstSeenTime;
  if (elapsed <= 0) { el.textContent = ''; return; }
  const delta = currentTokens - firstSeenTokens[tokenType];
  const avg = delta / elapsed;
  el.textContent = fmt(Math.round(avg)) + ' tok/s avg';
}

function renderModelTable(byModel) {
  const container = document.getElementById('modelTable');
  const models = Object.entries(byModel);
  if (models.length === 0) {
    container.innerHTML = '<div class="empty-state">No data yet</div>';
    return;
  }
  let html = '<table><thead><tr>';
  html += '<th>Model</th><th style="text-align:right">Requests</th>';
  html += '<th style="text-align:right">Prompt</th>';
  html += '<th style="text-align:right">Completion</th>';
  html += '<th style="text-align:right">Reasoning</th>';
  html += '<th style="text-align:right">Total</th>';
  html += '</tr></thead><tbody>';
  for (const [name, counters] of models) {
    const total = (counters.prompt_tokens || 0) + (counters.completion_tokens || 0);
    html += '<tr>';
    html += `<td class="model-name" title="${name}">${name}</td>`;
    html += `<td class="num">${fmt(counters.requests || 0)}</td>`;
    html += `<td class="num">${fmt(counters.prompt_tokens || 0)}</td>`;
    html += `<td class="num">${fmt(counters.completion_tokens || 0)}</td>`;
    html += `<td class="num">${fmt(counters.reasoning_tokens || 0)}</td>`;
    html += `<td class="num">${fmt(total)}</td>`;
    html += '</tr>';
  }
  html += '</tbody></table>';
  container.innerHTML = html;
}

async function poll() {
  const baseUrl = document.getElementById('baseUrl').value.replace(/\/+$/, '');
  const dot = document.getElementById('statusDot');
  const text = document.getElementById('statusText');

  try {
    const resp = await fetch(baseUrl + '/v1/usage');
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    const data = await resp.json();

    dot.className = 'status-dot connected';
    text.textContent = 'connected';

    document.getElementById('totalRequests').textContent = fmt(data.total_requests || 0);
    document.getElementById('totalPrompt').textContent = fmt(data.total_prompt_tokens || 0);
    document.getElementById('totalCompletion').textContent = fmt(data.total_completion_tokens || 0);
    document.getElementById('totalReasoning').textContent = fmt(data.total_reasoning_tokens || 0);
    document.getElementById('totalTokens').textContent = fmt(data.total_tokens || 0);

    // Record first non-zero reading as baseline
    if (firstSeenTime === null && (data.total_tokens || 0) > 0) {
      firstSeenTime = performance.now() / 1000;
      firstSeenTokens = {
        prompt: data.total_prompt_tokens || 0,
        completion: data.total_completion_tokens || 0,
        total: data.total_tokens || 0,
      };
    }

    setRate('promptRate', data.total_prompt_tokens || 0, 'prompt');
    setRate('completionRate', data.total_completion_tokens || 0, 'completion');
    setRate('totalRate', data.total_tokens || 0, 'total');

    renderModelTable(data.by_model || {});

  } catch (e) {
    dot.className = 'status-dot error';
    text.textContent = e.message || 'error';
  }
}

poll();
setInterval(poll, 1000);
</script>
</body>
</html>
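The dashboard polls GET /v1/usage once per second; the same endpoint (added to the API later in this diff) can also be scraped directly. A sketch, assuming exo listens on localhost:52415:

import time

import httpx

# Field names match the get_usage() handler added in the API diff below.
while True:
    data = httpx.get("http://localhost:52415/v1/usage", timeout=5).json()
    print(
        f"requests={data['total_requests']} "
        f"prompt={data['total_prompt_tokens']} "
        f"completion={data['total_completion_tokens']} "
        f"total={data['total_tokens']}"
    )
    time.sleep(1)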
pyproject.toml

@@ -13,6 +13,7 @@ dependencies = [
     "filelock>=3.18.0",
     "rustworkx>=0.17.1",
     "huggingface-hub>=0.33.4",
+    "typer", # for huggingface-cli
     "psutil>=7.0.0",
     "loguru>=0.7.3",
     "exo_pyo3_bindings", # rust bindings
@@ -34,6 +35,7 @@ dependencies = [
 exo-master = "exo.master.main:main"
 exo-worker = "exo.worker.main:main"
 exo = "exo.main:main"
+exo-eval = "bench.exo_eval:main"
 
 # dependencies only required for development
 [dependency-groups]
@@ -51,6 +53,9 @@ dev = [
 # cuda = [
 #   "mlx[cuda]==0.26.3",
 # ]
+eval = [
+    "lm_eval[api]",
+]
 
 ###
 # workspace configuration
@@ -1,10 +1,11 @@
|
||||
import base64
|
||||
import contextlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from collections.abc import AsyncGenerator
|
||||
from http import HTTPStatus
|
||||
from typing import Annotated, Literal, cast
|
||||
from typing import Annotated, Literal, cast, Any
|
||||
from uuid import uuid4
|
||||
|
||||
import anyio
|
||||
@@ -42,6 +43,11 @@ from exo.shared.types.api import (
|
||||
ChatCompletionChoice,
|
||||
ChatCompletionMessage,
|
||||
ChatCompletionResponse,
|
||||
CompletionChoice,
|
||||
CompletionLogprobs,
|
||||
CompletionResponse,
|
||||
CompletionTaskParams,
|
||||
CompletionTokensDetails,
|
||||
CreateInstanceParams,
|
||||
CreateInstanceResponse,
|
||||
DeleteDownloadResponse,
|
||||
@@ -57,6 +63,8 @@ from exo.shared.types.api import (
|
||||
ImageGenerationTaskParams,
|
||||
ImageListItem,
|
||||
ImageListResponse,
|
||||
Logprobs,
|
||||
LogprobsContentItem,
|
||||
ModelList,
|
    ModelListModel,
    PlaceInstanceParams,
@@ -66,8 +74,10 @@ from exo.shared.types.api import (
    StartDownloadResponse,
    StreamingChoiceResponse,
    ToolCall,
    Usage,
)
from exo.shared.types.chunks import (
    CompletionChunk,
    ErrorChunk,
    ImageChunk,
    InputImageChunk,
@@ -107,14 +117,43 @@ from exo.utils.channels import Receiver, Sender, channel
from exo.utils.dashboard_path import find_dashboard
from exo.utils.event_buffer import OrderedBuffer

_THINK_TAG_RE = re.compile(r"<think>.*?</think>", re.DOTALL)


def _strip_think_tags(text: str) -> str:
    """Strip <think>...</think> blocks from response text.

    These tags are an artifact of GPT-OSS channel parsing, not part of the
    model's intended output. The OpenAI API content field should not contain them.
    """
    return _THINK_TAG_RE.sub("", text).lstrip()


def _format_to_content_type(image_format: Literal["png", "jpeg", "webp"] | None) -> str:
    return f"image/{image_format or 'png'}"


def _build_logprobs(chunk: TokenChunk) -> Logprobs:
    """Convert flat logprob fields to OpenAI Logprobs format."""
    return Logprobs(
        content=[
            LogprobsContentItem(
                token=chunk.text,
                logprob=chunk.logprob if chunk.logprob is not None else 0.0,
                bytes=list(chunk.text.encode("utf-8")),
                top_logprobs=chunk.top_logprobs or [],
            )
        ]
    )


def chunk_to_response(
    chunk: TokenChunk | ToolCallChunk, command_id: CommandId
) -> ChatCompletionResponse:
    logprobs: Logprobs | None = None
    if isinstance(chunk, TokenChunk) and chunk.logprob is not None:
        logprobs = _build_logprobs(chunk)

    return ChatCompletionResponse(
        id=command_id,
        created=int(time.time()),
@@ -135,6 +174,7 @@ def chunk_to_response(
                    for i, tool in enumerate(chunk.tool_calls)
                ],
            ),
            logprobs=logprobs,
            finish_reason=chunk.finish_reason,
        )
    ],
@@ -197,7 +237,8 @@ class API:
        )

        self._chat_completion_queues: dict[
-            CommandId, Sender[TokenChunk | ErrorChunk | ToolCallChunk]
+            CommandId,
+            Sender[TokenChunk | ErrorChunk | ToolCallChunk | CompletionChunk],
        ] = {}
        self._image_generation_queues: dict[
            CommandId, Sender[ImageChunk | ErrorChunk]
@@ -205,6 +246,9 @@ class API:
        self._image_store = ImageStore(EXO_IMAGE_CACHE_DIR)
        self._tg: TaskGroup | None = None

        # Accumulated usage stats per instance (keyed by model id)
        self._usage_by_model: dict[str, dict[str, int]] = {}

    def reset(self, new_session_id: SessionId, result_clock: int):
        logger.info("Resetting API State")
        self.state = State()
@@ -271,6 +315,42 @@ class API:
        self.app.get("/events")(lambda: self._event_log)
        self.app.post("/download/start")(self.start_download)
        self.app.delete("/download/{node_id}/{model_id:path}")(self.delete_download)
        self.app.get("/v1/usage")(self.get_usage)

    def get_usage(self) -> dict[str, Any]:
        """Return accumulated token usage per model instance."""
        total_requests = 0
        total_prompt = 0
        total_completion = 0
        total_reasoning = 0
        for counters in self._usage_by_model.values():
            total_requests += counters.get("requests", 0)
            total_prompt += counters.get("prompt_tokens", 0)
            total_completion += counters.get("completion_tokens", 0)
            total_reasoning += counters.get("reasoning_tokens", 0)
        return {
            "total_requests": total_requests,
            "total_prompt_tokens": total_prompt,
            "total_completion_tokens": total_completion,
            "total_reasoning_tokens": total_reasoning,
            "total_tokens": total_prompt + total_completion,
            "by_model": self._usage_by_model,
        }
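
For reference, a minimal sketch of exercising the new endpoint (not part of the diff; the base URL is an assumption, substitute the host and port your API actually binds):

import httpx

# GET /v1/usage returns the aggregate counters built in get_usage() above.
usage = httpx.get("http://localhost:52415/v1/usage").json()
print(usage["total_tokens"], usage["by_model"])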

    def _accumulate_usage(self, model: str, prompt_tokens: int, completion_tokens: int, reasoning_tokens: int) -> None:
        """Accumulate usage stats for a model instance."""
        if model not in self._usage_by_model:
            self._usage_by_model[model] = {
                "requests": 0,
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "reasoning_tokens": 0,
            }
        counters = self._usage_by_model[model]
        counters["requests"] += 1
        counters["prompt_tokens"] += prompt_tokens
        counters["completion_tokens"] += completion_tokens
        counters["reasoning_tokens"] += reasoning_tokens

    async def place_instance(self, payload: PlaceInstanceParams):
        command = PlaceInstance(
@@ -492,29 +572,35 @@ class API:
        )

    async def _chat_chunk_stream(
-        self, command_id: CommandId
-    ) -> AsyncGenerator[ErrorChunk | ToolCallChunk | TokenChunk, None]:
-        """Yield `TokenChunk`s for a given command until completion."""
+        self, command_id: CommandId, timeout: float = 600.0
+    ) -> AsyncGenerator[TokenChunk | ErrorChunk | ToolCallChunk, None]:
+        """Yield `TokenChunk`s for a given command until completion.
+
+        Args:
+            timeout: Max seconds to wait for the next chunk before aborting.
+        """

        try:
            self._chat_completion_queues[command_id], recv = channel[
-                ErrorChunk | ToolCallChunk | TokenChunk
+                TokenChunk | ErrorChunk | ToolCallChunk
            ]()

            with recv as token_chunks:
-                async for chunk in token_chunks:
-                    yield chunk
-                    if chunk.finish_reason is not None:
-                        break
+                with anyio.fail_after(timeout):
+                    async for chunk in token_chunks:
+                        yield chunk
+                        if chunk.finish_reason is not None:
+                            break

        except anyio.get_cancelled_exc_class():
            # TODO: TaskCancelled
            """
            self.command_sender.send_nowait(
                ForwarderCommand(origin=self.node_id, command=command)
            )
            """
            raise
+        except TimeoutError:
+            logger.warning(f"Chat completion timed out after {timeout}s (command_id={command_id})")
+            yield ErrorChunk(
+                model=ModelId("unknown"),
+                finish_reason="error",
+                error_message=f"Request timed out after {timeout}s",
+            )
        finally:
            command = TaskFinished(finished_command_id=command_id)
            await self._send(command)
@@ -528,7 +614,7 @@ class API:

        async for chunk in self._chat_chunk_stream(command_id):
            assert not isinstance(chunk, ImageChunk)
-            if chunk.finish_reason == "error":
+            if isinstance(chunk, ErrorChunk):
                error_response = ErrorResponse(
                    error=ErrorInfo(
                        message=chunk.error_message or "Internal server error",
@@ -548,6 +634,15 @@ class API:
            yield f"data: {chunk_response.model_dump_json()}\n\n"

            if chunk.finish_reason is not None:
+                # Accumulate usage stats from the final chunk
+                if isinstance(chunk, TokenChunk) and chunk.stats is not None:
+                    s = chunk.stats
+                    self._accumulate_usage(
+                        model=chunk.model,
+                        prompt_tokens=s.prompt_tokens,
+                        completion_tokens=s.generation_tokens,
+                        reasoning_tokens=s.reasoning_tokens,
+                    )
                yield "data: [DONE]\n\n"

    async def _collect_chat_completion(
@@ -557,10 +652,14 @@ class API:

        text_parts: list[str] = []
        tool_calls: list[ToolCall] = []
+        logprobs_items: list[LogprobsContentItem] = []
        model: str | None = None
        finish_reason: FinishReason | None = None
+        stats: GenerationStats | None = None

        async for chunk in self._chat_chunk_stream(command_id):
+            # Skip CompletionChunk - it's for the legacy completions API

            if isinstance(chunk, ErrorChunk):
                raise HTTPException(
                    status_code=500,
@@ -572,6 +671,16 @@ class API:

            if isinstance(chunk, TokenChunk):
                text_parts.append(chunk.text)
+                if chunk.stats is not None:
+                    stats = chunk.stats
+                if chunk.logprob is not None:
+                    lp = _build_logprobs(chunk)
+                    if lp.content:
+                        if len(lp.content) != 1:
+                            logger.warning(
+                                f"Expected 1 logprobs content item per chunk, got {len(lp.content)}"
+                            )
+                        logprobs_items.append(lp.content[0])

            if isinstance(chunk, ToolCallChunk):
                tool_calls.extend(
@@ -586,9 +695,31 @@ class API:
            if chunk.finish_reason is not None:
                finish_reason = chunk.finish_reason

-        combined_text = "".join(text_parts)
+        combined_text = _strip_think_tags("".join(text_parts))
        assert model is not None

+        logprobs: Logprobs | None = None
+        if logprobs_items:
+            logprobs = Logprobs(content=logprobs_items)
+
+        usage: Usage | None = None
+        if stats is not None:
+            completion_tokens = stats.generation_tokens
+            usage = Usage(
+                prompt_tokens=stats.prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=stats.prompt_tokens + completion_tokens,
+                completion_tokens_details=CompletionTokensDetails(
+                    reasoning_tokens=stats.reasoning_tokens,
+                ) if stats.reasoning_tokens > 0 else None,
+            )
+            self._accumulate_usage(
+                model=model or "unknown",
+                prompt_tokens=stats.prompt_tokens,
+                completion_tokens=completion_tokens,
+                reasoning_tokens=stats.reasoning_tokens,
+            )

        return ChatCompletionResponse(
            id=command_id,
            created=int(time.time()),
@@ -601,9 +732,11 @@ class API:
                        content=combined_text,
                        tool_calls=tool_calls,
                    ),
+                    logprobs=logprobs,
                    finish_reason=finish_reason,
                )
            ],
+            usage=usage,
        )

    async def _collect_chat_completion_with_stats(
@@ -617,7 +750,8 @@ class API:
        stats: GenerationStats | None = None

        async for chunk in self._chat_chunk_stream(command_id):
-            if chunk.finish_reason == "error":
+
+            if isinstance(chunk, ErrorChunk):
                raise HTTPException(
                    status_code=500,
                    detail=chunk.error_message or "Internal server error",
@@ -628,6 +762,7 @@ class API:

            if isinstance(chunk, TokenChunk):
                text_parts.append(chunk.text)
+                stats = chunk.stats or stats

            if isinstance(chunk, ToolCallChunk):
                tool_calls.extend(
@@ -638,13 +773,12 @@ class API:
                    )
                    for i, tool in enumerate(chunk.tool_calls)
                )

-                stats = chunk.stats or stats
+            stats = chunk.stats or stats

            if chunk.finish_reason is not None:
                finish_reason = chunk.finish_reason

-        combined_text = "".join(text_parts)
+        combined_text = _strip_think_tags("".join(text_parts))
        assert model is not None

        resp = BenchChatCompletionResponse(
@@ -695,7 +829,14 @@ class API:
            media_type="text/event-stream",
        )

-        return await self._collect_chat_completion(command.command_id)
+        try:
+            return await self._collect_chat_completion(command.command_id)
+        except BaseException:
+            # Ensure task cleanup if handler is cancelled before _chat_chunk_stream's finally runs
+            with contextlib.suppress(Exception):
+                await self._send(TaskFinished(finished_command_id=command.command_id))
+            self._chat_completion_queues.pop(command.command_id, None)
+            raise

    async def bench_chat_completions(
        self, payload: BenchChatCompletionTaskParams

@@ -13,6 +13,7 @@ from exo.master.placement import (
from exo.shared.apply import apply
from exo.shared.types.commands import (
    ChatCompletion,
    Completion,
    CreateInstance,
    DeleteInstance,
    ForwarderCommand,
@@ -40,6 +41,9 @@ from exo.shared.types.state import State
from exo.shared.types.tasks import (
    ChatCompletion as ChatCompletionTask,
)
from exo.shared.types.tasks import (
    Completion as CompletionTask,
)
from exo.shared.types.tasks import (
    ImageEdits as ImageEditsTask,
)
@@ -158,6 +162,48 @@ class Master:
                        )
                    )

                    self.command_task_mapping[command.command_id] = task_id
                case Completion():
                    for instance in self.state.instances.values():
                        if (
                            instance.shard_assignments.model_id
                            == command.request_params.model
                        ):
                            task_count = sum(
                                1
                                for task in self.state.tasks.values()
                                if task.instance_id == instance.instance_id
                            )
                            instance_task_counts[instance.instance_id] = (
                                task_count
                            )

                    if not instance_task_counts:
                        raise ValueError(
                            f"No instance found for model {command.request_params.model}"
                        )

                    available_instance_ids = sorted(
                        instance_task_counts.keys(),
                        key=lambda instance_id: instance_task_counts[
                            instance_id
                        ],
                    )

                    task_id = TaskId()
                    generated_events.append(
                        TaskCreated(
                            task_id=task_id,
                            task=CompletionTask(
                                task_id=task_id,
                                command_id=command.command_id,
                                instance_id=available_instance_ids[0],
                                task_status=TaskStatus.Pending,
                                task_params=command.request_params,
                            ),
                        )
                    )

                    self.command_task_mapping[command.command_id] = task_id
                case ImageGeneration():
                    for instance in self.state.instances.values():
@@ -279,17 +325,15 @@ class Master:
                        )
                    )
                case TaskFinished():
-                    generated_events.append(
-                        TaskDeleted(
-                            task_id=self.command_task_mapping[
-                                command.finished_command_id
-                            ]
-                        )
-                    )
-                    if command.finished_command_id in self.command_task_mapping:
-                        del self.command_task_mapping[
-                            command.finished_command_id
-                        ]
+                    task_id = self.command_task_mapping.pop(
+                        command.finished_command_id, None
+                    )
+                    if task_id is not None:
+                        generated_events.append(TaskDeleted(task_id=task_id))
+                    else:
+                        logger.debug(
+                            f"TaskFinished for unknown command_id={command.finished_command_id} (already cleaned up)"
+                        )
                case RequestEventLog():
                    # We should just be able to send everything, since other buffers will ignore old messages
                    for i in range(command.since_idx, len(self._event_log)):

@@ -98,6 +98,8 @@ class LogprobsContentItem(BaseModel):

class Logprobs(BaseModel):
    content: list[LogprobsContentItem] | None = None
    # This will always be null for open source models, but exists for OpenAI API
    refusal: list[LogprobsContentItem] | None = None


class PromptTokensDetails(BaseModel):
@@ -150,6 +152,7 @@ class GenerationStats(BaseModel):
    generation_tps: float
    prompt_tokens: int
    generation_tokens: int
    reasoning_tokens: int = 0
    peak_memory_usage: Memory


@@ -170,6 +173,52 @@ class BenchChatCompletionResponse(ChatCompletionResponse):
    generation_stats: GenerationStats | None = None


# Legacy Completions API types (for lm_eval compatibility)
class CompletionLogprobs(BaseModel):
    """Logprobs in the legacy completions format."""

    tokens: list[str]
    token_logprobs: list[float | None]
    top_logprobs: list[dict[str, float]]
    text_offset: list[int]


class CompletionChoice(BaseModel):
    text: str
    index: int
    logprobs: CompletionLogprobs | None = None
    finish_reason: FinishReason | None = None


class CompletionResponse(BaseModel):
    id: str
    object: Literal["text_completion"] = "text_completion"
    created: int
    model: str
    choices: list[CompletionChoice]
    usage: Usage | None = None


class CompletionTaskParams(BaseModel):
    """Parameters for the legacy /v1/completions endpoint."""

    model: str
    # Prompt can be: string, list of strings, list of token IDs, or list of token ID lists
    prompt: str | list[str] | list[int] | list[list[int]]
    max_tokens: int | None = 16
    temperature: float | None = 1.0
    top_p: float | None = 1.0
    n: int | None = 1
    stream: bool = False
    logprobs: int | None = None
    echo: bool = False
    stop: str | list[str] | None = None
    presence_penalty: float | None = None
    frequency_penalty: float | None = None
    seed: int | None = None
    user: str | None = None
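
For illustration, the kind of request body a loglikelihood-based lm_eval task would send to this endpoint (a sketch, not part of the diff; the model id is a placeholder):

example_params = CompletionTaskParams(
    model="example-model-id",  # placeholder model id
    prompt="The capital of France is Paris",
    max_tokens=0,   # score only; generate nothing
    echo=True,      # return logprobs for the prompt tokens themselves
    logprobs=1,
    temperature=0.0,
)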

class ChatCompletionTaskParams(BaseModel):
    model: str
    frequency_penalty: float | None = None

@@ -2,7 +2,7 @@ from collections.abc import Generator
from typing import Any, Literal

from exo.shared.models.model_cards import ModelId
-from exo.shared.types.api import GenerationStats, ImageGenerationStats
+from exo.shared.types.api import GenerationStats, ImageGenerationStats, TopLogprobItem
from exo.utils.pydantic_ext import TaggedModel

from .api import FinishReason
@@ -17,6 +17,8 @@ class BaseChunk(TaggedModel):
class TokenChunk(BaseChunk):
    text: str
    token_id: int
    logprob: float | None = None
    top_logprobs: list[TopLogprobItem] | None = None
    finish_reason: Literal["stop", "length", "content_filter"] | None = None
    stats: GenerationStats | None = None

@@ -32,6 +34,17 @@ class ToolCallChunk(BaseChunk):
    stats: GenerationStats | None = None


class CompletionChunk(BaseChunk):
    """Chunk for legacy completions API with full logprobs for all tokens."""

    text: str
    tokens: list[str]
    token_logprobs: list[float | None]
    top_logprobs: list[dict[str, float]]
    text_offset: list[int]
    finish_reason: FinishReason | None = None


class ImageChunk(BaseChunk):
    data: str
    chunk_index: int
@@ -67,4 +80,4 @@ class InputImageChunk(BaseChunk):
        yield name, value


-GenerationChunk = TokenChunk | ImageChunk | ToolCallChunk | ErrorChunk
+GenerationChunk = TokenChunk | CompletionChunk | ImageChunk | ToolCallChunk | ErrorChunk

@@ -3,6 +3,7 @@ from pydantic import Field
from exo.shared.models.model_cards import ModelCard, ModelId
from exo.shared.types.api import (
    ChatCompletionTaskParams,
    CompletionTaskParams,
    ImageEditsInternalParams,
    ImageGenerationTaskParams,
)
@@ -25,6 +26,12 @@ class ChatCompletion(BaseCommand):
    request_params: ChatCompletionTaskParams


class Completion(BaseCommand):
    """Legacy completions API command for scoring/generation."""

    request_params: CompletionTaskParams


class ImageGeneration(BaseCommand):
    request_params: ImageGenerationTaskParams

@@ -79,6 +86,7 @@ Command = (
    TestCommand
    | RequestEventLog
    | ChatCompletion
    | Completion
    | ImageGeneration
    | ImageEdits
    | PlaceInstance

@@ -4,6 +4,7 @@ from pydantic import Field

from exo.shared.types.api import (
    ChatCompletionTaskParams,
    CompletionTaskParams,
    ImageEditsInternalParams,
    ImageGenerationTaskParams,
)
@@ -60,6 +61,16 @@ class ChatCompletion(BaseTask):  # emitted by Master
    error_message: str | None = Field(default=None)


class Completion(BaseTask):
    """Legacy completions task for scoring tokens with echo=True."""

    command_id: CommandId
    task_params: CompletionTaskParams

    error_type: str | None = Field(default=None)
    error_message: str | None = Field(default=None)


class ImageGeneration(BaseTask):  # emitted by Master
    command_id: CommandId
    task_params: ImageGenerationTaskParams
@@ -87,6 +98,7 @@ Task = (
    | LoadModel
    | StartWarmup
    | ChatCompletion
    | Completion
    | ImageGeneration
    | ImageEdits
    | Shutdown

@@ -6,6 +6,7 @@ from exo.shared.types.api import (
    GenerationStats,
    ImageGenerationStats,
    ToolCallItem,
    TopLogprobItem,
)
from exo.utils.pydantic_ext import TaggedModel

@@ -14,14 +15,11 @@ class BaseRunnerResponse(TaggedModel):
    pass


class TokenizedResponse(BaseRunnerResponse):
    prompt_tokens: int


class GenerationResponse(BaseRunnerResponse):
    text: str
    token: int
-    # logprobs: list[float] | None = None  # too big. we can change to be top-k
+    logprob: float | None = None
+    top_logprobs: list[TopLogprobItem] | None = None
    finish_reason: FinishReason | None = None
    stats: GenerationStats | None = None

@@ -194,6 +194,22 @@ class MpReceiver[T]:
            raise EndOfStream from None
        return item

    def receive_with_timeout(self, timeout: float) -> T | None:
        """Receive with a timeout, returning None if no message arrives within it."""
        if self._state.closed.is_set():
            raise ClosedResourceError

        try:
            item = self._state.buffer.get(block=True, timeout=timeout)
            if isinstance(item, _MpEndOfStream):
                self.close()
                raise EndOfStream
            return item
        except Empty:
            return None
        except ValueError as e:
            raise ClosedResourceError from e

    # nb: this function will not cancel particularly well
    async def receive_async(self) -> T:
        return await to_thread.run_sync(self.receive, limiter=CapacityLimiter(1))
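
A sketch of a consumer loop built on receive_with_timeout (illustrative only; `receiver`, `handle`, and `shutdown_requested` are assumed names, not part of the diff):

while not shutdown_requested:
    item = receiver.receive_with_timeout(timeout=0.5)
    if item is None:
        continue  # no message within the window; check the flag and poll again
    handle(item)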

@@ -102,6 +102,16 @@ class CustomMlxLayer(nn.Module):
        return getattr(original_layer, name)


class EvalCheckpointLayer(CustomMlxLayer):
    """Wraps a layer to force evaluation of its output, breaking up the computation graph
    to prevent Metal command buffer timeouts with large batches in pipeline parallel."""

    def __call__(self, x: mx.array, *args: object, **kwargs: object) -> mx.array:
        output = self.original_layer(x, *args, **kwargs)
        mx.eval(output)
        return output


class PipelineFirstLayer(CustomMlxLayer):
    def __init__(
        self,
@@ -116,7 +126,10 @@ class PipelineFirstLayer(CustomMlxLayer):
    def __call__(self, x: mx.array, *args: object, **kwargs: object) -> mx.array:
        if self.r != 0:
            x = mx.distributed.recv_like(x, (self.r - 1), group=self.group)
-        return self.original_layer(x, *args, **kwargs)
+        mx.eval(x)
+        output = self.original_layer(x, *args, **kwargs)
+        mx.eval(output)
+        return output


class PipelineLastLayer(CustomMlxLayer):
@@ -139,11 +152,13 @@ class PipelineLastLayer(CustomMlxLayer):
        ).arguments.get("cache", None)

        output: mx.array = self.original_layer(x, *args, **kwargs)
        mx.eval(output)

        if self.r != self.s - 1:
            output = mx.distributed.send(
                output, (self.r + 1) % self.s, group=self.group
            )
            mx.async_eval(output)
        if cache is not None:
            cache.keys = mx.depends(cache.keys, output)  # type: ignore[reportUnknownMemberType]

@@ -201,6 +216,9 @@ def pipeline_auto_parallel(

    layers = layers[start_layer:end_layer]
    layers[0] = PipelineFirstLayer(layers[0], device_rank, group=group)
    # Wrap intermediate layers with eval checkpoints to prevent GPU timeout
    for i in range(1, len(layers) - 1):
        layers[i] = EvalCheckpointLayer(layers[i])
    layers[-1] = PipelineLastLayer(
        layers[-1],
        device_rank,
@@ -254,6 +272,10 @@ def patch_pipeline_model[T](model: T, group: mx.distributed.Group) -> T:
            "cache", None
        )

        # Evaluate logits before all_gather to break the computation graph
        # and prevent Metal command buffer timeouts with large batches
        mx.eval(logits)

        # Add dependency to last cache entry to ensure distributed ops are evaluated
        if cache is not None:
            cache[-1].state = mx.depends(cache[-1].state, logits)  # type: ignore

@@ -3,7 +3,7 @@ from typing import Any, Callable, Generator, cast, get_args

import mlx.core as mx
from mlx_lm.generate import stream_generate
-from mlx_lm.models.cache import trim_prompt_cache
+from mlx_lm.models.cache import trim_prompt_cache, KVCache
from mlx_lm.sample_utils import make_sampler
from mlx_lm.tokenizer_utils import TokenizerWrapper

@@ -12,6 +12,7 @@ from exo.shared.types.api import (
    ChatCompletionMessage,
    FinishReason,
    GenerationStats,
    TopLogprobItem,
)
from exo.shared.types.memory import Memory
from exo.shared.types.mlx import KVCacheType
@@ -158,6 +159,206 @@ def eos_ids_from_tokenizer(tokenizer: TokenizerWrapper) -> list[int]:
    return eos


def extract_top_logprobs(
    logprobs_array: mx.array,
    selected_token: int,
    tokenizer: TokenizerWrapper,
    top_k: int | None,
) -> tuple[float, list[TopLogprobItem]]:
    """Extract the selected token's logprob and top-k alternatives.

    top_k can be set to None to return all the logprobs.
    """
    selected_logprob = float(logprobs_array[selected_token].item())

    if top_k == 0:
        return selected_logprob, []

    vocab_size = logprobs_array.shape[0]

    if top_k is None:
        sorted_indices = mx.argsort(-logprobs_array)
        mx.eval(sorted_indices)
        indices_list: list[int] = cast(list[int], sorted_indices.tolist())
    else:
        k = min(top_k, vocab_size)
        top_indices = mx.argpartition(-logprobs_array, kth=k - 1)[:k]
        top_logprobs_values = logprobs_array[top_indices]
        sorted_order = mx.argsort(-top_logprobs_values)
        top_indices = top_indices[sorted_order]
        mx.eval(top_indices)
        indices_list = cast(list[int], top_indices.tolist())

    top_logprob_items: list[TopLogprobItem] = []
    for token_id in indices_list:
        logprob_value = float(logprobs_array[token_id].item())
        token_str = tokenizer.decode([token_id])

        top_logprob_items.append(
            TopLogprobItem(
                token=token_str,
                logprob=logprob_value,
                bytes=list(token_str.encode("utf-8")),
            )
        )

    return selected_logprob, top_logprob_items


def score_tokens(
    model: Model,
    tokenizer: TokenizerWrapper,
    tokens: list[int],
    top_k: int | None = None,
) -> list[tuple[float, list[TopLogprobItem]]]:
    """Score a sequence of tokens, returning logprobs for each token.

    This is used for the completions API with echo=True, where we need
    logprobs for the prompt tokens (not just generated tokens).

    Args:
        model: The MLX model.
        tokenizer: The tokenizer.
        tokens: List of token IDs to score.
        top_k: Number of top logprobs to return per position.
            If None, returns all logprobs.

    Returns:
        List of (token_logprob, top_logprobs) tuples for each token position.
        The first position has no logprob (no previous context), so returns (0.0, []).
    """
    if len(tokens) == 0:
        return []

    # First token has no previous context to condition on
    results: list[tuple[float, list[TopLogprobItem]]] = [(0.0, [])]

    if len(tokens) == 1:
        return results

    # Create an empty KV cache for the forward pass
    cache = make_kv_cache(model=model)

    # Convert to MLX array and run forward pass
    input_tokens = mx.array(tokens[:-1])[None]  # All tokens except last, batched

    # Run the model to get logits for all positions
    # The model returns logits with shape [1, seq_len, vocab_size]
    logits: mx.array = model(input_tokens, cache=cast(list[KVCache], cache))
    logits = logits.squeeze(0)  # Shape: [seq_len, vocab_size]

    # Convert to log probabilities
    logprobs_all: mx.array = logits - mx.logsumexp(logits, axis=-1, keepdims=True)

    mx.eval(logprobs_all)

    # For each position, extract the logprob of the actual next token
    for i in range(len(tokens) - 1):
        next_token = tokens[i + 1]
        logprobs_at_position: mx.array = logprobs_all[i]

        logprob, top_logprobs_items = extract_top_logprobs(
            logprobs_array=logprobs_at_position,
            selected_token=next_token,
            tokenizer=tokenizer,
            top_k=top_k,
        )
        results.append((logprob, top_logprobs_items))

    return results
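
For context, a sketch (not in the diff) of how a caller can turn score_tokens output into the loglikelihood of a continuation given a context, which is what loglikelihood-style lm_eval tasks need; `model`, `tokenizer`, `context`, and `continuation` are assumed to be in scope:

ctx_ids = tokenizer.encode(context)
cont_ids = tokenizer.encode(continuation)
scores = score_tokens(model, tokenizer, ctx_ids + cont_ids, top_k=0)
# Entries from position len(ctx_ids) onward score the continuation tokens.
loglikelihood = sum(lp for lp, _ in scores[len(ctx_ids):])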

def score_tokens_batched(
    model: Model,
    tokenizer: TokenizerWrapper,
    token_sequences: list[list[int]],
    top_k: int | None = None,
) -> list[list[tuple[float, list[TopLogprobItem]]]]:
    """Score multiple token sequences in a single batched forward pass.

    This is significantly faster than calling score_tokens() multiple times
    because it batches the forward pass across all sequences.

    Args:
        model: The MLX model.
        tokenizer: The tokenizer.
        token_sequences: List of token ID sequences to score.
        top_k: Number of top logprobs to return per position.

    Returns:
        List of results for each sequence. Each result is a list of
        (token_logprob, top_logprobs) tuples for each token position.
    """
    if not token_sequences:
        return []

    # Handle empty sequences and single-token sequences
    results: list[list[tuple[float, list[TopLogprobItem]]]] = []
    non_empty_indices: list[int] = []
    non_empty_sequences: list[list[int]] = []

    for i, tokens in enumerate(token_sequences):
        if len(tokens) == 0:
            results.append([])
        elif len(tokens) == 1:
            results.append([(0.0, [])])
        else:
            results.append([])  # Placeholder, will be filled later
            non_empty_indices.append(i)
            non_empty_sequences.append(tokens)

    if not non_empty_sequences:
        return results

    # Find max sequence length (excluding last token since we predict it)
    max_len = max(len(seq) - 1 for seq in non_empty_sequences)

    # Get pad token (use eos_token_id or 0)
    pad_token_id = getattr(tokenizer, "pad_token_id", None)
    if pad_token_id is None:
        pad_token_id = getattr(tokenizer, "eos_token_id", 0)

    # Pad sequences and create attention mask
    batch_size = len(non_empty_sequences)
    padded_inputs = mx.full((batch_size, max_len), pad_token_id, dtype=mx.int32)
    seq_lengths: list[int] = []

    for i, tokens in enumerate(non_empty_sequences):
        input_len = len(tokens) - 1  # Exclude last token
        padded_inputs[i, :input_len] = mx.array(tokens[:-1], dtype=mx.int32)
        seq_lengths.append(input_len)

    # Run batched forward pass (no KV cache for scoring)
    # The model accepts [batch_size, seq_len] and returns [batch_size, seq_len, vocab_size]
    logits = model(padded_inputs, cache=None)

    # Convert to log probabilities - logits shape: [batch, seq_len, vocab]
    logprobs_all = logits - mx.logsumexp(logits, axis=-1, keepdims=True)
    mx.eval(logprobs_all)

    # Extract results for each sequence
    for batch_idx, (orig_idx, tokens, seq_len) in enumerate(
        zip(non_empty_indices, non_empty_sequences, seq_lengths, strict=True)
    ):
        seq_results: list[tuple[float, list[TopLogprobItem]]] = [(0.0, [])]

        for pos in range(seq_len):
            next_token = tokens[pos + 1]
            logprobs_at_position: mx.array = logprobs_all[batch_idx, pos]

            logprob, top_logprobs_items = extract_top_logprobs(
                logprobs_array=logprobs_at_position,
                selected_token=next_token,
                tokenizer=tokenizer,
                top_k=top_k,
            )
            seq_results.append((logprob, top_logprobs_items))

        results[orig_idx] = seq_results

    return results
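
A sketch of using the batched variant to rank multiple-choice continuations in one forward pass (illustrative, not part of the diff; `prompt` and `choices` are assumed names, and the boundary arithmetic assumes tokenization splits cleanly at the prompt/choice seam):

sequences = [tokenizer.encode(prompt + choice) for choice in choices]
batched = score_tokens_batched(model, tokenizer, sequences, top_k=0)
n_ctx = len(tokenizer.encode(prompt))
# Pick the choice whose continuation tokens have the highest total logprob.
best = max(
    range(len(choices)),
    key=lambda i: sum(lp for lp, _ in batched[i][n_ctx:]),
)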

def mlx_generate(
    model: Model,
    tokenizer: TokenizerWrapper,
@@ -209,6 +410,10 @@ def mlx_generate(
    # stream_generate starts from the last token
    last_token = prompt_tokens[-1:]

    # Determine if we need logprobs
    should_extract_logprobs = task.logprobs is True
    top_k = task.top_logprobs if task.top_logprobs is not None else 0

    max_tokens = task.max_tokens or MAX_TOKENS
    generated_text_parts: list[str] = []
    generation_start_time = time.perf_counter()
@@ -245,9 +450,22 @@ def mlx_generate(
                f"Model generated unexpected finish_reason: {out.finish_reason}"
            )

        # Extract logprobs if requested
        logprob: float | None = None
        top_logprobs: list[TopLogprobItem] | None = None
        if should_extract_logprobs:
            logprob, top_logprobs = extract_top_logprobs(
                logprobs_array=out.logprobs,
                selected_token=out.token,
                tokenizer=tokenizer,
                top_k=top_k,
            )

        yield GenerationResponse(
            text=out.text,
            token=out.token,
            logprob=logprob,
            top_logprobs=top_logprobs,
            finish_reason=cast(FinishReason | None, out.finish_reason),
            stats=stats,
        )

@@ -33,6 +33,7 @@ from exo.shared.types.events import (
from exo.shared.types.multiaddr import Multiaddr
from exo.shared.types.state import State
from exo.shared.types.tasks import (
    ChatCompletion,
    CreateRunner,
    DownloadModel,
    ImageEdits,
@@ -184,8 +185,10 @@ class Worker:
                self.input_chunk_counts,
            )
            if task is None:
                # Only sleep when there's nothing to do - allows rapid task dispatch
                await anyio.sleep(0.01)
                continue
-            logger.info(f"Worker plan: {task.__class__.__name__}")
+            logger.debug(f"Worker plan: {task.__class__.__name__}")
            assert task.task_status
            await self.event_sender.send(TaskCreated(task_id=task.task_id, task=task))

@@ -269,6 +272,12 @@ class Worker:
                    await self.runners[self._task_to_runner_id(task)].start_task(
                        modified_task
                    )
                case ChatCompletion():
                    # Don't wait for acknowledgment for batchable inference tasks
                    # This allows multiple tasks to reach the runner for batching
                    await self.runners[self._task_to_runner_id(task)].start_task(
                        task, wait_for_ack=False
                    )
                case task:
                    await self.runners[self._task_to_runner_id(task)].start_task(task)

@@ -5,6 +5,7 @@ from collections.abc import Mapping, Sequence
from exo.shared.types.common import CommandId, NodeId
from exo.shared.types.tasks import (
    ChatCompletion,
    Completion,
    ConnectToGroup,
    CreateRunner,
    DownloadModel,
@@ -273,9 +274,9 @@ def _pending_tasks(
    input_chunk_buffer: Mapping[CommandId, dict[int, str]] | None = None,
) -> Task | None:
    for task in tasks.values():
-        # for now, just forward chat completions
+        # for now, just forward chat completions and completions
        # TODO(ciaran): do this better!
-        if not isinstance(task, (ChatCompletion, ImageGeneration, ImageEdits)):
+        if not isinstance(task, (ChatCompletion, Completion, ImageGeneration, ImageEdits)):
            continue
        if task.task_status not in (TaskStatus.Pending, TaskStatus.Running):
            continue
@@ -298,9 +299,14 @@ def _pending_tasks(
        if task.task_id in runner.completed:
            continue

        # Skip tasks already sent to runner (waiting for completion)
        if task.task_id in runner.sent:
            continue

        # TODO: Check ordering aligns with MLX distributed's expectations.

-        if isinstance(runner.status, RunnerReady) and all(
+        # Allow sending tasks when runner is Ready OR Running (for batching)
+        if isinstance(runner.status, (RunnerReady, RunnerRunning)) and all(
            isinstance(all_runners[global_runner_id], (RunnerReady, RunnerRunning))
            for global_runner_id in runner.bound_instance.instance.shard_assignments.runner_to_shard
        ):
src/exo/worker/runner/batched_handler.py (new file, 558 lines)
@@ -0,0 +1,558 @@
"""Batched inference handler for processing multiple ChatCompletion requests concurrently."""

import time
from collections.abc import Generator
from dataclasses import dataclass, field
from typing import Any, Callable, Literal

import mlx.core as mx
from mlx_lm.generate import BatchGenerator
from mlx_lm.models.gpt_oss import Model as GptOssModel
from mlx_lm.sample_utils import make_sampler
from mlx_lm.tokenizer_utils import TokenizerWrapper
from openai_harmony import (  # pyright: ignore[reportMissingTypeStubs]
    HarmonyEncodingName,
    Role,
    StreamableParser,
    load_harmony_encoding,
)

from exo.shared.models.model_cards import ModelId
from exo.shared.types.api import (
    GenerationStats,
    TopLogprobItem,
)
from exo.shared.types.chunks import ErrorChunk, TokenChunk
from exo.shared.types.common import CommandId
from exo.shared.types.events import ChunkGenerated, Event
from exo.shared.types.memory import Memory
from exo.shared.types.tasks import ChatCompletion
from exo.worker.engines.mlx import Model
from exo.worker.engines.mlx.constants import MAX_TOKENS
from exo.worker.engines.mlx.generator.generate import extract_top_logprobs
from exo.worker.engines.mlx.utils_mlx import apply_chat_template
from exo.worker.runner.bootstrap import logger
from exo.worker.runner.pipelined_generator import PipelinedGenerator, PipelinedResponse

# Type alias for the finish_reason values TokenChunk accepts
TokenFinishReason = Literal["stop", "length", "content_filter"]


@dataclass
class PendingRequest:
    """A request waiting to be added to the batch."""

    task: ChatCompletion
    prompt: str
    max_tokens: int
    sampler: Callable[[mx.array], mx.array]
    should_extract_logprobs: bool
    top_k: int


@dataclass
class ActiveRequest:
    """A request currently being processed in the batch."""

    command_id: CommandId
    should_extract_logprobs: bool
    top_k: int
    gpt_oss_parser: Any | None = None  # StreamableParser for GPT-OSS models
    gpt_oss_thinking: bool = False
    tokens_generated: int = 0
    reasoning_tokens: int = 0
    prompt_tokens: int = 0
    start_time: float = field(default_factory=time.perf_counter)


class BatchedInferenceHandler:
    """
    Handles batched inference for multiple ChatCompletion requests.

    Uses MLX-LM's BatchGenerator to process multiple requests concurrently,
    improving throughput for scenarios with multiple concurrent requests.
    """

    def __init__(
        self,
        model: Model,
        tokenizer: TokenizerWrapper,
        model_id: ModelId,
        device_rank: int,
        world_size: int = 1,
        max_batch_size: int = 32,
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.model_id = model_id
        self.device_rank = device_rank
        self.world_size = world_size
        self.max_batch_size = max_batch_size

        # GPT-OSS model detection
        self.is_gpt_oss = isinstance(model, GptOssModel)
        self._gpt_oss_encoding: Any | None = None
        if self.is_gpt_oss:
            self._gpt_oss_encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
            logger.info("GPT-OSS model detected, enabling per-request stream parsing")

        # Pending requests waiting to be batched
        self.pending: list[PendingRequest] = []

        # Active batch generator and request tracking
        self.batch_generator: BatchGenerator | None = None
        self.pipelined_generator: PipelinedGenerator | None = None
        self.uid_to_request: dict[int, ActiveRequest] = {}

        # Use pipelined generator for multi-device pipeline parallelism
        self.use_pipelined = world_size > 1
        if self.use_pipelined:
            logger.info(f"Using PipelinedGenerator with {world_size} streams for pipeline overlap")

        # EOS tokens for the model
        self.stop_tokens: set[int] = set()
        eos_ids: list[int] | None = getattr(tokenizer, "eos_token_ids", None)
        if eos_ids:
            self.stop_tokens = set(eos_ids)

    @property
    def is_active(self) -> bool:
        """Check if there's an active batch being processed."""
        if self.use_pipelined:
            return self.pipelined_generator is not None and self.pipelined_generator.has_active
        return self.batch_generator is not None and len(self.uid_to_request) > 0

    @property
    def has_pending(self) -> bool:
        """Check if there are pending requests waiting to be batched."""
        return len(self.pending) > 0

    @property
    def current_batch_size(self) -> int:
        """Current number of active requests in the batch."""
        return len(self.uid_to_request)

    def add_request(self, task: ChatCompletion) -> None:
        """Add a ChatCompletion request to the pending batch."""
        task_params = task.task_params

        # Build prompt
        prompt = apply_chat_template(self.tokenizer, task_params)

        # Determine max tokens
        max_tokens = task_params.max_tokens or MAX_TOKENS

        # Create sampler for this request
        sampler = make_sampler(
            temp=task_params.temperature if task_params.temperature is not None else 0.7,
            top_p=task_params.top_p if task_params.top_p is not None else 1.0,
        )

        # Logprobs configuration
        should_extract_logprobs = task_params.logprobs is True
        top_k = task_params.top_logprobs if task_params.top_logprobs is not None else 0

        pending_request = PendingRequest(
            task=task,
            prompt=prompt,
            max_tokens=max_tokens,
            sampler=sampler,
            should_extract_logprobs=should_extract_logprobs,
            top_k=top_k,
        )

        self.pending.append(pending_request)

        logger.info(
            f"Added request to batch queue (pending={len(self.pending)}, active={self.current_batch_size})"
        )

    def flush(self) -> None:
        """Start processing pending requests by adding them to the batch/pipelined generator."""
        if not self.has_pending:
            return

        # Determine how many requests to flush (up to available slots)
        available_slots = self.max_batch_size - self.current_batch_size
        requests_to_flush = self.pending[:available_slots]
        self.pending = self.pending[available_slots:]

        # Prepare batch data - tokenize prompts
        tokenized_prompts: list[list[int]] = []
        max_tokens_list: list[int] = []
        samplers: list[Callable[[mx.array], mx.array]] = []
        prompt_token_counts: list[int] = []

        for req in requests_to_flush:
            tokens = self.tokenizer.encode(req.prompt)
            tokenized_prompts.append(tokens)
            max_tokens_list.append(req.max_tokens)
            samplers.append(req.sampler)
            prompt_token_counts.append(len(tokens))

        if self.use_pipelined:
            self._flush_pipelined(requests_to_flush, tokenized_prompts, max_tokens_list, samplers, prompt_token_counts)
        else:
            self._flush_batch(requests_to_flush, tokenized_prompts, max_tokens_list, samplers, prompt_token_counts)

    def _flush_pipelined(
        self,
        requests_to_flush: list[PendingRequest],
        tokenized_prompts: list[list[int]],
        max_tokens_list: list[int],
        samplers: list[Callable[[mx.array], mx.array]],
        prompt_token_counts: list[int],
    ) -> None:
        """Flush using PipelinedGenerator (multi-stream pipeline overlap)."""
        if self.pipelined_generator is None:
            logger.info(f"Creating PipelinedGenerator for {len(requests_to_flush)} requests ({self.world_size} streams)")
            mx.reset_peak_memory()
            self.pipelined_generator = PipelinedGenerator(
                model=self.model,
                world_size=self.world_size,
                stop_tokens=self.stop_tokens if self.stop_tokens else None,
                max_tokens=MAX_TOKENS,
            )
        else:
            logger.info(f"Adding {len(requests_to_flush)} requests to PipelinedGenerator")

        uids = self.pipelined_generator.insert(
            prompts=tokenized_prompts,
            max_tokens=max_tokens_list,
            samplers=samplers,
        )

        for uid, req, prompt_tokens in zip(uids, requests_to_flush, prompt_token_counts, strict=True):
            parser = None
            if self.is_gpt_oss and self._gpt_oss_encoding is not None:
                parser = StreamableParser(self._gpt_oss_encoding, role=Role.ASSISTANT)  # pyright: ignore[reportAny]
            self.uid_to_request[uid] = ActiveRequest(
                command_id=req.task.command_id,
                should_extract_logprobs=req.should_extract_logprobs,
                top_k=req.top_k,
                prompt_tokens=prompt_tokens,
                gpt_oss_parser=parser,
            )

        logger.info(f"Flushed {len(requests_to_flush)} requests into pipelined generator (active={self.pipelined_generator.active_count}, uids={list(self.uid_to_request.keys())})")

    def _flush_batch(
        self,
        requests_to_flush: list[PendingRequest],
        tokenized_prompts: list[list[int]],
        max_tokens_list: list[int],
        samplers: list[Callable[[mx.array], mx.array]],
        prompt_token_counts: list[int],
    ) -> None:
        """Flush using BatchGenerator (single-stream, for non-pipeline instances)."""
        if self.batch_generator is None:
            logger.info(f"Creating new BatchGenerator for {len(requests_to_flush)} requests")
            mx.reset_peak_memory()
            self.batch_generator = BatchGenerator(
                model=self.model,
                max_tokens=MAX_TOKENS,
                stop_tokens=self.stop_tokens if self.stop_tokens else None,
                prefill_batch_size=1,
            )
        else:
            logger.info(f"Adding {len(requests_to_flush)} requests to existing BatchGenerator")

        # Insert into batch generator
        uids: list[int] = self.batch_generator.insert(  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
            prompts=tokenized_prompts,
            max_tokens=max_tokens_list,
            samplers=samplers,  # pyright: ignore[reportCallIssue]
        )

        for uid, req, prompt_tokens in zip(uids, requests_to_flush, prompt_token_counts, strict=True):  # pyright: ignore[reportUnknownArgumentType]
            parser = None
            if self.is_gpt_oss and self._gpt_oss_encoding is not None:
                parser = StreamableParser(self._gpt_oss_encoding, role=Role.ASSISTANT)  # pyright: ignore[reportAny]
            self.uid_to_request[uid] = ActiveRequest(
                command_id=req.task.command_id,
                should_extract_logprobs=req.should_extract_logprobs,
                top_k=req.top_k,
                prompt_tokens=prompt_tokens,
                gpt_oss_parser=parser,
            )

        logger.info(f"Flushed {len(requests_to_flush)} requests into batch (active={self.current_batch_size}, uids={list(self.uid_to_request.keys())})")

    def step(self) -> Generator[Event, None, None]:
        """
        Process one generation step and yield ChunkGenerated events.

        Returns a generator of events for completed tokens across all active requests.
        """
        if self.use_pipelined:
            yield from self._step_pipelined()
            return

        if self.batch_generator is None or not self.uid_to_request:
            return

        # Get next tokens for all active requests
        # BatchGenerator.next() returns list of Response objects
        logger.debug(f"BatchGenerator.next() called (active_uids={list(self.uid_to_request.keys())})")
        responses: list[Any] = self.batch_generator.next()  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
        logger.debug(f"BatchGenerator.next() returned {len(responses)} responses")  # pyright: ignore[reportUnknownArgumentType]

        completed_uids: list[int] = []

        for response in responses:  # pyright: ignore[reportUnknownVariableType]
            uid: int = response.uid  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
            if uid not in self.uid_to_request:
                logger.warning(f"Received response for unknown uid: {uid}")
                continue

            active_request = self.uid_to_request[uid]
            active_request.tokens_generated += 1

            # Extract response fields with explicit typing
            resp_token: int = response.token  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
            resp_finish_reason: str | None = response.finish_reason  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
            resp_logprobs: mx.array = response.logprobs  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]

            # Only emit events from device_rank 0
            if self.device_rank != 0:
                if resp_finish_reason is not None:
                    completed_uids.append(uid)  # pyright: ignore[reportUnknownArgumentType]
                continue

            # Decode token to text, applying GPT-OSS parsing if needed
            token_text = self.tokenizer.decode([resp_token])
            if active_request.gpt_oss_parser is not None:
                parser = active_request.gpt_oss_parser  # pyright: ignore[reportAny]
                parser.process(resp_token)  # pyright: ignore[reportAny]
                delta: str | None = parser.last_content_delta  # pyright: ignore[reportAny]
                channel: str = parser.current_channel  # pyright: ignore[reportAny]

                # Track reasoning tokens (analysis channel = thinking)
                if channel == "analysis":
                    active_request.reasoning_tokens += 1

                # Handle thinking tag transitions
                prefix = ""
                if channel == "analysis" and not active_request.gpt_oss_thinking:
                    active_request.gpt_oss_thinking = True
                    prefix = "<think>"
                elif channel != "analysis" and active_request.gpt_oss_thinking:
                    active_request.gpt_oss_thinking = False
                    prefix = "</think>"

                if resp_finish_reason is not None and active_request.gpt_oss_thinking:
                    # Close thinking tag on finish
                    prefix = "</think>"
                    active_request.gpt_oss_thinking = False

                effective_delta = delta or ""
                token_text = prefix + effective_delta if (prefix or effective_delta) else ""
                # Skip empty tokens (channel markers with no content delta)
                if not token_text and resp_finish_reason is None:
                    continue

            # Extract logprobs if requested
            logprob: float | None = None
            top_logprobs: list[TopLogprobItem] | None = None
            if active_request.should_extract_logprobs:
                logprob, top_logprobs = extract_top_logprobs(
                    logprobs_array=resp_logprobs,  # pyright: ignore[reportUnknownArgumentType]
                    selected_token=resp_token,  # pyright: ignore[reportUnknownArgumentType]
                    tokenizer=self.tokenizer,
                    top_k=active_request.top_k,
                )

            # Build stats for final token
            stats: GenerationStats | None = None
            finish_reason: TokenFinishReason | None = None
            if resp_finish_reason is not None:
                elapsed_time = time.perf_counter() - active_request.start_time
                prompt_tps = active_request.prompt_tokens / max(elapsed_time, 0.001)
                generation_tps = active_request.tokens_generated / max(elapsed_time, 0.001)

                # Get peak memory
                peak_memory_bytes = 0
                if mx.metal.is_available():
                    peak_memory_bytes = mx.metal.get_peak_memory()

                stats = GenerationStats(
                    prompt_tps=prompt_tps,
                    generation_tps=generation_tps,
                    prompt_tokens=active_request.prompt_tokens,
                    generation_tokens=active_request.tokens_generated,
                    reasoning_tokens=active_request.reasoning_tokens,
                    peak_memory_usage=Memory.from_bytes(peak_memory_bytes),
                )

                # Map finish reason to the narrower type TokenChunk expects
                if resp_finish_reason == "stop":
                    finish_reason = "stop"
                elif resp_finish_reason == "length":
                    finish_reason = "length"
                elif resp_finish_reason == "content_filter":
                    finish_reason = "content_filter"
                else:
                    # Unknown finish reasons default to "stop"
                    logger.warning(f"Unknown finish_reason: {resp_finish_reason}, mapping to 'stop'")
                    finish_reason = "stop"

                completed_uids.append(uid)  # pyright: ignore[reportUnknownArgumentType]

            yield ChunkGenerated(
                command_id=active_request.command_id,
                chunk=TokenChunk(
                    model=self.model_id,
                    text=token_text,
                    token_id=resp_token,  # pyright: ignore[reportUnknownArgumentType]
                    logprob=logprob,
                    top_logprobs=top_logprobs,
                    finish_reason=finish_reason,
                    stats=stats,
                ),
            )

        # Clean up completed requests
        for uid in completed_uids:
            del self.uid_to_request[uid]

    def _step_pipelined(self) -> Generator[Event, None, None]:
        """Process one generation step using the multi-stream PipelinedGenerator."""
        if self.pipelined_generator is None or not self.uid_to_request:
            return

        logger.debug(f"PipelinedGenerator.next() called (active={self.pipelined_generator.active_count})")
        responses: list[PipelinedResponse] = self.pipelined_generator.next()
        logger.debug(f"PipelinedGenerator.next() returned {len(responses)} responses")

        completed_uids: list[int] = []

        for response in responses:
            uid = response.uid
            if uid not in self.uid_to_request:
                logger.warning(f"Received response for unknown uid: {uid}")
                continue

            active_request = self.uid_to_request[uid]
            active_request.tokens_generated += 1

            resp_token: int = response.token
            resp_finish_reason: str | None = response.finish_reason
            resp_logprobs: mx.array = response.logprobs

            # Only emit events from device_rank 0
            if self.device_rank != 0:
                if resp_finish_reason is not None:
                    completed_uids.append(uid)
                continue

            # Decode token to text
            token_text = self.tokenizer.decode([resp_token])
            if active_request.gpt_oss_parser is not None:
                parser = active_request.gpt_oss_parser  # pyright: ignore[reportAny]
                parser.process(resp_token)  # pyright: ignore[reportAny]
                delta: str | None = parser.last_content_delta  # pyright: ignore[reportAny]
                channel: str = parser.current_channel  # pyright: ignore[reportAny]

                if channel == "analysis":
                    active_request.reasoning_tokens += 1

                prefix = ""
                if channel == "analysis" and not active_request.gpt_oss_thinking:
                    active_request.gpt_oss_thinking = True
                    prefix = "<think>"
                elif channel != "analysis" and active_request.gpt_oss_thinking:
                    active_request.gpt_oss_thinking = False
                    prefix = "</think>"

                if resp_finish_reason is not None and active_request.gpt_oss_thinking:
                    prefix = "</think>"
                    active_request.gpt_oss_thinking = False

                effective_delta = delta or ""
                token_text = prefix + effective_delta if (prefix or effective_delta) else ""
                if not token_text and resp_finish_reason is None:
                    continue

            # Extract logprobs if requested
            logprob: float | None = None
            top_logprobs: list[TopLogprobItem] | None = None
            if active_request.should_extract_logprobs:
                logprob, top_logprobs = extract_top_logprobs(
                    logprobs_array=resp_logprobs,
                    selected_token=resp_token,
                    tokenizer=self.tokenizer,
                    top_k=active_request.top_k,
                )

            # Build stats for final token
            stats: GenerationStats | None = None
            finish_reason: TokenFinishReason | None = None
            if resp_finish_reason is not None:
                elapsed_time = time.perf_counter() - active_request.start_time
                prompt_tps = active_request.prompt_tokens / max(elapsed_time, 0.001)
                generation_tps = active_request.tokens_generated / max(elapsed_time, 0.001)

                peak_memory_bytes = 0
                if mx.metal.is_available():
                    peak_memory_bytes = mx.metal.get_peak_memory()

                stats = GenerationStats(
                    prompt_tps=prompt_tps,
                    generation_tps=generation_tps,
                    prompt_tokens=active_request.prompt_tokens,
                    generation_tokens=active_request.tokens_generated,
                    reasoning_tokens=active_request.reasoning_tokens,
                    peak_memory_usage=Memory.from_bytes(peak_memory_bytes),
                )

                if resp_finish_reason == "stop":
                    finish_reason = "stop"
                elif resp_finish_reason == "length":
                    finish_reason = "length"
                else:
                    finish_reason = "stop"

                completed_uids.append(uid)

            yield ChunkGenerated(
                command_id=active_request.command_id,
                chunk=TokenChunk(
                    model=self.model_id,
                    text=token_text,
                    token_id=resp_token,
                    logprob=logprob,
                    top_logprobs=top_logprobs,
                    finish_reason=finish_reason,
                    stats=stats,
                ),
            )

        for uid in completed_uids:
            del self.uid_to_request[uid]

    def emit_error(self, command_id: CommandId, error_message: str) -> Event:
        """Create an error event for a failed request."""
        return ChunkGenerated(
            command_id=command_id,
            chunk=ErrorChunk(
                model=self.model_id,
                finish_reason="error",
                error_message=error_message,
            ),
        )

    def _close_generator(self) -> None:
        """Close and clean up the batch/pipelined generator."""
        if self.batch_generator is not None:
            self.batch_generator.close()  # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue]
            self.batch_generator = None
        if self.pipelined_generator is not None:
            self.pipelined_generator.close()
            self.pipelined_generator = None
        self.uid_to_request.clear()
        logger.info("Generator closed")

    def close(self) -> None:
        """Close the handler and clean up resources."""
        self._close_generator()
        self.pending.clear()
200
src/exo/worker/runner/batched_scoring_handler.py
Normal file
200
src/exo/worker/runner/batched_scoring_handler.py
Normal file
@@ -0,0 +1,200 @@
"""Batched scoring handler for processing multiple Completion requests concurrently."""

import time
from dataclasses import dataclass, field

from mlx_lm.tokenizer_utils import TokenizerWrapper

from exo.shared.models.model_cards import ModelId
from exo.shared.types.api import TopLogprobItem
from exo.shared.types.chunks import CompletionChunk, ErrorChunk
from exo.shared.types.events import ChunkGenerated, Event
from exo.shared.types.tasks import Completion
from exo.worker.engines.mlx import Model
from exo.worker.engines.mlx.generator.generate import score_tokens_batched
from exo.worker.runner.bootstrap import logger


@dataclass
class PendingScoringRequest:
    """A scoring request waiting to be batched."""

    task: Completion
    tokens: list[int]
    prompt_text: str
    top_k: int | None
    echo: bool


@dataclass
class BatchedScoringHandler:
    """
    Handles batched scoring for multiple Completion requests.

    Collects multiple scoring requests and processes them in a single
    batched forward pass for improved throughput.
    """

    model: Model
    tokenizer: TokenizerWrapper
    model_id: ModelId
    device_rank: int
    max_batch_size: int = 32
    batch_timeout_ms: int = 10

    pending: list[PendingScoringRequest] = field(default_factory=list)
    pending_start_time: float | None = None

    @property
    def has_pending(self) -> bool:
        """Check if there are pending requests."""
        return len(self.pending) > 0

    def add_request(
        self,
        task: Completion,
        tokens: list[int],
        prompt_text: str,
    ) -> None:
        """Add a Completion request to the pending batch."""
        task_params = task.task_params
        top_k = task_params.logprobs

        self.pending.append(
            PendingScoringRequest(
                task=task,
                tokens=tokens,
                prompt_text=prompt_text,
                top_k=top_k,
                echo=task_params.echo,
            )
        )

        if self.pending_start_time is None:
            self.pending_start_time = time.perf_counter()

        logger.debug(f"Added scoring request to batch (pending={len(self.pending)})")

    def should_flush(self) -> bool:
        """Check if the batch should be flushed."""
        if not self.has_pending:
            return False

        # Flush if batch is full
        if len(self.pending) >= self.max_batch_size:
            return True

        # Flush if timeout reached
        if self.pending_start_time is not None:
            elapsed_ms = (time.perf_counter() - self.pending_start_time) * 1000
            if elapsed_ms >= self.batch_timeout_ms:
                return True

        return False

    def flush(self) -> list[Event]:
        """Process all pending requests and return events."""
        if not self.has_pending:
            return []

        requests = self.pending
        self.pending = []
        self.pending_start_time = None

        logger.info(f"Processing batch of {len(requests)} scoring requests")

        # Collect all token sequences
        token_sequences = [req.tokens for req in requests]

        # Get common top_k (use first request's top_k, they should all be the same)
        top_k = requests[0].top_k if requests else None

        try:
            # Run batched scoring
            all_results = score_tokens_batched(
                model=self.model,
                tokenizer=self.tokenizer,
                token_sequences=token_sequences,
                top_k=top_k,
            )

            # Generate events for each request
            events: list[Event] = []
            for req, logprob_results in zip(requests, all_results, strict=True):
                if self.device_rank != 0:
                    continue

                event = self._build_completion_event(req, logprob_results)
                events.append(event)

            logger.info(f"Batch scoring complete ({len(events)} events)")
            return events

        except Exception as e:
            # Return error events for all requests
            logger.error(f"Batch scoring failed: {e}")
            events = []
            for req in requests:
                if self.device_rank == 0:
                    events.append(
                        ChunkGenerated(
                            command_id=req.task.command_id,
                            chunk=ErrorChunk(
                                model=self.model_id,
                                finish_reason="error",
                                error_message=str(e),
                            ),
                        )
                    )
            return events

    def _build_completion_event(
        self,
        req: PendingScoringRequest,
        logprob_results: list[tuple[float, list[TopLogprobItem]]],
    ) -> Event:
        """Build a ChunkGenerated event for a completed scoring request."""
        tokens = req.tokens
        tokenizer = self.tokenizer

        # Build response in completions format
        token_strings: list[str] = []
        token_logprobs: list[float | None] = []
        top_logprobs: list[dict[str, float]] = []
        text_offset: list[int] = []

        offset = 0
        for i, token_id in enumerate(tokens):
            token_str = tokenizer.decode([token_id])
            token_strings.append(token_str)

            if i < len(logprob_results):
                logprob, top_items = logprob_results[i]
                # First token has no logprob (None in OpenAI format)
                token_logprobs.append(logprob if i > 0 else None)
                top_lp_dict = {item.token: item.logprob for item in top_items}
                top_logprobs.append(top_lp_dict)
            else:
                token_logprobs.append(None)
                top_logprobs.append({})

            text_offset.append(offset)
            offset += len(token_str)

        return ChunkGenerated(
            command_id=req.task.command_id,
            chunk=CompletionChunk(
                model=self.model_id,
                text=req.prompt_text if req.echo else "",
                tokens=token_strings,
                token_logprobs=token_logprobs,
                top_logprobs=top_logprobs,
                text_offset=text_offset,
                finish_reason="stop",
            ),
        )
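For reference, the OpenAI-style completions logprobs layout that _build_completion_event assembles looks roughly like this for a three-token prompt (the field names follow the code above; the token strings and values are invented for illustration):

    tokens         = ["The", " cat", " sat"]
    token_logprobs = [None, -1.2, -0.4]        # first token has no logprob
    top_logprobs   = [{}, {" cat": -1.2, " dog": -2.0}, {" sat": -0.4}]
    text_offset    = [0, 3, 7]                 # cumulative character offsets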
    def close(self) -> None:
        """Clean up resources."""
        self.pending.clear()
        self.pending_start_time = None
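A plausible driving loop for this handler, sketched under the assumption that the surrounding runner polls it between tasks; incoming_tasks and emit are hypothetical stand-ins for the real plumbing:

    # Hypothetical polling loop around BatchedScoringHandler (sketch only).
    handler = BatchedScoringHandler(
        model=model, tokenizer=tokenizer, model_id=model_id, device_rank=0
    )
    while running:
        for task, tokens, prompt_text in incoming_tasks():
            handler.add_request(task, tokens, prompt_text)
        if handler.should_flush():  # batch full, or batch_timeout_ms elapsed
            for event in handler.flush():
                emit(event)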
@@ -7,7 +7,6 @@ from exo.shared.types.tasks import Task
 from exo.shared.types.worker.instances import BoundInstance, MlxJacclInstance
 from exo.shared.types.worker.runners import RunnerFailed
 from exo.utils.channels import ClosedResourceError, MpReceiver, MpSender
-from exo.worker.tests.patches import load_null_model

 logger: "loguru.Logger" = loguru.logger

@@ -17,8 +16,6 @@ def entrypoint(
     event_sender: MpSender[Event],
     task_receiver: MpReceiver[Task],
     _logger: "loguru.Logger",
-    *,
-    _load_null_models: bool = False,
 ) -> None:
     fast_synch_override = os.environ.get("EXO_FAST_SYNCH")
     if fast_synch_override == "on" or (
@@ -32,13 +29,6 @@
     else:
         os.environ["MLX_METAL_FAST_SYNCH"] = "0"

-    p = None
-    if _load_null_models:
-        from unittest.mock import patch
-
-        p = patch("mlx_lm.utils.load_model", new=load_null_model)
-        p.start()
-
     global logger
     logger = _logger

@@ -62,8 +52,6 @@
             )
         )
     finally:
-        if p is not None:
-            p.stop()
         try:
             event_sender.close()
             task_receiver.close()
src/exo/worker/runner/pipelined_generator.py (new file, 334 lines)
@@ -0,0 +1,334 @@
"""Multi-stream pipelined batch generator for pipeline-parallel inference.

When a model is split across N ranks (pipeline parallelism), each rank's GPU is idle
for (N-1)/N of each step while waiting for other ranks to compute their layers.

This module fills the pipeline bubble by splitting sequences into N micro-batch groups
and processing each group on a different MLX stream. The GPU can overlap one stream's
network communication (send/recv/all_gather) with another stream's compute.
"""
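As a minimal, self-contained illustration of the technique the docstring describes (lazy graph construction on separate MLX streams, then one combined evaluation), using only public mlx.core APIs:

    import mlx.core as mx

    s0 = mx.new_stream(mx.default_device())
    s1 = mx.new_stream(mx.default_device())

    a = mx.random.normal((1024, 1024))
    b = mx.random.normal((1024, 1024))

    with mx.stream(s0):
        x = a @ a  # lazy: queued on stream 0
    with mx.stream(s1):
        y = b @ b  # lazy: queued on stream 1

    # Evaluating both graphs together lets the scheduler overlap work
    # across the two streams, which is what fills the pipeline bubble.
    mx.eval(x, y)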
# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false
# pyright: reportUnknownArgumentType=false, reportAny=false

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass
from typing import Any

import mlx.core as mx
import mlx.nn as nn
from mlx_lm.models.cache import make_prompt_cache


@dataclass
class MicroBatch:
    """State for one micro-batch group of sequences."""

    uids: list[int]
    y: mx.array  # Last sampled tokens [batch]
    logprobs: list[mx.array]  # Logprobs for each sequence
    max_tokens: list[int]
    num_tokens: list[int]
    cache: list[Any]  # Per-sequence KV caches (each a list of layer caches)
    samplers: list[Callable[[mx.array], mx.array]]
    tokens: list[mx.array]  # All tokens generated so far per sequence

    def __len__(self) -> int:
        return len(self.uids)


@dataclass
class PipelinedResponse:
    """Response from one generation step."""

    uid: int
    token: int
    logprobs: mx.array
    finish_reason: str | None
    cache: list[Any] | None = None


@dataclass
class PendingPrompt:
    """A prompt waiting to be prefilled."""

    uid: int
    tokens: list[int]
    max_tokens: int
    sampler: Callable[[mx.array], mx.array]


class PipelinedGenerator:
    """
    Multi-stream batch generator that fills pipeline bubbles.

    Splits active sequences into `world_size` micro-batch groups, each processed
    on its own MLX stream. During mx.eval(), the GPU can overlap network operations
    on one stream with compute on another.
    """

    def __init__(
        self,
        model: nn.Module,
        world_size: int,
        stop_tokens: set[int] | None = None,
        max_tokens: int = 4096,
    ):
        self.model = model
        self.world_size = world_size
        self.stop_tokens = stop_tokens or set()
        self.max_tokens_default = max_tokens

        # Create one stream per pipeline stage
        self.streams = [mx.new_stream(mx.default_device()) for _ in range(world_size)]

        # Micro-batch groups (one per stream)
        self.micro_batches: list[MicroBatch | None] = [None] * world_size

        # Pending prompts to be inserted
        self.pending_prompts: list[PendingPrompt] = []

        # UID counter
        self._next_uid = 0

    @property
    def active_count(self) -> int:
        """Total number of active sequences across all micro-batches."""
        return sum(len(mb) for mb in self.micro_batches if mb is not None)

    @property
    def has_active(self) -> bool:
        return self.active_count > 0 or len(self.pending_prompts) > 0

    def insert(
        self,
        prompts: list[list[int]],
        max_tokens: list[int],
        samplers: list[Callable[[mx.array], mx.array]],
    ) -> list[int]:
        """Queue prompts for processing. Returns assigned UIDs."""
        uids: list[int] = []
        for prompt, mt, sampler in zip(prompts, max_tokens, samplers, strict=True):
            uid = self._next_uid
            self._next_uid += 1
            self.pending_prompts.append(
                PendingPrompt(uid=uid, tokens=prompt, max_tokens=mt, sampler=sampler)
            )
            uids.append(uid)
        return uids

    def _prefill_group(self, group_idx: int, prompts: list[PendingPrompt]) -> None:
        """Prefill a group of prompts and create a MicroBatch."""
        if not prompts:
            return

        stream = self.streams[group_idx]

        with mx.stream(stream):
            # Create per-sequence caches
            caches = [make_prompt_cache(self.model) for _ in prompts]

            # Tokenize and prefill each sequence
            all_y: list[mx.array] = []
            all_logprobs: list[mx.array] = []
            all_samplers: list[Callable[[mx.array], mx.array]] = []
            all_tokens: list[mx.array] = []

            for prompt_info, cache in zip(prompts, caches, strict=True):
                tokens = mx.array(prompt_info.tokens)
                # Run prefill (process all tokens except last)
                if len(prompt_info.tokens) > 1:
                    self.model(tokens[:-1][None, :], cache=cache)
                    mx.eval([c.state for c in cache])

                # Process last token to get first generation logits
                last_token = tokens[-1:][None, :]
                logits = self.model(last_token, cache=cache)
                logits = logits[:, -1, :]
                logprobs = logits - mx.logsumexp(logits, axis=-1, keepdims=True)
                sampled = prompt_info.sampler(logprobs)

                all_y.append(sampled.squeeze(0))
                all_logprobs.append(logprobs.squeeze(0))
                all_samplers.append(prompt_info.sampler)
                all_tokens.append(tokens)

            mx.eval(*all_y, *all_logprobs)

        # Create micro-batch
        batch = MicroBatch(
            uids=[p.uid for p in prompts],
            y=mx.stack(all_y),
            logprobs=all_logprobs,
            max_tokens=[p.max_tokens for p in prompts],
            num_tokens=[0] * len(prompts),
            cache=caches,
            samplers=all_samplers,
            tokens=all_tokens,
        )

        if self.micro_batches[group_idx] is None:
            self.micro_batches[group_idx] = batch
        else:
            # Extend existing micro-batch (would need cache merging - for now replace)
            self.micro_batches[group_idx] = batch

    def _prefill_pending(self) -> None:
        """Distribute pending prompts across micro-batch groups and prefill."""
        if not self.pending_prompts:
            return

        # Distribute round-robin across groups
        groups: list[list[PendingPrompt]] = [[] for _ in range(self.world_size)]
        for i, prompt in enumerate(self.pending_prompts):
            groups[i % self.world_size].append(prompt)
        self.pending_prompts.clear()

        for group_idx, group_prompts in enumerate(groups):
            if group_prompts:
                self._prefill_group(group_idx, group_prompts)

    def _step_all(self) -> None:
        """
        Run one generation step across all micro-batch groups on different streams.

        This is where pipeline overlap happens: each group's model forward pass
        runs on its own stream, and mx.eval() allows the GPU to overlap network
        ops (send/recv/all_gather) from one stream with compute from another.

        Each sequence is processed individually with its own KV cache, but all
        lazy graphs across streams are evaluated together for GPU overlap.
        """
        # Build computation graphs on each stream (lazy, no evaluation yet).
        # Each micro-batch group processes its sequences on its own stream.
        all_sampled: list[mx.array] = []
        all_logprobs: list[mx.array] = []
        # Track which (group_idx, seq_idx) each result corresponds to
        result_map: list[tuple[int, int]] = []

        for i, mb in enumerate(self.micro_batches):
            if mb is None or len(mb) == 0:
                continue

            with mx.stream(self.streams[i]):
                for e in range(len(mb)):
                    # Process each sequence individually with its own cache
                    input_token = mb.y[e: e + 1][None, :]  # [1, 1]

                    # Forward pass (lazy graph construction)
                    # For pipeline models, this includes send/recv/all_gather ops
                    logits = self.model(input_token, cache=mb.cache[e])
                    logits = logits[:, -1, :]  # [1, vocab]

                    # Compute logprobs
                    logprobs = logits - mx.logsumexp(logits, axis=-1, keepdims=True)

                    # Sample
                    sampled = mb.samplers[e](logprobs)

                    all_sampled.append(sampled.squeeze(0))
                    all_logprobs.append(logprobs.squeeze(0))
                    result_map.append((i, e))

        if not result_map:
            return

        # Evaluate ALL streams together - this is where overlap happens!
        # The GPU can execute stream0's all_gather while computing stream1's layers.
        mx.eval(*all_sampled, *all_logprobs)

        # Update micro-batch states with results.
        # Group results by micro-batch for efficient update.
        group_results: dict[int, list[int]] = {}
        for idx, (group_idx, _seq_idx) in enumerate(result_map):
            group_results.setdefault(group_idx, []).append(idx)

        for group_idx, result_indices in group_results.items():
            mb = self.micro_batches[group_idx]
            assert mb is not None
            group_sampled = [all_sampled[idx] for idx in result_indices]
            group_logprobs = [all_logprobs[idx] for idx in result_indices]
            mb.y = mx.stack(group_sampled)
            mb.logprobs = group_logprobs
            for e, idx in enumerate(result_indices):
                mb.tokens[e] = mx.concatenate([mb.tokens[e], all_sampled[idx][None]])

    def next(self) -> list[PipelinedResponse]:
        """
        Run one generation step and return responses.

        Returns a PipelinedResponse for each active sequence (across all groups).
        Finished sequences are removed from their micro-batch.
        """
        # Prefill any pending prompts first
        self._prefill_pending()

        if not self.has_active:
            return []

        # Run the multi-stream forward pass
        self._step_all()

        # Collect responses and filter completed sequences
        responses: list[PipelinedResponse] = []

        for group_idx, mb in enumerate(self.micro_batches):
            if mb is None or len(mb) == 0:
                continue

            keep_idx: list[int] = []
            end_idx: list[int] = []

            for e in range(len(mb)):
                token = int(mb.y[e].item())
                uid = mb.uids[e]
                num_tok = mb.num_tokens[e] + 1
                max_tok = mb.max_tokens[e]
                mb.num_tokens[e] = num_tok

                if token in self.stop_tokens:
                    finish_reason = "stop"
                    end_idx.append(e)
                elif num_tok >= max_tok:
                    finish_reason = "length"
                    end_idx.append(e)
                else:
                    finish_reason = None
                    keep_idx.append(e)

                responses.append(
                    PipelinedResponse(
                        uid=uid,
                        token=token,
                        logprobs=mb.logprobs[e],
                        finish_reason=finish_reason,
                    )
                )

            # Remove finished sequences
            if end_idx:
                if keep_idx:
                    # Filter the micro-batch to keep only active sequences
                    mb.uids = [mb.uids[i] for i in keep_idx]
                    mb.y = mb.y[mx.array(keep_idx)]
                    mb.logprobs = [mb.logprobs[i] for i in keep_idx]
                    mb.max_tokens = [mb.max_tokens[i] for i in keep_idx]
                    mb.num_tokens = [mb.num_tokens[i] for i in keep_idx]
                    mb.samplers = [mb.samplers[i] for i in keep_idx]
                    mb.tokens = [mb.tokens[i] for i in keep_idx]
                    # Caches are per-sequence (one list of layer caches each),
                    # so drop finished sequences by filtering the cache list too.
                    mb.cache = [mb.cache[i] for i in keep_idx]
                else:
                    self.micro_batches[group_idx] = None

        return responses

    def close(self) -> None:
        """Clean up resources."""
        self.micro_batches = [None] * self.world_size
        self.pending_prompts.clear()
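A hypothetical driver loop for the class, assuming a loaded model and tokenizer exist and using greedy sampling; this is a sketch, not part of the file above:

    # Sketch: drive PipelinedGenerator to completion (names are stand-ins).
    gen = PipelinedGenerator(model=model, world_size=2,
                             stop_tokens={tokenizer.eos_token_id})
    gen.insert(
        prompts=[tokenizer.encode("Hello"), tokenizer.encode("Bonjour")],
        max_tokens=[64, 64],
        samplers=[lambda lp: mx.argmax(lp, axis=-1)] * 2,
    )
    while gen.has_active:
        for resp in gen.next():
            print(resp.uid, resp.token, resp.finish_reason)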
(File diff suppressed because it is too large.)
@@ -52,6 +52,7 @@ class RunnerSupervisor:
     _tg: TaskGroup | None = field(default=None, init=False)
     status: RunnerStatus = field(default_factory=RunnerIdle, init=False)
     pending: dict[TaskId, anyio.Event] = field(default_factory=dict, init=False)
+    sent: set[TaskId] = field(default_factory=set, init=False)  # Tasks sent to runner (not yet completed)
     completed: set[TaskId] = field(default_factory=set, init=False)

     @classmethod
@@ -126,21 +127,39 @@ class RunnerSupervisor:
         assert self._tg
         self._tg.cancel_scope.cancel()

-    async def start_task(self, task: Task):
+    async def start_task(self, task: Task, wait_for_ack: bool = True):
+        """
+        Send a task to the runner.
+
+        Args:
+            task: The task to send.
+            wait_for_ack: If True, wait for TaskAcknowledged before returning.
+                If False, return immediately after sending (for batching).
+        """
         if task.task_id in self.completed:
-            logger.info(
-                f"Skipping invalid task {task} as it has already been completed"
+            logger.debug(
+                f"Skipping task {task.task_id} as it has already been completed"
             )
             return
+        if task.task_id in self.sent:
+            logger.debug(f"Task {task.task_id} already sent, skipping duplicate")
+            return
+        if task.task_id in self.pending:
+            logger.debug(f"Task {task.task_id} already pending, skipping duplicate")
+            return
         logger.info(f"Starting task {task}")
         event = anyio.Event()
         self.pending[task.task_id] = event
+        self.sent.add(task.task_id)
         try:
             self._task_sender.send(task)
         except ClosedResourceError:
             logger.warning(f"Task {task} dropped, runner closed communication.")
+            self.sent.discard(task.task_id)
             return
-        await event.wait()
-        logger.info(f"Finished task {task}")
+        if wait_for_ack:
+            await event.wait()
+            logger.info(f"Finished task {task}")

     async def _forward_events(self):
         with self._ev_recv as events:
@@ -149,7 +168,11 @@
                 if isinstance(event, RunnerStatusUpdated):
                     self.status = event.runner_status
                 if isinstance(event, TaskAcknowledged):
-                    self.pending.pop(event.task_id).set()
+                    # Use pop with default to handle tasks sent with wait_for_ack=False
+                    # that may have already been removed or never added
+                    pending_event = self.pending.pop(event.task_id, None)
+                    if pending_event:
+                        pending_event.set()
                     continue
                 if (
                     isinstance(event, TaskStatusUpdated)
@@ -167,6 +190,7 @@
                         ),
                     )
                     self.completed.add(event.task_id)
+                    self.sent.discard(event.task_id)
                 await self._event_sender.send(event)
             except (ClosedResourceError, BrokenResourceError) as e:
                 await self._check_runner(e)
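The net effect of the wait_for_ack flag is that a caller can fan out a batch of tasks without serializing on each acknowledgement; roughly like the following sketch, where supervisor and batch are assumed to exist:

    # Sketch: fire off a batch without blocking on per-task acks.
    for task in batch:
        await supervisor.start_task(task, wait_for_ack=False)
    # Acks are then consumed by _forward_events, which pops pending
    # entries with a default so unawaited tasks are tolerated.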
@@ -1,50 +0,0 @@
# type: ignore

import importlib
import json
from pathlib import Path
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from exo.worker.engines.mlx import Model


def load_null_model(path: Path, **_: object) -> "tuple[Model, dict[str, Any]]":
    with open(path / "config.json", "r") as f:
        cfg = json.load(f)
    model, args = _get_classes(cfg)
    model = model(args.from_dict(cfg))
    return model, cfg


def _get_classes(config: dict):
    """
    Retrieve the model and model args classes based on the configuration.

    Args:
        config (dict): The model configuration.

    Returns:
        A tuple containing the Model class and the ModelArgs class.
    """
    model_type = config["model_type"]
    model_type = MODEL_REMAPPING.get(model_type, model_type)
    try:
        arch = importlib.import_module(f"mlx_lm.models.{model_type}")
    except ImportError:
        msg = f"Model type {model_type} not supported."
        raise ValueError(msg) from None

    return arch.Model, arch.ModelArgs


MODEL_REMAPPING = {
    "mistral": "llama",
    "llava": "mistral3",
    "phi-msft": "phixtral",
    "falcon_mamba": "mamba",
    "kimi_k2": "deepseek_v3",
    "qwen2_5_vl": "qwen2_vl",
    "minimax_m2": "minimax",
    "iquestcoder": "llama",
}
@@ -20,6 +20,7 @@ class FakeRunnerSupervisor:
     bound_instance: BoundInstance
     status: RunnerStatus
     completed: set[TaskId] = field(default_factory=set)
+    sent: set[TaskId] = field(default_factory=set)


 class OtherTask(BaseTask):

@@ -118,6 +118,10 @@ def patch_out_mlx(monkeypatch: pytest.MonkeyPatch):
     # Returns a prompt without thinking tag so detect_thinking_prompt_suffix returns None.
     monkeypatch.setattr(mlx_runner, "apply_chat_template", make_nothin("test prompt"))
     monkeypatch.setattr(mlx_runner, "detect_thinking_prompt_suffix", make_nothin(False))
+    # Force serial processing mode since batch mode requires a real tokenizer
+    monkeypatch.setattr(mlx_runner, "_should_use_serial_processing", make_nothin(True))
+    # Disable batch handler initialization
+    monkeypatch.setattr(mlx_runner, "BATCH_ENABLED", False)

     def fake_generate(*_1: object, **_2: object):
         yield GenerationResponse(token=0, text="hi", finish_reason="stop")
@@ -192,29 +196,30 @@ def test_events_processed_in_correct_order(patch_out_mlx: pytest.MonkeyPatch):
         TaskStatusUpdated(
             task_id=INITIALIZATION_TASK_ID, task_status=TaskStatus.Running
         ),
-        TaskAcknowledged(task_id=INITIALIZATION_TASK_ID),
+        # Status update comes before ack to prevent race conditions
         RunnerStatusUpdated(
             runner_id=RUNNER_1_ID, runner_status=RunnerConnecting()
         ),
+        TaskAcknowledged(task_id=INITIALIZATION_TASK_ID),
         TaskStatusUpdated(
             task_id=INITIALIZATION_TASK_ID, task_status=TaskStatus.Complete
         ),
         RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerConnected()),
         TaskStatusUpdated(task_id=LOAD_TASK_ID, task_status=TaskStatus.Running),
-        TaskAcknowledged(task_id=LOAD_TASK_ID),
         RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerLoading()),
+        TaskAcknowledged(task_id=LOAD_TASK_ID),
         TaskStatusUpdated(task_id=LOAD_TASK_ID, task_status=TaskStatus.Complete),
         RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerLoaded()),
         TaskStatusUpdated(task_id=WARMUP_TASK_ID, task_status=TaskStatus.Running),
-        TaskAcknowledged(task_id=WARMUP_TASK_ID),
         RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerWarmingUp()),
+        TaskAcknowledged(task_id=WARMUP_TASK_ID),
         TaskStatusUpdated(task_id=WARMUP_TASK_ID, task_status=TaskStatus.Complete),
         RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerReady()),
         TaskStatusUpdated(
             task_id=CHAT_COMPLETION_TASK_ID, task_status=TaskStatus.Running
         ),
-        TaskAcknowledged(task_id=CHAT_COMPLETION_TASK_ID),
         RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerRunning()),
+        TaskAcknowledged(task_id=CHAT_COMPLETION_TASK_ID),
         expected_chunk,
         TaskStatusUpdated(
             task_id=CHAT_COMPLETION_TASK_ID, task_status=TaskStatus.Complete
@@ -222,10 +227,10 @@
         # CHAT COMPLETION TASK SHOULD COMPLETE BEFORE RUNNER READY
         RunnerStatusUpdated(runner_id=RUNNER_1_ID, runner_status=RunnerReady()),
         TaskStatusUpdated(task_id=SHUTDOWN_TASK_ID, task_status=TaskStatus.Running),
-        TaskAcknowledged(task_id=SHUTDOWN_TASK_ID),
         RunnerStatusUpdated(
             runner_id=RUNNER_1_ID, runner_status=RunnerShuttingDown()
         ),
+        TaskAcknowledged(task_id=SHUTDOWN_TASK_ID),
         TaskStatusUpdated(
             task_id=SHUTDOWN_TASK_ID, task_status=TaskStatus.Complete
         ),
@@ -1,6 +1,7 @@
 import multiprocessing as mp
 import socket
+import time
 import typing

 import anyio
 from fastapi import FastAPI
@@ -10,12 +11,16 @@ from hypercorn.asyncio import serve  # pyright: ignore[reportUnknownVariableType]
 from loguru import logger
 from pydantic import BaseModel

+from exo.download.impl_shard_downloader import (
+    build_full_shard,
+    exo_shard_downloader,
+)
 from exo.shared.logging import InterceptLogger, logger_setup
 from exo.shared.models.model_cards import MODEL_CARDS, ModelId
 from exo.shared.types.api import ChatCompletionMessage, ChatCompletionTaskParams
 from exo.shared.types.commands import CommandId
 from exo.shared.types.common import Host, NodeId
-from exo.shared.types.events import Event, RunnerStatusUpdated
+from exo.shared.types.events import Event
 from exo.shared.types.tasks import (
     ChatCompletion,
     ConnectToGroup,
@@ -31,17 +36,18 @@ from exo.shared.types.worker.instances import (
     MlxJacclInstance,
     MlxRingInstance,
 )
-from exo.shared.types.worker.runners import RunnerFailed, RunnerId, ShardAssignments
+from exo.shared.types.worker.runners import RunnerId, ShardAssignments
 from exo.shared.types.worker.shards import PipelineShardMetadata, TensorShardMetadata
 from exo.utils.channels import MpReceiver, MpSender, channel, mp_channel
 from exo.utils.info_gatherer.info_gatherer import GatheredInfo, InfoGatherer
 from exo.worker.runner.bootstrap import entrypoint

-MODEL_CARDS = {"haha": MODEL_CARDS["qwen3-coder-480b-a35b-8bit"]}

 class Tests(BaseModel):
     # list[hostname, ip addr]
     devs: list[list[str]]
+    model_id: str
+    kind: typing.Literal["init", "warmup", "inference"]


 mp.set_start_method("spawn", force=True)
@@ -50,14 +56,16 @@ logger_setup(None)

 async def main():
     logger.info("starting cool server majig")
+    await assert_downloads()
     cfg = Config()
-    cfg.bind = "0.0.0.0:8000"
+    cfg.bind = "0.0.0.0:52415"
     # nb: shared.logging needs updating if any of this changes
     cfg.accesslog = "-"
     cfg.errorlog = "-"
     cfg.logger_class = InterceptLogger
     app = FastAPI()
-    app.post("/run_test")(run_test)
+    app.post("/ring")(ring_backend)
+    app.post("/jaccl")(jaccl_backend)
     app.post("/tb_detection")(tb_detection)
     shutdown = anyio.Event()
     await serve(
@@ -79,7 +87,28 @@ async def tb_detection():
     return recv.collect()


-async def run_test(test: Tests):
+async def assert_downloads():
+    sd = exo_shard_downloader()
+    # await sd.ensure_shard(await build_full_shard(MODEL_CARDS["qwen3-0.6b"].model_id))
+    await sd.ensure_shard(
+        await build_full_shard(MODEL_CARDS["llama-3.1-8b-bf16"].model_id)
+    )
+    await sd.ensure_shard(await build_full_shard(MODEL_CARDS["qwen3-30b"].model_id))
+    await sd.ensure_shard(
+        await build_full_shard(MODEL_CARDS["gpt-oss-120b-MXFP4-Q8"].model_id)
+    )
+    await sd.ensure_shard(
+        await build_full_shard(MODEL_CARDS["gpt-oss-20b-4bit"].model_id)
+    )
+    await sd.ensure_shard(
+        await build_full_shard(MODEL_CARDS["glm-4.7-8bit-gs32"].model_id)
+    )
+    await sd.ensure_shard(
+        await build_full_shard(MODEL_CARDS["minimax-m2.1-8bit"].model_id)
+    )
+
+
+async def ring_backend(test: Tests):
     iid = InstanceId(str(hash(str(test.devs))))
     weird_hn = socket.gethostname()
     for dev in test.devs:
@@ -88,30 +117,10 @@ async def run_test(test: Tests):
             break
     else:
         raise ValueError(f"{weird_hn} not in {test.devs}")

-    async def run():
-        for card in MODEL_CARDS.values():
-            for instance in (
-                ring_instance(test, card.model_id, iid, hn),
-                jaccl_instance(test, card.model_id, iid),
-            ):
-                recv = await execute_test(test, instance, hn)
-
-                with recv:
-                    try:
-                        async for item in recv:
-                            yield item.model_dump_json() + "\n"
-                            if isinstance(item, RunnerStatusUpdated) and isinstance(
-                                item.runner_status, RunnerFailed
-                            ):
-                                return
-                    except anyio.ClosedResourceError:
-                        pass
-
-    return StreamingResponse(run())
+    return await execute_test(test, ring_instance(test, iid, hn), hn)


-def ring_instance(test: Tests, model_id: ModelId, iid: InstanceId, hn: str) -> Instance:
+def ring_instance(test: Tests, iid: InstanceId, hn: str) -> Instance:
     hbn = [Host(ip="i dont care", port=52416) for _ in test.devs]
     world_size = len(test.devs)
     for i in range(world_size):
@@ -126,13 +135,13 @@ def ring_instance(test: Tests, model_id: ModelId, iid: InstanceId, hn: str) -> Instance:
     else:
         raise ValueError(f"{hn} not in {test.devs}")

-    card = next(card for card in MODEL_CARDS.values() if card.model_id == model_id)
+    card = MODEL_CARDS[test.model_id]
     instance = MlxRingInstance(
         instance_id=iid,
         ephemeral_port=52416,
         hosts_by_node={NodeId(hn): hbn},
         shard_assignments=ShardAssignments(
-            model_id=model_id,
+            model_id=ModelId(test.model_id),
             node_to_runner={NodeId(host[0]): RunnerId(host[0]) for host in test.devs},
             runner_to_shard={
                 RunnerId(test.devs[i][0]): PipelineShardMetadata(
@@ -154,7 +163,7 @@ def ring_instance(test: Tests, model_id: ModelId, iid: InstanceId, hn: str) -> Instance:
     return instance


-async def execute_test(test: Tests, instance: Instance, hn: str) -> MpReceiver[Event]:
+async def execute_test(test: Tests, instance: Instance, hn: str):
     world_size = len(test.devs)
     iid = InstanceId(str(hash(str(test.devs))))
     _handle, recv, send = new_runner(instance, hn)
@@ -162,33 +171,60 @@ async def execute_test(test: Tests, instance: Instance, hn: str) -> MpReceiver[Event]:
     send.send(ConnectToGroup(instance_id=iid))
     send.send(LoadModel(instance_id=iid))

-    for card in MODEL_CARDS.values():
-        send.send(StartWarmup(instance_id=iid))
-        send.send(
-            ChatCompletion(
-                task_params=ChatCompletionTaskParams(
-                    model=card.model_id,
-                    messages=[
-                        ChatCompletionMessage(
-                            role="system", content="You are a helpful assistant"
-                        ),
-                        ChatCompletionMessage(
-                            role="user", content="What is the capital of France?"
-                        ),
-                    ],
-                ),
-                command_id=CommandId("yo"),
-                instance_id=iid,
-            )
-        )
+    match test.kind:
+        case "init":
+            pass
+        case "warmup":
+            send.send(StartWarmup(instance_id=iid))
+        case "inference":
+            send.send(StartWarmup(instance_id=iid))
+            send.send(
+                ChatCompletion(
+                    task_params=ChatCompletionTaskParams(
+                        model=test.model_id,
+                        messages=[
+                            ChatCompletionMessage(
+                                role="system", content="You are a helpful assistant"
+                            ),
+                            ChatCompletionMessage(
+                                role="user", content="What is the capital of France?"
+                            ),
+                        ],
+                    ),
+                    command_id=CommandId("yo"),
+                    instance_id=iid,
+                )
+            )

     send.send(Shutdown(runner_id=RunnerId(hn), instance_id=iid))

-    return recv
+    async def map_recv():
+        with recv:
+            try:
+                async for item in recv:
+                    yield item.model_dump_json() + "\n"
+            except anyio.ClosedResourceError:
+                pass
+
+    ret = StreamingResponse(map_recv())
+    ret._pls_dont_gc = _handle  # type: ignore
+    return ret


-def jaccl_instance(test: Tests, model_id: ModelId, iid: InstanceId):
-    card = next(card for card in MODEL_CARDS.values() if card.model_id == model_id)
+async def jaccl_backend(test: Tests):
+    iid = InstanceId(str(hash(str(test.devs))))
+    weird_hn = socket.gethostname()
+    for dev in test.devs:
+        if weird_hn.startswith(dev[0]) or dev[0].startswith(weird_hn):
+            hn = dev[0]
+            break
+    else:
+        raise ValueError(f"{weird_hn} not in {test.devs}")
+    return await execute_test(test, jaccl_instance(test, iid), hn)
+
+
+def jaccl_instance(test: Tests, iid: InstanceId):
+    card = MODEL_CARDS[test.model_id]
     world_size = len(test.devs)

     return MlxJacclInstance(
@@ -199,7 +235,7 @@ def jaccl_instance(test: Tests, model_id: ModelId, iid: InstanceId):
             NodeId(host[0]): test.devs[0][1] + ":52416" for host in test.devs
         },
         shard_assignments=ShardAssignments(
-            model_id=model_id,
+            model_id=ModelId(test.model_id),
             node_to_runner={NodeId(host[0]): RunnerId(host[0]) for host in test.devs},
             runner_to_shard={
                 RunnerId(test.devs[i][0]): TensorShardMetadata(
@@ -234,7 +270,6 @@ def new_runner(
             task_recv,
             logger,
         ),
-        kwargs={"_load_null_models": True},
     )
     runner_process._pls_dont_gc = (ev_send, task_recv)  # type: ignore
     runner_process.start()
@@ -6,8 +6,19 @@ query() {
     tailscale status | awk -v find="$1" '$2 == find { print $1 }'
 }

-if [[ $# -lt 1 ]]; then
-    echo "USAGE: $0 [host1] [host2] ..."
+if [[ $# -lt 2 ]]; then
+    echo "USAGE: $0 <test kind> [host1] [host2] ..."
     exit 1
 fi

+
+kind=$1
+shift
+
+test_kinds="ring jaccl"
+
+if ! echo "$test_kinds" | grep -q "$kind"; then
+    printf "%s is not a known test kind.\nCurrent test kinds are %s" "$kind" "$test_kinds"
+    exit 1
+fi
+
@@ -23,12 +34,23 @@ done
 devs_raw=$(printf "[\"%s\", \"%s\"], " "${weaved[@]}")
 devs="[${devs_raw%, }]"

-for i in "${!ips[@]}"; do
-    {
-        curl -sN \
-            -X POST "http://${ips[$i]}:8000/run_test" \
-            -H "Content-Type: application/json" -d "{\"devs\": ${devs}}" \
-            2>&1 | sed "s/^/\n${hostnames[$i]}@${ips[$i]}: /" || echo "curl to ${hostnames[$i]} failed" && exit 1
-    } &
+model_ids=("qwen3-30b" "gpt-oss-120b-MXFP4-Q8" "kimi-k2-thinking")
+
+for model_id in "${model_ids[@]}"; do
+    for i in "${!ips[@]}"; do
+        {
+            req="{
+                \"model_id\": \"${model_id}\",
+                \"devs\": ${devs},
+                \"kind\": \"inference\"
+            }"
+            echo "req $req"
+            curl -sN \
+                -X POST "http://${ips[$i]}:52415/${kind}" \
+                -H "Content-Type: application/json" -d "$req" \
+                2>&1 | sed "s/^/\n${hostnames[$i]}@${ips[$i]}: /" || echo "curl to ${hostnames[$i]} failed" && exit 1
+        } &
+    done
+    wait
 done
 wait
uv.lock (generated, 775 lines)
@@ -2,8 +2,10 @@ version = 1
|
||||
revision = 3
|
||||
requires-python = ">=3.13"
|
||||
resolution-markers = [
|
||||
"sys_platform == 'darwin'",
|
||||
"sys_platform == 'linux'",
|
||||
"python_full_version >= '3.14' and sys_platform == 'darwin'",
|
||||
"python_full_version < '3.14' and sys_platform == 'darwin'",
|
||||
"python_full_version >= '3.14' and sys_platform == 'linux'",
|
||||
"python_full_version < '3.14' and sys_platform == 'linux'",
|
||||
]
|
||||
supported-markers = [
|
||||
"sys_platform == 'darwin'",
|
||||
@@ -19,6 +21,33 @@ members = [
|
||||
"exo-pyo3-bindings",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "absl-py"
|
||||
version = "2.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/10/2a/c93173ffa1b39c1d0395b7e842bbdc62e556ca9d8d3b5572926f3e4ca752/absl_py-2.3.1.tar.gz", hash = "sha256:a97820526f7fbfd2ec1bce83f3f25e3a14840dac0d8e02a0b71cd75db3f77fc9", size = 116588, upload-time = "2025-07-03T09:31:44.05Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl", hash = "sha256:eeecf07f0c2a93ace0772c92e596ace6d3d3996c042b2128459aaae2a76de11d", size = 135811, upload-time = "2025-07-03T09:31:42.253Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "accelerate"
|
||||
version = "1.12.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "torch", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4a/8e/ac2a9566747a93f8be36ee08532eb0160558b07630a081a6056a9f89bf1d/accelerate-1.12.0.tar.gz", hash = "sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6", size = 398399, upload-time = "2025-11-21T11:27:46.973Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9f/d2/c581486aa6c4fbd7394c23c47b83fa1a919d34194e16944241daf9e762dd/accelerate-1.12.0-py3-none-any.whl", hash = "sha256:3e2091cd341423207e2f084a6654b1efcd250dc326f2a37d6dde446e07cabb11", size = 380935, upload-time = "2025-11-21T11:27:44.522Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aiofiles"
|
||||
version = "25.1.0"
|
||||
@@ -211,6 +240,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chardet"
|
||||
version = "5.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "3.4.4"
|
||||
@@ -255,6 +293,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "contourpy"
|
||||
version = "1.3.3"
|
||||
@@ -351,6 +398,53 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dataproperty"
|
||||
version = "1.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "typepy", extra = ["datetime"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/0b/81/8c8b64ae873cb9014815214c07b63b12e3b18835780fb342223cfe3fe7d8/dataproperty-1.1.0.tar.gz", hash = "sha256:b038437a4097d1a1c497695c3586ea34bea67fdd35372b9a50f30bf044d77d04", size = 42574, upload-time = "2024-12-31T14:37:26.033Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/21/c2/e12e95e289e6081a40454199ab213139ef16a528c7c86432de545b05a23a/DataProperty-1.1.0-py3-none-any.whl", hash = "sha256:c61fcb2e2deca35e6d1eb1f251a7f22f0dcde63e80e61f0cc18c19f42abfd25b", size = 27581, upload-time = "2024-12-31T14:37:22.657Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datasets"
|
||||
version = "4.5.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "fsspec", extra = ["http"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pyarrow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/55/bf/bb927bde63d649296c83e883171ae77074717c1b80fe2868b328bd0dbcbb/datasets-4.5.0.tar.gz", hash = "sha256:00c698ce1c2452e646cc5fad47fef39d3fe78dd650a8a6eb205bb45eb63cd500", size = 588384, upload-time = "2026-01-14T18:27:54.297Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/fc/d5/0d563ea3c205eee226dc8053cf7682a8ac588db8acecd0eda2b587987a0b/datasets-4.5.0-py3-none-any.whl", hash = "sha256:b5d7e08096ffa407dd69e58b1c0271c9b2506140839b8d99af07375ad31b6726", size = 515196, upload-time = "2026-01-14T18:27:52.419Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dill"
|
||||
version = "0.4.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docutils"
|
||||
version = "0.22.4"
|
||||
@@ -360,6 +454,28 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "evaluate"
|
||||
version = "0.4.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "fsspec", extra = ["http"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ad/d0/0c17a8e6e8dc7245f22dea860557c32bae50fc4d287ae030cb0e8ab8720f/evaluate-0.4.6.tar.gz", hash = "sha256:e07036ca12b3c24331f83ab787f21cc2dbf3631813a1631e63e40897c69a3f21", size = 65716, upload-time = "2025-09-18T13:06:30.581Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/af/3e990d8d4002bbc9342adb4facd59506e653da93b2417de0fa6027cb86b1/evaluate-0.4.6-py3-none-any.whl", hash = "sha256:bca85bc294f338377b7ac2f861e21c308b11b2a285f510d7d5394d5df437db29", size = 84069, upload-time = "2025-09-18T13:06:29.265Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exo"
|
||||
version = "0.3.0"
|
||||
@@ -387,9 +503,15 @@ dependencies = [
|
||||
{ name = "rustworkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "tomlkit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "typer", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
eval = [
|
||||
{ name = "lm-eval", extra = ["api"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "basedpyright", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -411,6 +533,7 @@ requires-dist = [
|
||||
{ name = "httpx", specifier = ">=0.28.1" },
|
||||
{ name = "huggingface-hub", specifier = ">=0.33.4" },
|
||||
{ name = "hypercorn", specifier = ">=0.18.0" },
|
||||
{ name = "lm-eval", extras = ["api"], marker = "extra == 'eval'" },
|
||||
{ name = "loguru", specifier = ">=0.7.3" },
|
||||
{ name = "mflux", specifier = "==0.15.4" },
|
||||
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.3" },
|
||||
@@ -424,8 +547,10 @@ requires-dist = [
|
||||
{ name = "rustworkx", specifier = ">=0.17.1" },
|
||||
{ name = "tiktoken", specifier = ">=0.12.0" },
|
||||
{ name = "tomlkit", specifier = ">=0.14.0" },
|
||||
{ name = "typer" },
|
||||
{ name = "types-aiofiles", specifier = ">=24.1.0.20250708" },
|
||||
]
|
||||
provides-extras = ["eval"]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
@@ -572,11 +697,16 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "fsspec"
|
||||
version = "2026.1.0"
|
||||
version = "2025.10.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d5/7d/5df2650c57d47c57232af5ef4b4fdbff182070421e405e0d62c6cdbfaa87/fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b", size = 310496, upload-time = "2026-01-09T15:21:35.562Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/01/c9/97cc5aae1648dcb851958a3ddf73ccd7dbe5650d95203ecb4d7720b4cdbf/fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc", size = 201838, upload-time = "2026-01-09T15:21:34.041Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
http = [
|
||||
{ name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -796,6 +926,27 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "joblib"
|
||||
version = "1.5.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonlines"
|
||||
version = "4.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "attrs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/35/87/bcda8e46c88d0e34cad2f09ee2d0c7f5957bccdb9791b0b934ec84d84be4/jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74", size = 11359, upload-time = "2023-09-01T12:34:44.187Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload-time = "2023-09-01T12:34:42.563Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "keyring"
|
||||
version = "25.7.0"
|
||||
@@ -864,6 +1015,45 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/64/ad/53bd6b22fa1917746096b6240dd0c546020e358506e8503dce57f3cdcd9a/kiwisolver-1.4.10rc0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:acc08f93b36220a6baa7df3428cb8847b27717db9be4295c0b1571d040c77327", size = 2391902, upload-time = "2025-08-10T20:22:12.421Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lm-eval"
|
||||
version = "0.4.9.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "accelerate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "evaluate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "jsonlines", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "more-itertools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "numexpr", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "peft", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pybind11", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytablewriter", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "rouge-score", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "sacrebleu", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "scikit-learn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "sqlitedict", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "torch", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "tqdm-multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "word2number", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e1/5e/e6d70f4aa2acd7932a873c51de93ddf2dbd1c43aa6f6c86314ab3c0d279d/lm_eval-0.4.9.2.tar.gz", hash = "sha256:131c2f21911beee92e6ab8286f08adce86d6aa23852f87451651e6a68b4a631a", size = 3101000, upload-time = "2025-11-26T23:28:11.048Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6c/50/3903694e69c94ae8dcec654843cb99799e45137e0d1a0a581009aa592673/lm_eval-0.4.9.2-py3-none-any.whl", hash = "sha256:b8d12410ab9c6321c9e26a9fb7e13a4e1b920ed4907905804dc2e231b7482a6e", size = 8231261, upload-time = "2025-11-26T23:28:08.009Z" },
]
[package.optional-dependencies]
api = [
{ name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tenacity", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
[[package]]
name = "loguru"
version = "0.7.3"
@@ -873,6 +1063,59 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
]
[[package]]
name = "lxml"
version = "6.0.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" },
{ url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" },
{ url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" },
{ url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" },
{ url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" },
{ url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" },
{ url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" },
{ url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" },
{ url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" },
{ url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" },
{ url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" },
{ url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" },
{ url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" },
{ url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" },
{ url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" },
{ url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" },
{ url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" },
{ url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" },
{ url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" },
{ url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" },
{ url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" },
{ url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" },
{ url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" },
{ url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" },
{ url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" },
{ url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" },
{ url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" },
{ url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" },
{ url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" },
{ url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" },
{ url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" },
{ url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" },
{ url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" },
{ url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" },
{ url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" },
{ url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" },
{ url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" },
{ url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" },
{ url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" },
{ url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" },
{ url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" },
{ url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" },
{ url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" },
{ url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" },
{ url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" },
]
[[package]]
name = "macholib"
version = "1.16.4"
@@ -976,6 +1219,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4d/4b/e7beb6bbd49f6bae727a12b270a2654d13c397576d25bd6786e47033300f/matplotlib-3.10.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:595ba4d8fe983b88f0eec8c26a241e16d6376fe1979086232f481f8f3f67494c", size = 9614011, upload-time = "2025-12-10T22:56:33.85Z" },
]
[[package]]
name = "mbstrdecoder"
version = "1.1.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "chardet", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/31/ab/05ae008357c8bdb6245ebf8a101d99f26c096e0ea20800b318153da23796/mbstrdecoder-1.1.4.tar.gz", hash = "sha256:8105ef9cf6b7d7d69fe7fd6b68a2d8f281ca9b365d7a9b670be376b2e6c81b21", size = 14527, upload-time = "2025-01-18T10:07:31.089Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/30/ac/5ce64a1d4cce00390beab88622a290420401f1cabf05caf2fc0995157c21/mbstrdecoder-1.1.4-py3-none-any.whl", hash = "sha256:03dae4ec50ec0d2ff4743e63fdbd5e0022815857494d35224b60775d3d934a8c", size = 7933, upload-time = "2025-01-18T10:07:29.562Z" },
]
[[package]]
name = "mdurl"
version = "0.1.2"
@@ -1185,6 +1440,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
]
[[package]]
name = "multiprocess"
version = "0.70.18"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
{ url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
{ url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
{ url = "https://files.pythonhosted.org/packages/ee/25/7d7e78e750bc1aecfaf0efbf826c69a791d2eeaf29cf20cba93ff4cced78/multiprocess-0.70.18-py313-none-any.whl", hash = "sha256:871743755f43ef57d7910a38433cfe41319e72be1bbd90b79c7a5ac523eb9334", size = 151917, upload-time = "2025-04-17T03:11:24.044Z" },
{ url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
{ url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
]
[[package]]
name = "networkx"
version = "3.6.1"
@@ -1221,6 +1493,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e2/c8/97a2d5f7a314cce2c5c49f30c6f161b7f3617960ade4bfc2fd1ee092cb20/nh3-0.3.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:91e9b001101fb4500a2aafe3e7c92928d85242d38bf5ac0aba0b7480da0a4cd6", size = 987439, upload-time = "2025-10-30T11:17:40.81Z" },
]
[[package]]
name = "nltk"
version = "3.9.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "joblib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, upload-time = "2025-10-01T07:19:23.764Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload-time = "2025-10-01T07:19:21.648Z" },
]
[[package]]
name = "nodejs-wheel-binaries"
version = "25.2.1rc0"
@@ -1235,6 +1522,41 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/11/3a/865f45bca0f6daf6a6150e20ae4e1ef1757574967b5c1a55705eb1a3aa51/nodejs_wheel_binaries-25.2.1rc0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8c30fe61adfcf89002002438fe810ebd660a856417540578aeb6eb4b9ef88c74", size = 63431735, upload-time = "2025-11-24T22:56:07.462Z" },
]
[[package]]
name = "numexpr"
version = "2.14.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/cb/2f/fdba158c9dbe5caca9c3eca3eaffffb251f2fb8674bf8e2d0aed5f38d319/numexpr-2.14.1.tar.gz", hash = "sha256:4be00b1086c7b7a5c32e31558122b7b80243fe098579b170967da83f3152b48b", size = 119400, upload-time = "2025-10-13T16:17:27.351Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/73/b4/9f6d637fd79df42be1be29ee7ba1f050fab63b7182cb922a0e08adc12320/numexpr-2.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:09078ba73cffe94745abfbcc2d81ab8b4b4e9d7bfbbde6cac2ee5dbf38eee222", size = 162794, upload-time = "2025-10-13T16:16:38.291Z" },
{ url = "https://files.pythonhosted.org/packages/35/ae/d58558d8043de0c49f385ea2fa789e3cfe4d436c96be80200c5292f45f15/numexpr-2.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dce0b5a0447baa7b44bc218ec2d7dcd175b8eee6083605293349c0c1d9b82fb6", size = 152203, upload-time = "2025-10-13T16:16:39.907Z" },
{ url = "https://files.pythonhosted.org/packages/13/65/72b065f9c75baf8f474fd5d2b768350935989d4917db1c6c75b866d4067c/numexpr-2.14.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:06855053de7a3a8425429bd996e8ae3c50b57637ad3e757e0fa0602a7874be30", size = 455860, upload-time = "2025-10-13T16:13:35.811Z" },
{ url = "https://files.pythonhosted.org/packages/fc/f9/c9457652dfe28e2eb898372da2fe786c6db81af9540c0f853ee04a0699cc/numexpr-2.14.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c", size = 446574, upload-time = "2025-10-13T16:15:17.367Z" },
{ url = "https://files.pythonhosted.org/packages/b6/99/8d3879c4d67d3db5560cf2de65ce1778b80b75f6fa415eb5c3e7bd37ba27/numexpr-2.14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c5f1b1605695778896534dfc6e130d54a65cd52be7ed2cd0cfee3981fd676bf5", size = 1417306, upload-time = "2025-10-13T16:13:42.813Z" },
{ url = "https://files.pythonhosted.org/packages/ea/05/6bddac9f18598ba94281e27a6943093f7d0976544b0cb5d92272c64719bd/numexpr-2.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a4ba71db47ea99c659d88ee6233fa77b6dc83392f1d324e0c90ddf617ae3f421", size = 1466145, upload-time = "2025-10-13T16:15:27.464Z" },
{ url = "https://files.pythonhosted.org/packages/f3/76/7aac965fd93a56803cbe502aee2adcad667253ae34b0badf6c5af7908b6c/numexpr-2.14.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:557887ad7f5d3c2a40fd7310e50597045a68e66b20a77b3f44d7bc7608523b4b", size = 163524, upload-time = "2025-10-13T16:16:42.213Z" },
{ url = "https://files.pythonhosted.org/packages/58/65/79d592d5e63fbfab3b59a60c386853d9186a44a3fa3c87ba26bdc25b6195/numexpr-2.14.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:af111c8fe6fc55d15e4c7cab11920fc50740d913636d486545b080192cd0ad73", size = 152919, upload-time = "2025-10-13T16:16:44.229Z" },
{ url = "https://files.pythonhosted.org/packages/84/78/3c8335f713d4aeb99fa758d7c62f0be1482d4947ce5b508e2052bb7aeee9/numexpr-2.14.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33265294376e7e2ae4d264d75b798a915d2acf37b9dd2b9405e8b04f84d05cfc", size = 465972, upload-time = "2025-10-13T16:13:45.061Z" },
{ url = "https://files.pythonhosted.org/packages/35/81/9ee5f69b811e8f18746c12d6f71848617684edd3161927f95eee7a305631/numexpr-2.14.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:83647d846d3eeeb9a9255311236135286728b398d0d41d35dedb532dca807fe9", size = 456953, upload-time = "2025-10-13T16:15:31.186Z" },
{ url = "https://files.pythonhosted.org/packages/6d/39/9b8bc6e294d85cbb54a634e47b833e9f3276a8bdf7ce92aa808718a0212d/numexpr-2.14.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6e575fd3ad41ddf3355d0c7ef6bd0168619dc1779a98fe46693cad5e95d25e6e", size = 1426199, upload-time = "2025-10-13T16:13:48.231Z" },
{ url = "https://files.pythonhosted.org/packages/1e/ce/0d4fcd31ab49319740d934fba1734d7dad13aa485532ca754e555ca16c8b/numexpr-2.14.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:67ea4771029ce818573b1998f5ca416bd255156feea017841b86176a938f7d19", size = 1474214, upload-time = "2025-10-13T16:15:38.893Z" },
{ url = "https://files.pythonhosted.org/packages/ac/36/9db78dfbfdfa1f8bf0872993f1a334cdd8fca5a5b6567e47dcb128bcb7c2/numexpr-2.14.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ede79f7ff06629f599081de644546ce7324f1581c09b0ac174da88a470d39c21", size = 162848, upload-time = "2025-10-13T16:16:46.216Z" },
{ url = "https://files.pythonhosted.org/packages/13/c1/a5c78ae637402c5550e2e0ba175275d2515d432ec28af0cdc23c9b476e65/numexpr-2.14.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2eac7a5a2f70b3768c67056445d1ceb4ecd9b853c8eda9563823b551aeaa5082", size = 152270, upload-time = "2025-10-13T16:16:47.92Z" },
{ url = "https://files.pythonhosted.org/packages/9a/ed/aabd8678077848dd9a751c5558c2057839f5a09e2a176d8dfcd0850ee00e/numexpr-2.14.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5aedf38d4c0c19d3cecfe0334c3f4099fb496f54c146223d30fa930084bc8574", size = 455918, upload-time = "2025-10-13T16:13:50.338Z" },
{ url = "https://files.pythonhosted.org/packages/88/e1/3db65117f02cdefb0e5e4c440daf1c30beb45051b7f47aded25b7f4f2f34/numexpr-2.14.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:439ec4d57b853792ebe5456e3160312281c3a7071ecac5532ded3278ede614de", size = 446512, upload-time = "2025-10-13T16:15:42.313Z" },
{ url = "https://files.pythonhosted.org/packages/9a/fb/7ceb9ee55b5f67e4a3e4d73d5af4c7e37e3c9f37f54bee90361b64b17e3f/numexpr-2.14.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e23b87f744e04e302d82ac5e2189ae20a533566aec76a46885376e20b0645bf8", size = 1417845, upload-time = "2025-10-13T16:13:53.836Z" },
{ url = "https://files.pythonhosted.org/packages/45/2d/9b5764d0eafbbb2889288f80de773791358acf6fad1a55767538d8b79599/numexpr-2.14.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:44f84e0e5af219dbb62a081606156420815890e041b87252fbcea5df55214c4c", size = 1466211, upload-time = "2025-10-13T16:15:48.985Z" },
{ url = "https://files.pythonhosted.org/packages/7f/d6/ec947806bb57836d6379a8c8a253c2aeaa602b12fef2336bfd2462bb4ed5/numexpr-2.14.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ec368819502b64f190c3f71be14a304780b5935c42aae5bf22c27cc2cbba70b5", size = 163525, upload-time = "2025-10-13T16:16:50.133Z" },
{ url = "https://files.pythonhosted.org/packages/0d/77/048f30dcf661a3d52963a88c29b52b6d5ce996d38e9313a56a922451c1e0/numexpr-2.14.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7e87f6d203ac57239de32261c941e9748f9309cbc0da6295eabd0c438b920d3a", size = 152917, upload-time = "2025-10-13T16:16:52.055Z" },
{ url = "https://files.pythonhosted.org/packages/9e/d3/956a13e628d722d649fbf2fded615134a308c082e122a48bad0e90a99ce9/numexpr-2.14.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd72d8c2a165fe45ea7650b16eb8cc1792a94a722022006bb97c86fe51fd2091", size = 466242, upload-time = "2025-10-13T16:13:55.795Z" },
{ url = "https://files.pythonhosted.org/packages/d6/dd/abe848678d82486940892f2cacf39e82eec790e8930d4d713d3f9191063b/numexpr-2.14.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70d80fcb418a54ca208e9a38e58ddc425c07f66485176b261d9a67c7f2864f73", size = 457149, upload-time = "2025-10-13T16:15:52.036Z" },
{ url = "https://files.pythonhosted.org/packages/fd/bb/797b583b5fb9da5700a5708ca6eb4f889c94d81abb28de4d642c0f4b3258/numexpr-2.14.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:edea2f20c2040df8b54ee8ca8ebda63de9545b2112872466118e9df4d0ae99f3", size = 1426493, upload-time = "2025-10-13T16:13:59.244Z" },
{ url = "https://files.pythonhosted.org/packages/77/c4/0519ab028fdc35e3e7ee700def7f2b4631b175cd9e1202bd7966c1695c33/numexpr-2.14.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:790447be6879a6c51b9545f79612d24c9ea0a41d537a84e15e6a8ddef0b6268e", size = 1474413, upload-time = "2025-10-13T16:15:59.211Z" },
]
[[package]]
name = "numpy"
version = "2.4.1"
@@ -1492,6 +1814,72 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/35/ddf3a6e8fc754fb939e2ea36fde96c28189184d6115afcf60011bb438ae5/packaging-26.0rc1-py3-none-any.whl", hash = "sha256:ecf921b33c620e357b1eed2ac3bc6313b1582874b0282d0773b6797b79cb0786", size = 74021, upload-time = "2026-01-09T17:41:17.134Z" },
]
[[package]]
name = "pandas"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/de/da/b1dc0481ab8d55d0f46e343cfe67d4551a0e14fcee52bd38ca1bd73258d8/pandas-3.0.0.tar.gz", hash = "sha256:0facf7e87d38f721f0af46fe70d97373a37701b1c09f7ed7aeeb292ade5c050f", size = 4633005, upload-time = "2026-01-21T15:52:04.726Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6f/fa/7f0ac4ca8877c57537aaff2a842f8760e630d8e824b730eb2e859ffe96ca/pandas-3.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b78d646249b9a2bc191040988c7bb524c92fa8534fb0898a0741d7e6f2ffafa6", size = 10307129, upload-time = "2026-01-21T15:50:52.877Z" },
{ url = "https://files.pythonhosted.org/packages/6f/11/28a221815dcea4c0c9414dfc845e34a84a6a7dabc6da3194498ed5ba4361/pandas-3.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bc9cba7b355cb4162442a88ce495e01cb605f17ac1e27d6596ac963504e0305f", size = 9850201, upload-time = "2026-01-21T15:50:54.807Z" },
{ url = "https://files.pythonhosted.org/packages/ba/da/53bbc8c5363b7e5bd10f9ae59ab250fc7a382ea6ba08e4d06d8694370354/pandas-3.0.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c9a1a149aed3b6c9bf246033ff91e1b02d529546c5d6fb6b74a28fea0cf4c70", size = 10354031, upload-time = "2026-01-21T15:50:57.463Z" },
{ url = "https://files.pythonhosted.org/packages/f7/a3/51e02ebc2a14974170d51e2410dfdab58870ea9bcd37cda15bd553d24dc4/pandas-3.0.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95683af6175d884ee89471842acfca29172a85031fccdabc35e50c0984470a0e", size = 10861165, upload-time = "2026-01-21T15:50:59.32Z" },
{ url = "https://files.pythonhosted.org/packages/a5/fe/05a51e3cac11d161472b8297bd41723ea98013384dd6d76d115ce3482f9b/pandas-3.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1fbbb5a7288719e36b76b4f18d46ede46e7f916b6c8d9915b756b0a6c3f792b3", size = 11359359, upload-time = "2026-01-21T15:51:02.014Z" },
{ url = "https://files.pythonhosted.org/packages/ee/56/ba620583225f9b85a4d3e69c01df3e3870659cc525f67929b60e9f21dcd1/pandas-3.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e8b9808590fa364416b49b2a35c1f4cf2785a6c156935879e57f826df22038e", size = 11912907, upload-time = "2026-01-21T15:51:05.175Z" },
{ url = "https://files.pythonhosted.org/packages/8e/c6/f5e2171914d5e29b9171d495344097d54e3ffe41d2d85d8115baba4dc483/pandas-3.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2713810ad3806767b89ad3b7b69ba153e1c6ff6d9c20f9c2140379b2a98b6c98", size = 10741936, upload-time = "2026-01-21T15:51:11.693Z" },
{ url = "https://files.pythonhosted.org/packages/51/88/9a0164f99510a1acb9f548691f022c756c2314aad0d8330a24616c14c462/pandas-3.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:15d59f885ee5011daf8335dff47dcb8a912a27b4ad7826dc6cbe809fd145d327", size = 10393884, upload-time = "2026-01-21T15:51:14.197Z" },
{ url = "https://files.pythonhosted.org/packages/e0/53/b34d78084d88d8ae2b848591229da8826d1e65aacf00b3abe34023467648/pandas-3.0.0-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24e6547fb64d2c92665dd2adbfa4e85fa4fd70a9c070e7cfb03b629a0bbab5eb", size = 10310740, upload-time = "2026-01-21T15:51:16.093Z" },
{ url = "https://files.pythonhosted.org/packages/5b/d3/bee792e7c3d6930b74468d990604325701412e55d7aaf47460a22311d1a5/pandas-3.0.0-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:48ee04b90e2505c693d3f8e8f524dab8cb8aaf7ddcab52c92afa535e717c4812", size = 10700014, upload-time = "2026-01-21T15:51:18.818Z" },
{ url = "https://files.pythonhosted.org/packages/55/db/2570bc40fb13aaed1cbc3fbd725c3a60ee162477982123c3adc8971e7ac1/pandas-3.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:66f72fb172959af42a459e27a8d8d2c7e311ff4c1f7db6deb3b643dbc382ae08", size = 11323737, upload-time = "2026-01-21T15:51:20.784Z" },
{ url = "https://files.pythonhosted.org/packages/bc/2e/297ac7f21c8181b62a4cccebad0a70caf679adf3ae5e83cb676194c8acc3/pandas-3.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4a4a400ca18230976724a5066f20878af785f36c6756e498e94c2a5e5d57779c", size = 11771558, upload-time = "2026-01-21T15:51:22.977Z" },
{ url = "https://files.pythonhosted.org/packages/c0/db/0270ad9d13c344b7a36fa77f5f8344a46501abf413803e885d22864d10bf/pandas-3.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:597c08fb9fef0edf1e4fa2f9828dd27f3d78f9b8c9b4a748d435ffc55732310b", size = 10312075, upload-time = "2026-01-21T15:51:28.5Z" },
{ url = "https://files.pythonhosted.org/packages/09/9f/c176f5e9717f7c91becfe0f55a52ae445d3f7326b4a2cf355978c51b7913/pandas-3.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:447b2d68ac5edcbf94655fe909113a6dba6ef09ad7f9f60c80477825b6c489fe", size = 9900213, upload-time = "2026-01-21T15:51:30.955Z" },
{ url = "https://files.pythonhosted.org/packages/d9/e7/63ad4cc10b257b143e0a5ebb04304ad806b4e1a61c5da25f55896d2ca0f4/pandas-3.0.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:debb95c77ff3ed3ba0d9aa20c3a2f19165cc7956362f9873fce1ba0a53819d70", size = 10428768, upload-time = "2026-01-21T15:51:33.018Z" },
{ url = "https://files.pythonhosted.org/packages/9e/0e/4e4c2d8210f20149fd2248ef3fff26623604922bd564d915f935a06dd63d/pandas-3.0.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fedabf175e7cd82b69b74c30adbaa616de301291a5231138d7242596fc296a8d", size = 10882954, upload-time = "2026-01-21T15:51:35.287Z" },
{ url = "https://files.pythonhosted.org/packages/c6/60/c9de8ac906ba1f4d2250f8a951abe5135b404227a55858a75ad26f84db47/pandas-3.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:412d1a89aab46889f3033a386912efcdfa0f1131c5705ff5b668dda88305e986", size = 11430293, upload-time = "2026-01-21T15:51:37.57Z" },
{ url = "https://files.pythonhosted.org/packages/a1/69/806e6637c70920e5787a6d6896fd707f8134c2c55cd761e7249a97b7dc5a/pandas-3.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e979d22316f9350c516479dd3a92252be2937a9531ed3a26ec324198a99cdd49", size = 11952452, upload-time = "2026-01-21T15:51:39.618Z" },
{ url = "https://files.pythonhosted.org/packages/ce/26/430d91257eaf366f1737d7a1c158677caaf6267f338ec74e3a1ec444111c/pandas-3.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:697b8f7d346c68274b1b93a170a70974cdc7d7354429894d5927c1effdcccd73", size = 10761999, upload-time = "2026-01-21T15:51:46.899Z" },
{ url = "https://files.pythonhosted.org/packages/ec/1a/954eb47736c2b7f7fe6a9d56b0cb6987773c00faa3c6451a43db4beb3254/pandas-3.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8cb3120f0d9467ed95e77f67a75e030b67545bcfa08964e349252d674171def2", size = 10410279, upload-time = "2026-01-21T15:51:48.89Z" },
{ url = "https://files.pythonhosted.org/packages/20/fc/b96f3a5a28b250cd1b366eb0108df2501c0f38314a00847242abab71bb3a/pandas-3.0.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33fd3e6baa72899746b820c31e4b9688c8e1b7864d7aec2de7ab5035c285277a", size = 10330198, upload-time = "2026-01-21T15:51:51.015Z" },
{ url = "https://files.pythonhosted.org/packages/90/b3/d0e2952f103b4fbef1ef22d0c2e314e74fc9064b51cee30890b5e3286ee6/pandas-3.0.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8942e333dc67ceda1095227ad0febb05a3b36535e520154085db632c40ad084", size = 10728513, upload-time = "2026-01-21T15:51:53.387Z" },
{ url = "https://files.pythonhosted.org/packages/76/81/832894f286df828993dc5fd61c63b231b0fb73377e99f6c6c369174cf97e/pandas-3.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:783ac35c4d0fe0effdb0d67161859078618b1b6587a1af15928137525217a721", size = 11345550, upload-time = "2026-01-21T15:51:55.329Z" },
{ url = "https://files.pythonhosted.org/packages/34/a0/ed160a00fb4f37d806406bc0a79a8b62fe67f29d00950f8d16203ff3409b/pandas-3.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:125eb901e233f155b268bbef9abd9afb5819db74f0e677e89a61b246228c71ac", size = 11799386, upload-time = "2026-01-21T15:51:57.457Z" },
]
[[package]]
name = "pathvalidate"
version = "3.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" },
]
[[package]]
name = "peft"
version = "0.18.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "accelerate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "torch", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d8/48/147b3ea999560b40a34fd78724c7777aa9d18409c2250bdcaf9c4f2db7fc/peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2", size = 635030, upload-time = "2026-01-09T13:08:01.136Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/14/b4e3f574acf349ae6f61f9c000a77f97a3b315b4bb6ad03791e79ae4a568/peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1", size = 556960, upload-time = "2026-01-09T13:07:55.865Z" },
]
[[package]]
name = "piexif"
version = "1.1.3"
@@ -1562,6 +1950,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "portalocker"
version = "3.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644, upload-time = "2025-06-14T13:20:40.03Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424, upload-time = "2025-06-14T13:20:38.083Z" },
]
[[package]]
name = "priority"
version = "2.0.0"
@@ -1663,6 +2060,47 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload-time = "2025-12-29T08:26:39.21Z" },
]
[[package]]
name = "pyarrow"
version = "23.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" },
{ url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" },
{ url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" },
{ url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" },
{ url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" },
{ url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" },
{ url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" },
{ url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" },
{ url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" },
{ url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" },
{ url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" },
{ url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" },
{ url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" },
{ url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" },
{ url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" },
{ url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" },
{ url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" },
{ url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" },
{ url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" },
{ url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" },
{ url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" },
{ url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" },
{ url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" },
{ url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" },
]
[[package]]
name = "pybind11"
version = "3.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2f/7b/a6d8dcb83c457e24a9df1e4d8fd5fb8034d4bbc62f3c324681e8a9ba57c2/pybind11-3.0.1.tar.gz", hash = "sha256:9c0f40056a016da59bab516efb523089139fcc6f2ba7e4930854c61efb932051", size = 546914, upload-time = "2025-08-22T20:09:27.265Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cd/8a/37362fc2b949d5f733a8b0f2ff51ba423914cabefe69f1d1b6aab710f5fe/pybind11-3.0.1-py3-none-any.whl", hash = "sha256:aa8f0aa6e0a94d3b64adfc38f560f33f15e589be2175e103c0a33c6bce55ee89", size = 293611, upload-time = "2025-08-22T20:09:25.235Z" },
]
[[package]]
name = "pycparser"
version = "2.23"
@@ -1785,6 +2223,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8b/40/2614036cdd416452f5bf98ec037f38a1afb17f327cb8e6b652d4729e0af8/pyparsing-3.3.1-py3-none-any.whl", hash = "sha256:023b5e7e5520ad96642e2c6db4cb683d3970bd640cdf7115049a6e9c3682df82", size = 121793, upload-time = "2025-12-23T03:14:02.103Z" },
]
[[package]]
name = "pytablewriter"
version = "1.2.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "dataproperty", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tabledata", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tcolorpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typepy", extra = ["datetime"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f6/a1/617730f290f04d347103ab40bf67d317df6691b14746f6e1ea039fb57062/pytablewriter-1.2.1.tar.gz", hash = "sha256:7bd0f4f397e070e3b8a34edcf1b9257ccbb18305493d8350a5dbc9957fced959", size = 619241, upload-time = "2025-01-01T15:37:00.04Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/21/4c/c199512f01c845dfe5a7840ab3aae6c60463b5dc2a775be72502dfd9170a/pytablewriter-1.2.1-py3-none-any.whl", hash = "sha256:e906ff7ff5151d70a5f66e0f7b75642a7f2dce8d893c265b79cc9cf6bc04ddb4", size = 91083, upload-time = "2025-01-01T15:36:55.63Z" },
]
[[package]]
name = "pytest"
version = "9.0.2"
@@ -1845,6 +2301,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" },
]
[[package]]
name = "pytz"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
]
[[package]]
name = "pyyaml"
version = "6.0.3"
@@ -1989,6 +2454,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" },
]

[[package]]
name = "rouge-score"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "absl-py", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "nltk", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e2/c5/9136736c37022a6ad27fea38f3111eb8f02fe75d067f9a985cc358653102/rouge_score-0.1.2.tar.gz", hash = "sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04", size = 17400, upload-time = "2022-07-22T22:46:22.909Z" }

[[package]]
name = "ruff"
version = "0.14.11"
@@ -2032,6 +2509,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4f/ef/c9199e4b6336ee5a9f1979c11b5779c5cf9ab6f8386e0b9a96c8ffba7009/rustworkx-0.17.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:48784a673cf8d04f3cd246fa6b53fd1ccc4d83304503463bd561c153517bccc1", size = 2302783, upload-time = "2025-08-13T01:43:42.073Z" },
]

[[package]]
name = "sacrebleu"
version = "2.6.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "lxml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "portalocker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tabulate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d3/ed/d7acddcff74d690c56fe26a1f7828bdde548262828d0743414ea916c40c1/sacrebleu-2.6.0.tar.gz", hash = "sha256:91499b6cd46138d95154fff1e863c2f9be57e82f0c719d8dd718d0006cf6c566", size = 1893419, upload-time = "2026-01-12T17:17:20.799Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/06/f2/6c90ccf3ad1d09a7d662a405b274f3c93b92df59c8d6a025d26aaf34d302/sacrebleu-2.6.0-py3-none-any.whl", hash = "sha256:3edc1531575cfe4ad04ce53491a9307e234af1c3f805a1f491cbec844229a8a8", size = 100785, upload-time = "2026-01-12T17:17:18.868Z" },
]

[[package]]
name = "safetensors"
version = "0.7.0"
@@ -2052,6 +2546,79 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" },
]

[[package]]
name = "scikit-learn"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "joblib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "scipy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "threadpoolctl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload-time = "2025-12-10T07:08:03.251Z" },
{ url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" },
{ url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" },
{ url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" },
{ url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" },
{ url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" },
{ url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" },
{ url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" },
{ url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" },
{ url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" },
{ url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" },
{ url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" },
{ url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" },
{ url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" },
{ url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" },
{ url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" },
]

[[package]]
name = "scipy"
version = "1.17.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload-time = "2026-01-10T21:34:23.009Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/51/3468fdfd49387ddefee1636f5cf6d03ce603b75205bf439bbf0e62069bfd/scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6", size = 31344101, upload-time = "2026-01-10T21:26:30.25Z" },
{ url = "https://files.pythonhosted.org/packages/b2/9a/9406aec58268d437636069419e6977af953d1e246df941d42d3720b7277b/scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269", size = 27950385, upload-time = "2026-01-10T21:26:36.801Z" },
{ url = "https://files.pythonhosted.org/packages/4f/98/e7342709e17afdfd1b26b56ae499ef4939b45a23a00e471dfb5375eea205/scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72", size = 20122115, upload-time = "2026-01-10T21:26:42.107Z" },
{ url = "https://files.pythonhosted.org/packages/fd/0e/9eeeb5357a64fd157cbe0302c213517c541cc16b8486d82de251f3c68ede/scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61", size = 22442402, upload-time = "2026-01-10T21:26:48.029Z" },
{ url = "https://files.pythonhosted.org/packages/c9/10/be13397a0e434f98e0c79552b2b584ae5bb1c8b2be95db421533bbca5369/scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6", size = 32696338, upload-time = "2026-01-10T21:26:55.521Z" },
{ url = "https://files.pythonhosted.org/packages/63/1e/12fbf2a3bb240161651c94bb5cdd0eae5d4e8cc6eaeceb74ab07b12a753d/scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752", size = 34977201, upload-time = "2026-01-10T21:27:03.501Z" },
{ url = "https://files.pythonhosted.org/packages/19/5b/1a63923e23ccd20bd32156d7dd708af5bbde410daa993aa2500c847ab2d2/scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d", size = 34777384, upload-time = "2026-01-10T21:27:11.423Z" },
{ url = "https://files.pythonhosted.org/packages/39/22/b5da95d74edcf81e540e467202a988c50fef41bd2011f46e05f72ba07df6/scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea", size = 37379586, upload-time = "2026-01-10T21:27:20.171Z" },
{ url = "https://files.pythonhosted.org/packages/20/b6/7feaa252c21cc7aff335c6c55e1b90ab3e3306da3f048109b8b639b94648/scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3", size = 31693194, upload-time = "2026-01-10T21:27:27.454Z" },
{ url = "https://files.pythonhosted.org/packages/76/bb/bbb392005abce039fb7e672cb78ac7d158700e826b0515cab6b5b60c26fb/scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97", size = 28365415, upload-time = "2026-01-10T21:27:34.26Z" },
{ url = "https://files.pythonhosted.org/packages/37/da/9d33196ecc99fba16a409c691ed464a3a283ac454a34a13a3a57c0d66f3a/scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e", size = 20537232, upload-time = "2026-01-10T21:27:40.306Z" },
{ url = "https://files.pythonhosted.org/packages/56/9d/f4b184f6ddb28e9a5caea36a6f98e8ecd2a524f9127354087ce780885d83/scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07", size = 22791051, upload-time = "2026-01-10T21:27:46.539Z" },
{ url = "https://files.pythonhosted.org/packages/9b/9d/025cccdd738a72140efc582b1641d0dd4caf2e86c3fb127568dc80444e6e/scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00", size = 32815098, upload-time = "2026-01-10T21:27:54.389Z" },
{ url = "https://files.pythonhosted.org/packages/48/5f/09b879619f8bca15ce392bfc1894bd9c54377e01d1b3f2f3b595a1b4d945/scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45", size = 35031342, upload-time = "2026-01-10T21:28:03.012Z" },
{ url = "https://files.pythonhosted.org/packages/f2/9a/f0f0a9f0aa079d2f106555b984ff0fbb11a837df280f04f71f056ea9c6e4/scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209", size = 34893199, upload-time = "2026-01-10T21:28:10.832Z" },
{ url = "https://files.pythonhosted.org/packages/90/b8/4f0f5cf0c5ea4d7548424e6533e6b17d164f34a6e2fb2e43ffebb6697b06/scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04", size = 37438061, upload-time = "2026-01-10T21:28:19.684Z" },
{ url = "https://files.pythonhosted.org/packages/1a/2d/51006cd369b8e7879e1c630999a19d1fbf6f8b5ed3e33374f29dc87e53b3/scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a", size = 31346803, upload-time = "2026-01-10T21:28:57.24Z" },
{ url = "https://files.pythonhosted.org/packages/d6/2e/2349458c3ce445f53a6c93d4386b1c4c5c0c540917304c01222ff95ff317/scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2", size = 27967182, upload-time = "2026-01-10T21:29:04.107Z" },
{ url = "https://files.pythonhosted.org/packages/5e/7c/df525fbfa77b878d1cfe625249529514dc02f4fd5f45f0f6295676a76528/scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467", size = 20139125, upload-time = "2026-01-10T21:29:10.179Z" },
{ url = "https://files.pythonhosted.org/packages/33/11/fcf9d43a7ed1234d31765ec643b0515a85a30b58eddccc5d5a4d12b5f194/scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e", size = 22443554, upload-time = "2026-01-10T21:29:15.888Z" },
{ url = "https://files.pythonhosted.org/packages/80/5c/ea5d239cda2dd3d31399424967a24d556cf409fbea7b5b21412b0fd0a44f/scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67", size = 32757834, upload-time = "2026-01-10T21:29:23.406Z" },
{ url = "https://files.pythonhosted.org/packages/b8/7e/8c917cc573310e5dc91cbeead76f1b600d3fb17cf0969db02c9cf92e3cfa/scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73", size = 34995775, upload-time = "2026-01-10T21:29:31.915Z" },
{ url = "https://files.pythonhosted.org/packages/c5/43/176c0c3c07b3f7df324e7cdd933d3e2c4898ca202b090bd5ba122f9fe270/scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b", size = 34841240, upload-time = "2026-01-10T21:29:39.995Z" },
{ url = "https://files.pythonhosted.org/packages/44/8c/d1f5f4b491160592e7f084d997de53a8e896a3ac01cd07e59f43ca222744/scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b", size = 37394463, upload-time = "2026-01-10T21:29:48.723Z" },
{ url = "https://files.pythonhosted.org/packages/e9/01/f58916b9d9ae0112b86d7c3b10b9e685625ce6e8248df139d0fcb17f7397/scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1", size = 31706502, upload-time = "2026-01-10T21:29:56.326Z" },
{ url = "https://files.pythonhosted.org/packages/59/8e/2912a87f94a7d1f8b38aabc0faf74b82d3b6c9e22be991c49979f0eceed8/scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1", size = 28380854, upload-time = "2026-01-10T21:30:01.554Z" },
{ url = "https://files.pythonhosted.org/packages/bd/1c/874137a52dddab7d5d595c1887089a2125d27d0601fce8c0026a24a92a0b/scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232", size = 20552752, upload-time = "2026-01-10T21:30:05.93Z" },
{ url = "https://files.pythonhosted.org/packages/3f/f0/7518d171cb735f6400f4576cf70f756d5b419a07fe1867da34e2c2c9c11b/scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d", size = 22803972, upload-time = "2026-01-10T21:30:10.651Z" },
{ url = "https://files.pythonhosted.org/packages/7c/74/3498563a2c619e8a3ebb4d75457486c249b19b5b04a30600dfd9af06bea5/scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba", size = 32829770, upload-time = "2026-01-10T21:30:16.359Z" },
{ url = "https://files.pythonhosted.org/packages/48/d1/7b50cedd8c6c9d6f706b4b36fa8544d829c712a75e370f763b318e9638c1/scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db", size = 35051093, upload-time = "2026-01-10T21:30:22.987Z" },
{ url = "https://files.pythonhosted.org/packages/e2/82/a2d684dfddb87ba1b3ea325df7c3293496ee9accb3a19abe9429bce94755/scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf", size = 34909905, upload-time = "2026-01-10T21:30:28.704Z" },
{ url = "https://files.pythonhosted.org/packages/ef/5e/e565bd73991d42023eb82bb99e51c5b3d9e2c588ca9d4b3e2cc1d3ca62a6/scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f", size = 37457743, upload-time = "2026-01-10T21:30:34.819Z" },
]

[[package]]
name = "secretstorage"
version = "3.5.0"
@@ -2129,6 +2696,12 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
]

[[package]]
name = "sqlitedict"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/12/9a/7620d1e9dcb02839ed6d4b14064e609cdd7a8ae1e47289aa0456796dd9ca/sqlitedict-2.1.0.tar.gz", hash = "sha256:03d9cfb96d602996f1d4c2db2856f1224b96a9c431bdd16e78032a72940f9e8c", size = 21846, upload-time = "2022-12-03T13:39:13.102Z" }

[[package]]
name = "starlette"
version = "0.50.0"
@@ -2153,6 +2726,55 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" },
]

[[package]]
name = "tabledata"
version = "1.3.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "dataproperty", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typepy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b2/35/171c8977162f1163368406deddde4c59673b62bd0cb2f34948a02effb075/tabledata-1.3.4.tar.gz", hash = "sha256:e9649cab129d718f3bff4150083b77f8a78c30f6634a30caf692b10fdc60cb97", size = 25074, upload-time = "2024-12-31T14:12:31.198Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/08/64/fa4160151976ee4b2cf0c1217a99443ffaeb991956feddfeac9eee9952f8/tabledata-1.3.4-py3-none-any.whl", hash = "sha256:1f56e433bfdeb89f4487abfa48c4603a3b07c5d3a3c7e05ff73dd018c24bd0d4", size = 11820, upload-time = "2024-12-31T14:12:28.584Z" },
]

[[package]]
name = "tabulate"
version = "0.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
]

[[package]]
name = "tcolorpy"
version = "0.1.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/80/cc/44f2d81d8f9093aad81c3467a5bf5718d2b5f786e887b6e4adcfc17ec6b9/tcolorpy-0.1.7.tar.gz", hash = "sha256:0fbf6bf238890bbc2e32662aa25736769a29bf6d880328f310c910a327632614", size = 299437, upload-time = "2024-12-29T15:24:23.847Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/05/a2/ed023f2edd1e011b4d99b6727bce8253842d66c3fbf9ed0a26fc09a92571/tcolorpy-0.1.7-py3-none-any.whl", hash = "sha256:26a59d52027e175a37e0aba72efc99dda43f074db71f55b316d3de37d3251378", size = 8096, upload-time = "2024-12-29T15:24:21.33Z" },
]

[[package]]
name = "tenacity"
version = "9.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" },
]

[[package]]
name = "threadpoolctl"
version = "3.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
]

[[package]]
name = "tiktoken"
version = "0.12.0"
@@ -2283,6 +2905,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
]

[[package]]
name = "tqdm-multiprocess"
version = "0.0.11"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b4/1e/de81bd0f6cb2b61d6ee7ccbf304d99a42a0f53879481536dfb3288ee9a87/tqdm-multiprocess-0.0.11.tar.gz", hash = "sha256:a74002a1222ea9cbe8cdc9bd460108c6009be359621fbee9b92d0515d4d180f7", size = 8082, upload-time = "2020-10-27T06:57:54.313Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/25/7e/0d889fc6c84e3df6b69aaafe893fc77f69b3d968ac9ce574d1c62c688050/tqdm_multiprocess-0.0.11-py3-none-any.whl", hash = "sha256:3ebdf03e7a675150fa0bbceaa9c3c64b8cb556e9ffafa4fe6c078e51820524aa", size = 9817, upload-time = "2020-10-27T06:57:53.167Z" },
]

[[package]]
name = "transformers"
version = "5.0.0rc3"
@@ -2336,6 +2971,40 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3a/7a/882d99539b19b1490cac5d77c67338d126e4122c8276bf640e411650c830/twine-6.2.0-py3-none-any.whl", hash = "sha256:418ebf08ccda9a8caaebe414433b0ba5e25eb5e4a927667122fbe8f829f985d8", size = 42727, upload-time = "2025-09-04T15:43:15.994Z" },
]

[[package]]
name = "typepy"
version = "1.3.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mbstrdecoder", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/79/59/4c39942077d7de285f762a91024dbda731be693591732977358f77d120fb/typepy-1.3.4.tar.gz", hash = "sha256:89c1f66de6c6133209c43a94d23431d320ba03ef5db18f241091ea594035d9de", size = 39558, upload-time = "2024-12-29T09:18:15.774Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/31/e393c3830bdedd01735bd195c85ac3034b6bcaf6c18142bab60a4047ca36/typepy-1.3.4-py3-none-any.whl", hash = "sha256:d5ed3e0c7f49521bff0603dd08cf8d453371cf68d65a29d3d0038552ccc46e2e", size = 31449, upload-time = "2024-12-29T09:18:13.135Z" },
]

[package.optional-dependencies]
datetime = [
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pytz", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]

[[package]]
name = "typer"
version = "0.21.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "shellingham", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/36/bf/8825b5929afd84d0dabd606c67cd57b8388cb3ec385f7ef19c5cc2202069/typer-0.21.1.tar.gz", hash = "sha256:ea835607cd752343b6b2b7ce676893e5a0324082268b48f27aa058bdb7d2145d", size = 110371, upload-time = "2026-01-06T11:21:10.989Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" },
]

[[package]]
name = "typer-slim"
version = "0.21.1"
@@ -2388,6 +3057,12 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
]

[[package]]
name = "word2number"
version = "1.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4a/29/a31940c848521f0725f0df6b25dca8917f13a2025b0e8fcbe5d0457e45e6/word2number-1.1.zip", hash = "sha256:70e27a5d387f67b04c71fbb7621c05930b19bfd26efd6851e6e0f9969dcde7d0", size = 9723, upload-time = "2017-06-02T15:45:14.488Z" }

[[package]]
name = "wsproto"
version = "1.3.2"
@@ -2400,6 +3075,62 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405, upload-time = "2025-11-20T18:18:00.454Z" },
]

[[package]]
name = "xxhash"
version = "3.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/33/76/35d05267ac82f53ae9b0e554da7c5e281ee61f3cad44c743f0fcd354f211/xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec", size = 32738, upload-time = "2025-10-02T14:34:55.839Z" },
{ url = "https://files.pythonhosted.org/packages/31/a8/3fbce1cd96534a95e35d5120637bf29b0d7f5d8fa2f6374e31b4156dd419/xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1", size = 30821, upload-time = "2025-10-02T14:34:57.219Z" },
{ url = "https://files.pythonhosted.org/packages/0c/ea/d387530ca7ecfa183cb358027f1833297c6ac6098223fd14f9782cd0015c/xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6", size = 194127, upload-time = "2025-10-02T14:34:59.21Z" },
{ url = "https://files.pythonhosted.org/packages/ba/0c/71435dcb99874b09a43b8d7c54071e600a7481e42b3e3ce1eb5226a5711a/xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263", size = 212975, upload-time = "2025-10-02T14:35:00.816Z" },
{ url = "https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241, upload-time = "2025-10-02T14:35:02.207Z" },
{ url = "https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471, upload-time = "2025-10-02T14:35:03.61Z" },
{ url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936, upload-time = "2025-10-02T14:35:05.013Z" },
{ url = "https://files.pythonhosted.org/packages/2c/bd/4a5f68381939219abfe1c22a9e3a5854a4f6f6f3c4983a87d255f21f2e5d/xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7", size = 210440, upload-time = "2025-10-02T14:35:06.239Z" },
{ url = "https://files.pythonhosted.org/packages/eb/37/b80fe3d5cfb9faff01a02121a0f4d565eb7237e9e5fc66e73017e74dcd36/xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db", size = 197990, upload-time = "2025-10-02T14:35:07.735Z" },
{ url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689, upload-time = "2025-10-02T14:35:09.438Z" },
{ url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068, upload-time = "2025-10-02T14:35:11.162Z" },
{ url = "https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495, upload-time = "2025-10-02T14:35:12.971Z" },
{ url = "https://files.pythonhosted.org/packages/f3/30/25e5321c8732759e930c555176d37e24ab84365482d257c3b16362235212/xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702", size = 32956, upload-time = "2025-10-02T14:35:17.413Z" },
{ url = "https://files.pythonhosted.org/packages/9f/3c/0573299560d7d9f8ab1838f1efc021a280b5ae5ae2e849034ef3dee18810/xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db", size = 31072, upload-time = "2025-10-02T14:35:18.844Z" },
{ url = "https://files.pythonhosted.org/packages/7a/1c/52d83a06e417cd9d4137722693424885cc9878249beb3a7c829e74bf7ce9/xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54", size = 196409, upload-time = "2025-10-02T14:35:20.31Z" },
{ url = "https://files.pythonhosted.org/packages/e3/8e/c6d158d12a79bbd0b878f8355432075fc82759e356ab5a111463422a239b/xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f", size = 215736, upload-time = "2025-10-02T14:35:21.616Z" },
{ url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833, upload-time = "2025-10-02T14:35:23.32Z" },
{ url = "https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348, upload-time = "2025-10-02T14:35:25.111Z" },
{ url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070, upload-time = "2025-10-02T14:35:26.586Z" },
{ url = "https://files.pythonhosted.org/packages/96/b6/fcabd337bc5fa624e7203aa0fa7d0c49eed22f72e93229431752bddc83d9/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd", size = 212907, upload-time = "2025-10-02T14:35:28.087Z" },
{ url = "https://files.pythonhosted.org/packages/4b/d3/9ee6160e644d660fcf176c5825e61411c7f62648728f69c79ba237250143/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729", size = 200839, upload-time = "2025-10-02T14:35:29.857Z" },
{ url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304, upload-time = "2025-10-02T14:35:31.222Z" },
{ url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930, upload-time = "2025-10-02T14:35:32.517Z" },
{ url = "https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787, upload-time = "2025-10-02T14:35:33.827Z" },
{ url = "https://files.pythonhosted.org/packages/7e/5e/0138bc4484ea9b897864d59fce9be9086030825bc778b76cb5a33a906d37/xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e", size = 32754, upload-time = "2025-10-02T14:35:38.245Z" },
{ url = "https://files.pythonhosted.org/packages/18/d7/5dac2eb2ec75fd771957a13e5dda560efb2176d5203f39502a5fc571f899/xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405", size = 30846, upload-time = "2025-10-02T14:35:39.6Z" },
{ url = "https://files.pythonhosted.org/packages/fe/71/8bc5be2bb00deb5682e92e8da955ebe5fa982da13a69da5a40a4c8db12fb/xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3", size = 194343, upload-time = "2025-10-02T14:35:40.69Z" },
{ url = "https://files.pythonhosted.org/packages/e7/3b/52badfb2aecec2c377ddf1ae75f55db3ba2d321c5e164f14461c90837ef3/xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6", size = 213074, upload-time = "2025-10-02T14:35:42.29Z" },
{ url = "https://files.pythonhosted.org/packages/a2/2b/ae46b4e9b92e537fa30d03dbc19cdae57ed407e9c26d163895e968e3de85/xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063", size = 212388, upload-time = "2025-10-02T14:35:43.929Z" },
{ url = "https://files.pythonhosted.org/packages/f5/80/49f88d3afc724b4ac7fbd664c8452d6db51b49915be48c6982659e0e7942/xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7", size = 445614, upload-time = "2025-10-02T14:35:45.216Z" },
{ url = "https://files.pythonhosted.org/packages/ed/ba/603ce3961e339413543d8cd44f21f2c80e2a7c5cfe692a7b1f2cccf58f3c/xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b", size = 194024, upload-time = "2025-10-02T14:35:46.959Z" },
{ url = "https://files.pythonhosted.org/packages/78/d1/8e225ff7113bf81545cfdcd79eef124a7b7064a0bba53605ff39590b95c2/xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd", size = 210541, upload-time = "2025-10-02T14:35:48.301Z" },
{ url = "https://files.pythonhosted.org/packages/6f/58/0f89d149f0bad89def1a8dd38feb50ccdeb643d9797ec84707091d4cb494/xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0", size = 198305, upload-time = "2025-10-02T14:35:49.584Z" },
{ url = "https://files.pythonhosted.org/packages/11/38/5eab81580703c4df93feb5f32ff8fa7fe1e2c51c1f183ee4e48d4bb9d3d7/xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152", size = 210848, upload-time = "2025-10-02T14:35:50.877Z" },
{ url = "https://files.pythonhosted.org/packages/5e/6b/953dc4b05c3ce678abca756416e4c130d2382f877a9c30a20d08ee6a77c0/xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11", size = 414142, upload-time = "2025-10-02T14:35:52.15Z" },
{ url = "https://files.pythonhosted.org/packages/08/a9/238ec0d4e81a10eb5026d4a6972677cbc898ba6c8b9dbaec12ae001b1b35/xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5", size = 191547, upload-time = "2025-10-02T14:35:53.547Z" },
{ url = "https://files.pythonhosted.org/packages/2c/db/0e99732ed7f64182aef4a6fb145e1a295558deec2a746265dcdec12d191e/xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4", size = 32955, upload-time = "2025-10-02T14:35:58.267Z" },
{ url = "https://files.pythonhosted.org/packages/55/f4/2a7c3c68e564a099becfa44bb3d398810cc0ff6749b0d3cb8ccb93f23c14/xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67", size = 31072, upload-time = "2025-10-02T14:35:59.382Z" },
{ url = "https://files.pythonhosted.org/packages/c6/d9/72a29cddc7250e8a5819dad5d466facb5dc4c802ce120645630149127e73/xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad", size = 196579, upload-time = "2025-10-02T14:36:00.838Z" },
{ url = "https://files.pythonhosted.org/packages/63/93/b21590e1e381040e2ca305a884d89e1c345b347404f7780f07f2cdd47ef4/xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b", size = 215854, upload-time = "2025-10-02T14:36:02.207Z" },
{ url = "https://files.pythonhosted.org/packages/ce/b8/edab8a7d4fa14e924b29be877d54155dcbd8b80be85ea00d2be3413a9ed4/xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b", size = 214965, upload-time = "2025-10-02T14:36:03.507Z" },
{ url = "https://files.pythonhosted.org/packages/27/67/dfa980ac7f0d509d54ea0d5a486d2bb4b80c3f1bb22b66e6a05d3efaf6c0/xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca", size = 448484, upload-time = "2025-10-02T14:36:04.828Z" },
{ url = "https://files.pythonhosted.org/packages/8c/63/8ffc2cc97e811c0ca5d00ab36604b3ea6f4254f20b7bc658ca825ce6c954/xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a", size = 196162, upload-time = "2025-10-02T14:36:06.182Z" },
{ url = "https://files.pythonhosted.org/packages/4b/77/07f0e7a3edd11a6097e990f6e5b815b6592459cb16dae990d967693e6ea9/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99", size = 213007, upload-time = "2025-10-02T14:36:07.733Z" },
{ url = "https://files.pythonhosted.org/packages/ae/d8/bc5fa0d152837117eb0bef6f83f956c509332ce133c91c63ce07ee7c4873/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3", size = 200956, upload-time = "2025-10-02T14:36:09.106Z" },
{ url = "https://files.pythonhosted.org/packages/26/a5/d749334130de9411783873e9b98ecc46688dad5db64ca6e04b02acc8b473/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6", size = 213401, upload-time = "2025-10-02T14:36:10.585Z" },
{ url = "https://files.pythonhosted.org/packages/89/72/abed959c956a4bfc72b58c0384bb7940663c678127538634d896b1195c10/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93", size = 417083, upload-time = "2025-10-02T14:36:12.276Z" },
{ url = "https://files.pythonhosted.org/packages/0c/b3/62fd2b586283b7d7d665fb98e266decadf31f058f1cf6c478741f68af0cb/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518", size = 193913, upload-time = "2025-10-02T14:36:14.025Z" },
]

[[package]]
name = "yarl"
version = "1.22.0"
@@ -2465,3 +3196,37 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cf/72/6a85bba382f22cf78add705d8c3731748397d986e197e53ecc7835e76de7/yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c", size = 341473, upload-time = "2025-10-06T14:12:10.994Z" },
{ url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" },
]

[[package]]
name = "zstandard"
version = "0.25.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" },
{ url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" },
{ url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" },
{ url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" },
{ url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" },
{ url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" },
{ url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" },
{ url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" },
{ url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" },
{ url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" },
{ url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" },
{ url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" },
{ url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" },
{ url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" },
{ url = "https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" },
{ url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" },
{ url = "https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" },
{ url = "https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" },
{ url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" },
{ url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 6364818, upload-time = "2025-09-14T22:18:03.769Z" },
{ url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" },
{ url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" },
{ url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" },
{ url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" },
{ url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" },
{ url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" },
]