Compare commits

...

65 Commits

Author SHA1 Message Date
Ryuichi Leo Takashige  ddc81385fd  raise exo bench default times  2026-02-03 14:54:54 +00:00
Ryuichi Leo Takashige  694be69f30  single node regression  2026-02-03 11:33:50 +00:00
Ryuichi Leo Takashige  8a7d01641a  fix  2026-02-03 11:17:17 +00:00
Ryuichi Leo Takashige  5aaf8d8003  uv lock  2026-02-03 11:06:05 +00:00
Ryuichi Leo Takashige  8319477913  bring back david changes  2026-02-03 10:57:42 +00:00
Ryuichi Leo Takashige  4a2a2c092e  timeout  2026-02-03 02:07:42 +00:00
Ryuichi Leo Takashige  bc90ac33d7  pyproject.toml  2026-02-03 02:00:05 +00:00
Ryuichi Leo Takashige  42d256d70d  pyproject.toml  2026-02-03 01:59:47 +00:00
Ryuichi Leo Takashige  07a0622d04  revert deepseek changes  2026-02-03 01:58:22 +00:00
Ryuichi Leo Takashige  9868aaaf54  revert deepseek changes  2026-02-03 01:57:59 +00:00
Ryuichi Leo Takashige  90b2c97342  update config  2026-02-03 01:54:20 +00:00
Ryuichi Leo Takashige  af97b836c7  fix deepseek patch  2026-02-03 01:02:29 +00:00
Ryuichi Leo Takashige  fd722f663c  patch deepseek  2026-02-03 00:52:03 +00:00
Ryuichi Leo Takashige  4ec4695f84  ...  2026-02-03 00:37:02 +00:00
Ryuichi Leo Takashige  87caab8647  ...  2026-02-03 00:22:11 +00:00
Ryuichi Leo Takashige  c4b19088da  warmup is broken too??  2026-02-03 00:17:11 +00:00
Ryuichi Leo Takashige  b119a81d33  grr  2026-02-03 00:02:05 +00:00
Ryuichi Leo Takashige  110ef45928  im done with this  2026-02-03 00:01:10 +00:00
Ryuichi Leo Takashige  22fa3c8bb0  jeez say it's done  2026-02-02 23:53:56 +00:00
Ryuichi Leo Takashige  7608a5e7f4  told it to test...  2026-02-02 23:38:34 +00:00
Ryuichi Leo Takashige  04cc92a97f  claude is 100% nerfed to oblivion  2026-02-02 23:07:40 +00:00
Ryuichi Leo Takashige  163bb83195  what a mess  2026-02-02 23:06:49 +00:00
Ryuichi Leo Takashige  f082d284a5  fix recv errors  2026-02-02 22:58:25 +00:00
Ryuichi Leo Takashige  5ec1906a98  fix the race probably  2026-02-02 22:43:50 +00:00
Ryuichi Leo Takashige  5bc35b38b0  kill processes maybe  2026-02-02 22:31:03 +00:00
Ryuichi Leo Takashige  f906db66c5  generate results.json  2026-02-02 22:14:07 +00:00
Ryuichi Leo Takashige  d1ea5f4315  stop downloading shit  2026-02-02 22:04:21 +00:00
Ryuichi Leo Takashige  d9648194ed  dont download every time  2026-02-02 21:58:13 +00:00
Ryuichi Leo Takashige  bc73ef1436  what a failure  2026-02-02 21:44:05 +00:00
Ryuichi Leo Takashige  8aeeb46d2f  failures  2026-02-02 21:33:16 +00:00
Ryuichi Leo Takashige  edb2015607  failures  2026-02-02 21:13:42 +00:00
Ryuichi Leo Takashige  f613ebdc6c  failures  2026-02-02 21:12:34 +00:00
Ryuichi Leo Takashige  e72a1778dd  maybe fix  2026-02-02 20:24:52 +00:00
Ryuichi Leo Takashige  eb4c76e758  log text  2026-02-02 19:27:34 +00:00
Ryuichi Leo Takashige  b890c671b8  use new auto parallel  2026-02-02 19:23:29 +00:00
Ryuichi Leo Takashige  e7f3f47754  jeez that was dumb  2026-02-02 19:14:19 +00:00
Ryuichi Leo Takashige  d935c7a372  maybe fix?  2026-02-02 19:08:32 +00:00
Ryuichi Leo Takashige  bd089b30d7  raise timeouts  2026-02-02 18:50:26 +00:00
Ryuichi Leo Takashige  13b397a3c9  raise max concurrency  2026-02-02 18:45:29 +00:00
Ryuichi Leo Takashige  cf5fddf3f8  oops  2026-02-02 18:40:41 +00:00
Ryuichi Leo Takashige  c9df4ff004  save properly  2026-02-02 18:30:53 +00:00
Ryuichi Leo Takashige  4f7869b91b  cleanup after control c  2026-02-02 18:23:42 +00:00
Ryuichi Leo Takashige  b08ec25ef6  better limit?  2026-02-02 18:22:39 +00:00
Ryuichi Leo Takashige  f235019c28  make control c exit cleanly and add --limit  2026-02-02 18:04:58 +00:00
Ryuichi Leo Takashige  68a77f0910  little confusing pyproject change  2026-02-02 17:47:08 +00:00
Ryuichi Leo Takashige  8456e3f74b  actually fix exo eval  2026-02-02 17:37:37 +00:00
Ryuichi Leo Takashige  83e4725415  add 4bit attention  2026-02-02 17:30:52 +00:00
Ryuichi Leo Takashige  49dc7a8798  livecodebench fix  2026-02-02 17:30:34 +00:00
Ryuichi Leo Takashige  dea52342ca  livecodebench fix  2026-02-02 17:27:59 +00:00
Ryuichi Leo Takashige  aae28d8e8b  livecodebench eval  2026-02-02 17:14:56 +00:00
Ryuichi Leo Takashige  a28def8e45  revert use ssh  2026-02-02 16:06:32 +00:00
Ryuichi Leo Takashige  56a9864e19  use ssh  2026-02-02 15:59:42 +00:00
Ryuichi Leo Takashige  10afd08427  optimizations  2026-02-02 15:46:18 +00:00
Ryuichi Leo Takashige  04a0690746  faster prompt sizer  2026-02-02 14:50:04 +00:00
Ryuichi Leo Takashige  970717f1bb  dont time out pleaseee  2026-02-02 13:49:31 +00:00
Ryuichi Leo Takashige  774eb1756a  fix  2026-02-02 13:31:32 +00:00
Ryuichi Leo Takashige  061e58ce39  add livebench  2026-02-02 13:26:36 +00:00
Ryuichi Leo Takashige  e8b6ec131b  fix exo bench  2026-02-02 13:12:50 +00:00
Ryuichi Leo Takashige  7b4c5d0c6d  relative import  2026-02-02 11:44:14 +00:00
Ryuichi Leo Takashige  fb3d1e887f  relative import  2026-02-02 11:43:56 +00:00
Ryuichi Leo Takashige  2d15e49f4e  tagged model  2026-02-02 11:41:22 +00:00
Ryuichi Leo Takashige  c0f192897c  dumb upstream changes  2026-02-02 11:37:11 +00:00
Ryuichi Leo Takashige  7587cb872c  several fixes from main  2026-02-02 11:35:10 +00:00
Ryuichi Leo Takashige  bcb07782c1  no batch  2026-02-02 11:30:19 +00:00
Ryuichi Leo Takashige  24a6adf022  Add metadata to results.json  2026-01-29 13:02:35 +00:00
19 changed files with 1478 additions and 331 deletions

View File

@@ -2,8 +2,8 @@
# See bench/exo_eval.py for usage
[eval]
# Eval framework type: "lm_eval" | "swe_bench" | "custom"
type = "lm_eval"
# Eval framework type: "lm_eval" | "swe_bench" | "custom" | "livecodebench"
type = "livecodebench"
# Require HuggingFace token (default: true)
# Set to false if using only public datasets
require_hf_token = true
@@ -49,6 +49,44 @@ fewshot_as_multiturn = true
# Output path for results
output_path = "bench/eval_results"
# LiveCodeBench configuration
# Contamination-free code generation benchmark
# See: https://livecodebench.github.io/
[livecodebench]
# Evaluation scenario: "codegeneration" | "selfrepair" | "testoutputprediction" | "codeexecution"
scenario = "codegeneration"
# Dataset release version (newer versions have more problems)
# release_v1: May 2023 - March 2024 (400 problems)
# release_v2: May 2023 - May 2024 (511 problems)
# release_v3: May 2023 - July 2024 (612 problems)
# release_v4: May 2023 - September 2024 (713 problems)
# release_v5: May 2023 - January 2025 (880 problems)
release_version = "release_v5"
# Sampling temperature
# - For non-reasoning models: 0 (deterministic)
# - For reasoning/thinking models (Kimi K2.5, DeepSeek R1): use model-recommended (e.g., 1.0)
temperature = 1.0
# Number of samples per problem (1 for pass@1, matches Artificial Analysis)
n_samples = 1
# Max tokens for generation
# - For non-reasoning models: 16384
# - For reasoning/thinking models: use model-recommended (Kimi K2.5 uses 96k)
max_tokens = 96000
# Use code_generation_lite for faster evaluation (default: true)
# Set to false to use full test suite (slower but more thorough)
fast = true
# Run evaluation after generation (computes pass@1, pass@5)
evaluate = true
# Number of parallel API requests
multiprocess = 8
# Cache generated outputs for resumption (disabled by default to avoid stale results)
use_cache = false
# Timeout in seconds (universal for all operations)
timeout = 100000
openai_timeout = 100000
# Output path for results
output_path = "bench/lcb_results"
# SWE-bench configuration (placeholder)
[swe_bench]
# SWE-bench dataset
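Editor's note: a minimal sketch of consuming the new [livecodebench] table from Python, assuming only the stdlib tomllib (3.11+); the fallback defaults mirror the ones run_livecodebench() uses later in this diff:

import tomllib

with open("bench/eval_config.toml", "rb") as f:
    config = tomllib.load(f)

lcb = config.get("livecodebench", {})
scenario = lcb.get("scenario", "codegeneration")
release = lcb.get("release_version", "release_v5")
temperature = lcb.get("temperature", 0)  # 0 for non-reasoning models
max_tokens = lcb.get("max_tokens", 16384)
print(f"{scenario=} {release=} {temperature=} {max_tokens=}")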

View File

@@ -5,6 +5,7 @@ from __future__ import annotations
import argparse
import contextlib
import http.client
import itertools
import json
import os
import time
@@ -24,7 +25,7 @@ class ExoHttpError(RuntimeError):
class ExoClient:
def __init__(self, host: str, port: int, timeout_s: float = 600.0):
def __init__(self, host: str, port: int, timeout_s: float = 100000.0):
self.host = host
self.port = port
self.timeout_s = timeout_s
@@ -180,14 +181,7 @@ def parse_int_list(values: list[str]) -> list[int]:
part = part.strip()
if part:
items.append(int(part))
seen: set[int] = set()
out: list[int] = []
for x in items:
if x not in seen:
out.append(x)
seen.add(x)
return out
return items
def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]:
@@ -277,12 +271,29 @@ class PromptSizer:
f"Target ({target}) is smaller than template overhead ({self.base_tokens})."
)
content = ""
tok = self.count_fn(content)
# Estimate tokens per atom using a sample
sample_count = 100
sample_content = self.atom * sample_count
sample_tokens = self.count_fn(sample_content) - self.base_tokens
tokens_per_atom = sample_tokens / sample_count
while tok < target:
content += self.atom
tok = self.count_fn(content)
# Estimate starting point
needed_tokens = target - self.base_tokens
estimated_atoms = int(needed_tokens / tokens_per_atom)
# Binary search to find exact atom count
low, high = 0, estimated_atoms * 2 + 100
while low < high:
mid = (low + high) // 2
tok = self.count_fn(self.atom * mid)
if tok < target:
low = mid + 1
else:
high = mid
content = self.atom * low
tok = self.count_fn(content)
logger.info(f"{tok=}")
if tok != target:
raise RuntimeError(
@@ -348,7 +359,7 @@ def main() -> int:
help="Warmup runs per placement (uses first pp/tg).",
)
ap.add_argument(
"--timeout", type=float, default=600.0, help="HTTP timeout (seconds)."
"--timeout", type=float, default=100000.0, help="HTTP timeout (seconds)."
)
ap.add_argument(
"--json-out",
@@ -369,6 +380,14 @@ def main() -> int:
logger.error("--repeat must be >= 1")
return 2
# Log pairing mode
if len(pp_list) == len(tg_list):
logger.info(f"pp/tg mode: tandem (zip) - {len(pp_list)} pairs")
else:
logger.info(
f"pp/tg mode: combinations (product) - {len(pp_list) * len(tg_list)} pairs"
)
client = ExoClient(args.host, args.port, timeout_s=args.timeout)
short_id, full_model_id = resolve_model_short_id(client, args.model)
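Editor's note: to make the new pairing rule concrete, a standalone sketch of the zip-vs-product behavior logged above (list values are made up):

import itertools

def pair_pp_tg(pp_list: list[int], tg_list: list[int]) -> list[tuple[int, int]]:
    if len(pp_list) == len(tg_list):
        return list(zip(pp_list, tg_list))            # tandem (zip) mode
    return list(itertools.product(pp_list, tg_list))  # combinations mode

assert pair_pp_tg([128, 256], [64, 128]) == [(128, 64), (256, 128)]
assert pair_pp_tg([128, 256], [64]) == [(128, 64), (256, 64)]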
@@ -486,60 +505,55 @@ def main() -> int:
)
logger.debug(f" warmup {i + 1}/{args.warmup} done")
for pp in pp_list:
# if (
# pp * n_nodes > 2048
# and "ring" in instance_meta.lower()
# and "tensor" in sharding.lower()
# ):
# model_card = MODEL_CARDS[short_id]
# if model_card.metadata.storage_size > Memory.from_gb(10):
# logger.info(
# f"Skipping tensor ring as this is too slow for model of size {model_card.metadata.storage_size} on {n_nodes=}"
# )
# continue
for tg in tg_list:
runs: list[dict[str, Any]] = []
for r in range(args.repeat):
time.sleep(3)
try:
row, actual_pp_tokens = run_one_completion(
client, full_model_id, pp, tg, prompt_sizer
)
except Exception as e:
logger.error(e)
continue
row.update(
{
"model_short_id": short_id,
"model_id": full_model_id,
"placement_sharding": sharding,
"placement_instance_meta": instance_meta,
"placement_nodes": n_nodes,
"instance_id": instance_id,
"pp_tokens": actual_pp_tokens,
"tg": tg,
"repeat_index": r,
}
)
runs.append(row)
all_rows.append(row)
# If pp and tg lists have same length, run in tandem (zip)
# Otherwise, run all combinations (cartesian product)
if len(pp_list) == len(tg_list):
pp_tg_pairs = list(zip(pp_list, tg_list))
else:
pp_tg_pairs = list(itertools.product(pp_list, tg_list))
if runs:
prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
gtok = mean(x["stats"]["generation_tokens"] for x in runs)
peak = mean(
x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
for pp, tg in pp_tg_pairs:
runs: list[dict[str, Any]] = []
for r in range(args.repeat):
time.sleep(3)
try:
row, actual_pp_tokens = run_one_completion(
client, full_model_id, pp, tg, prompt_sizer
)
except Exception as e:
logger.error(e)
continue
row.update(
{
"model_short_id": short_id,
"model_id": full_model_id,
"placement_sharding": sharding,
"placement_instance_meta": instance_meta,
"placement_nodes": n_nodes,
"instance_id": instance_id,
"pp_tokens": actual_pp_tokens,
"tg": tg,
"repeat_index": r,
}
)
runs.append(row)
all_rows.append(row)
logger.info(
f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f} "
f"prompt_tokens={ptok} gen_tokens={gtok} "
f"peak_memory={format_peak_memory(peak)}\n"
)
time.sleep(2)
if runs:
prompt_tps = mean(x["stats"]["prompt_tps"] for x in runs)
gen_tps = mean(x["stats"]["generation_tps"] for x in runs)
ptok = mean(x["stats"]["prompt_tokens"] for x in runs)
gtok = mean(x["stats"]["generation_tokens"] for x in runs)
peak = mean(
x["stats"]["peak_memory_usage"]["inBytes"] for x in runs
)
logger.info(
f"prompt_tps={prompt_tps:.2f} gen_tps={gen_tps:.2f} "
f"prompt_tokens={ptok} gen_tokens={gtok} "
f"peak_memory={format_peak_memory(peak)}\n"
)
time.sleep(2)
finally:
try:
client.request_json("DELETE", f"/instance/{instance_id}")
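Editor's note: the PromptSizer change above replaces token-by-token linear growth with a sampled tokens-per-atom estimate plus a binary search. A self-contained sketch of that logic, where count_fn, atom, and base_tokens stand in for the class attributes (count_fn is assumed to include template overhead, as in the diff):

def size_prompt(count_fn, atom: str, base_tokens: int, target: int) -> str:
    # Estimate tokens per atom from a 100-atom sample, then binary-search
    # for the smallest atom count whose token count reaches the target.
    sample_count = 100
    tokens_per_atom = (count_fn(atom * sample_count) - base_tokens) / sample_count
    estimated_atoms = int((target - base_tokens) / tokens_per_atom)
    low, high = 0, estimated_atoms * 2 + 100
    while low < high:
        mid = (low + high) // 2
        if count_fn(atom * mid) < target:
            low = mid + 1
        else:
            high = mid
    return atom * low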

View File

@@ -5,12 +5,17 @@ exo-eval: Evaluation harness for exo inference system.
Supports multiple evaluation frameworks via TOML configuration:
- lm_eval: Language model evaluation using EleutherAI's lm-evaluation-harness
- livecodebench: Code generation benchmark (https://livecodebench.github.io/)
- swe_bench: SWE-bench evaluation (placeholder for future implementation)
- custom: Custom evaluation scripts
Usage:
uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit
uv run python -m bench.exo_eval --config bench/eval_config.toml --model Llama-3.2-1b-Instruct-4bit --dry-run
# Run LiveCodeBench (requires livecodebench package):
# First: git clone https://github.com/LiveCodeBench/LiveCodeBench && cd LiveCodeBench && uv pip install -e .
# Then set type = "livecodebench" in eval_config.toml
"""
from __future__ import annotations
@@ -47,7 +52,7 @@ from bench.exo_bench import (
wait_for_instance_ready,
)
EvalType = Literal["lm_eval", "swe_bench", "custom"]
EvalType = Literal["lm_eval", "swe_bench", "livecodebench", "custom"]
def load_config(config_path: str) -> dict[str, Any]:
@@ -64,7 +69,7 @@ def get_eval_type(config: dict[str, Any]) -> EvalType:
"""Extract evaluation type from config."""
eval_section = config.get("eval", {})
eval_type = eval_section.get("type", "lm_eval")
if eval_type not in ("lm_eval", "swe_bench", "custom"):
if eval_type not in ("lm_eval", "swe_bench", "livecodebench", "custom"):
raise ValueError(f"Unknown eval type: {eval_type}")
return eval_type
@@ -303,7 +308,7 @@ def run_lm_eval(
output_path: str | None,
limit: int | None,
dry_run: bool,
) -> int:
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run lm_eval evaluation."""
lm_eval_config = config.get("lm_eval", {})
tasks = lm_eval_config.get("tasks", ["mmlu"])
@@ -311,6 +316,7 @@ def run_lm_eval(
tasks = [tasks]
exo_base_url = f"http://{host}:{port}"
effective_output = output_path or lm_eval_config.get("output_path")
# Build args - use native completions or chat completions endpoint directly
args = build_lm_eval_args(
@@ -320,31 +326,53 @@ def run_lm_eval(
if dry_run:
logger.info("[dry-run] Would execute the above command")
return 0
return 0, None, None
try:
result = subprocess.run(args, check=False)
start_time = time.perf_counter()
# Use Popen with process group so we can kill all children on interrupt
proc = subprocess.Popen(args, start_new_session=True)
try:
proc.wait()
except KeyboardInterrupt:
# Kill the entire process group on Ctrl+C
import signal
# Print token usage summary from exo
logger.info("Interrupted - terminating lm_eval processes...")
try:
os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
proc.wait(timeout=5)
except (ProcessLookupError, OSError):
pass # Already dead
except subprocess.TimeoutExpired:
os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
return 130, None, None
elapsed_seconds = time.perf_counter() - start_time
# Fetch and return token usage summary from exo
usage: dict[str, Any] | None = None
try:
import httpx
usage_resp = httpx.get(f"{exo_base_url}/v1/usage", timeout=5)
if usage_resp.status_code == 200:
usage = usage_resp.json()
usage_data: dict[str, Any] = usage_resp.json()
usage = usage_data
logger.info("--- Token Usage (Total) ---")
logger.info(f" Requests: {usage.get('total_requests', 0)}")
logger.info(
f" Prompt tokens: {usage.get('total_prompt_tokens', 0)}"
f" Requests: {usage_data.get('total_requests', 0)}"
)
logger.info(
f" Completion tokens: {usage.get('total_completion_tokens', 0)}"
f" Prompt tokens: {usage_data.get('total_prompt_tokens', 0)}"
)
logger.info(
f" Reasoning tokens: {usage.get('total_reasoning_tokens', 0)}"
f" Completion tokens: {usage_data.get('total_completion_tokens', 0)}"
)
logger.info(f" Total tokens: {usage.get('total_tokens', 0)}")
by_model = usage.get("by_model", {})
logger.info(
f" Reasoning tokens: {usage_data.get('total_reasoning_tokens', 0)}"
)
logger.info(f" Total tokens: {usage_data.get('total_tokens', 0)}")
by_model = usage_data.get("by_model", {})
if by_model:
for model_name, counters in by_model.items():
logger.info(f"--- Token Usage ({model_name}) ---")
@@ -363,10 +391,59 @@ def run_lm_eval(
except Exception:
pass # Usage endpoint not available
return result.returncode
logger.info(f"Evaluation completed in {elapsed_seconds:.2f}s")
# Append token usage to lm_eval's results.json
if effective_output and usage:
_append_token_usage_to_results(effective_output, usage, elapsed_seconds)
return proc.returncode, usage, elapsed_seconds
except FileNotFoundError:
logger.error("lm_eval not found. Install with: uv sync --extra eval")
return 1
return 1, None, None
def _append_token_usage_to_results(
output_path: str, usage: dict[str, Any], elapsed_seconds: float
) -> None:
"""Append token usage data to lm_eval's results.json file."""
output_dir = Path(output_path)
results_file = output_dir / "results.json"
if not results_file.exists():
# lm_eval may put results in a subdirectory named after the model
for subdir in output_dir.iterdir():
if subdir.is_dir():
candidate = subdir / "results.json"
if candidate.exists():
results_file = candidate
break
if not results_file.exists():
logger.warning(f"Could not find results.json in {output_path}")
return
try:
with open(results_file, encoding="utf-8") as f:
results = json.load(f)
# Add token usage to the results
results["token_usage"] = {
"prompt_tokens": usage.get("total_prompt_tokens", 0),
"completion_tokens": usage.get("total_completion_tokens", 0),
"reasoning_tokens": usage.get("total_reasoning_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
"total_requests": usage.get("total_requests", 0),
"by_model": usage.get("by_model"),
}
results["elapsed_seconds"] = elapsed_seconds
with open(results_file, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2, ensure_ascii=False)
logger.info(f"Added token usage to: {results_file}")
except Exception as e:
logger.warning(f"Failed to append token usage to results.json: {e}")
def run_swe_bench(
@@ -376,7 +453,7 @@ def run_swe_bench(
model: str,
output_path: str | None,
dry_run: bool,
) -> int:
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run SWE-bench evaluation (placeholder)."""
swe_config = config.get("swe_bench", {})
@@ -395,13 +472,277 @@ def run_swe_bench(
if dry_run:
logger.info("[dry-run] SWE-bench evaluation would be executed")
return 0
return 0, None, None
logger.warning(
"SWE-bench integration is a placeholder. "
"Implement swebench inference and evaluation logic as needed."
)
return 0
return 0, None, None
def run_livecodebench(
config: dict[str, Any],
host: str,
port: int,
model: str,
output_path: str | None,
limit: int | None,
dry_run: bool,
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run LiveCodeBench evaluation.
LiveCodeBench is a contamination-free benchmark for code generation that
continuously collects new problems from LeetCode, AtCoder, and Codeforces.
See: https://livecodebench.github.io/
"""
lcb_config = config.get("livecodebench", {})
scenario = lcb_config.get("scenario", "codegeneration")
release_version = lcb_config.get("release_version", "release_v5")
# Defaults match Artificial Analysis methodology:
# https://artificialanalysis.ai/methodology/intelligence-benchmarking
temperature = lcb_config.get("temperature", 0) # 0 for non-reasoning models
n_samples = lcb_config.get("n_samples", 1) # pass@1
max_tokens = lcb_config.get("max_tokens", 16384)
use_cache = lcb_config.get("use_cache", False)
fast = lcb_config.get("fast", True) # Use code_generation_lite by default
evaluate = lcb_config.get("evaluate", True)
multiprocess = lcb_config.get("multiprocess", 4)
# Timeouts (high defaults for slow inference)
timeout = lcb_config.get("timeout", 1800) # 30 min per problem
openai_timeout = lcb_config.get("openai_timeout", 3600) # 1 hour per request
exo_base_url = f"http://{host}:{port}/v1"
effective_output = output_path or lcb_config.get("output_path", "bench/lcb_results")
logger.info("LiveCodeBench evaluation configuration:")
logger.info(f" Scenario: {scenario}")
logger.info(f" Release version: {release_version}")
logger.info(f" Model: {model}")
logger.info(f" API endpoint: {exo_base_url}")
logger.info(f" Temperature: {temperature}")
logger.info(f" N samples: {n_samples}")
logger.info(f" Max tokens: {max_tokens}")
logger.info(f" Output path: {effective_output}")
# Build command using our wrapper script that handles:
# 1. Registering custom models in LiveCodeBench's registry
# 2. Patching the OpenAI client to use exo's endpoint
args = [
sys.executable,
"-m",
"bench.livecodebench_runner",
"--base-url",
exo_base_url,
"--model",
model,
"--scenario",
scenario,
"--release_version",
release_version,
"--temperature",
str(temperature),
"--n",
str(n_samples),
"--codegen_n",
str(n_samples),
"--max_tokens",
str(max_tokens),
"--output-dir",
effective_output,
]
if use_cache:
args.append("--use_cache")
if not fast:
args.append("--not_fast")
if evaluate:
args.append("--evaluate")
if multiprocess > 1:
args.extend(["--multiprocess", str(multiprocess)])
# Add timeouts
args.extend(["--timeout", str(timeout)])
args.extend(["--openai_timeout", str(openai_timeout)])
if limit is not None:
args.extend(["--limit", str(limit)])
logger.info(f"LiveCodeBench command: {' '.join(args)}")
if dry_run:
logger.info("[dry-run] Would execute the above command")
return 0, None, None
# Environment is set up by the wrapper script
env = os.environ.copy()
try:
start_time = time.perf_counter()
# Use Popen with process group so we can kill all children on interrupt
proc = subprocess.Popen(args, env=env, start_new_session=True)
try:
proc.wait()
except KeyboardInterrupt:
# Kill the entire process group on Ctrl+C
import signal
logger.info("Interrupted - terminating LiveCodeBench processes...")
try:
os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
proc.wait(timeout=5)
except (ProcessLookupError, OSError):
pass # Already dead
except subprocess.TimeoutExpired:
os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
return 130, None, None
elapsed_seconds = time.perf_counter() - start_time
# Fetch token usage from exo
usage: dict[str, Any] | None = None
try:
import httpx
usage_resp = httpx.get(f"http://{host}:{port}/v1/usage", timeout=5)
if usage_resp.status_code == 200:
usage_data: dict[str, Any] = usage_resp.json()
usage = usage_data
logger.info("--- Token Usage (Total) ---")
logger.info(
f" Requests: {usage_data.get('total_requests', 0)}"
)
logger.info(
f" Prompt tokens: {usage_data.get('total_prompt_tokens', 0)}"
)
logger.info(
f" Completion tokens: {usage_data.get('total_completion_tokens', 0)}"
)
logger.info(f" Total tokens: {usage_data.get('total_tokens', 0)}")
except Exception:
pass # Usage endpoint not available
logger.info(f"LiveCodeBench evaluation completed in {elapsed_seconds:.2f}s")
# Generate results.json from eval files
if proc.returncode == 0:
_generate_livecodebench_results(
effective_output, model, elapsed_seconds, usage, lcb_config
)
return proc.returncode, usage, elapsed_seconds
except FileNotFoundError:
logger.error(
"LiveCodeBench not found. Install with: "
"pip install livecodebench OR "
"git clone https://github.com/LiveCodeBench/LiveCodeBench && "
"cd LiveCodeBench && uv pip install -e ."
)
return 1, None, None
def _generate_livecodebench_results(
output_path: str,
model: str,
elapsed_seconds: float,
usage: dict[str, Any] | None,
lcb_config: dict[str, Any],
) -> None:
"""Generate a results.json file from LiveCodeBench evaluation results."""
output_dir = Path(output_path)
model_dir = output_dir / model
if not model_dir.exists():
logger.warning(f"Model output directory not found: {model_dir}")
return
# Find all eval.json files (not eval_all.json)
eval_files = list(model_dir.glob("*_eval.json"))
eval_files = [f for f in eval_files if "_eval_all.json" not in f.name]
if not eval_files:
logger.warning(f"No eval files found in {model_dir}")
return
# Parse the most recent eval file (by modification time)
eval_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
latest_eval = eval_files[0]
try:
with open(latest_eval, encoding="utf-8") as f:
eval_data = json.load(f)
# Extract pass@k scores from the first element
scores: dict[str, float] = {}
details: dict[str, Any] = {}
if isinstance(eval_data, list) and len(eval_data) > 0:
first_elem = eval_data[0]
if isinstance(first_elem, dict):
# Extract all pass@k scores
for key, value in first_elem.items():
if key.startswith("pass@") and isinstance(value, (int, float)):
scores[key] = float(value)
elif key == "detail":
details = value
# Count problems from the corresponding output file
output_file_name = latest_eval.name.replace("_eval.json", ".json")
output_file = model_dir / output_file_name
num_problems = 0
if output_file.exists():
with open(output_file, encoding="utf-8") as f:
problems_data = json.load(f)
if isinstance(problems_data, list):
num_problems = len(problems_data)
# Build results.json
results: dict[str, Any] = {
"model": model,
"eval_type": "livecodebench",
"scenario": lcb_config.get("scenario", "codegeneration"),
"release_version": lcb_config.get("release_version", "release_v5"),
"timestamp": datetime.now(timezone.utc).isoformat(),
"elapsed_seconds": elapsed_seconds,
"num_problems": num_problems,
"results": scores,
"config": {
"temperature": lcb_config.get("temperature", 0),
"n_samples": lcb_config.get("n_samples", 1),
"max_tokens": lcb_config.get("max_tokens", 16384),
},
}
if details:
results["details"] = details
if usage:
results["token_usage"] = {
"prompt_tokens": usage.get("total_prompt_tokens", 0),
"completion_tokens": usage.get("total_completion_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
"total_requests": usage.get("total_requests", 0),
}
# Write results.json to the model directory
results_file = model_dir / "results.json"
with open(results_file, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2, ensure_ascii=False)
logger.info(f"Generated LiveCodeBench results: {results_file}")
# Also log the key metrics
if scores:
scores_str = ", ".join(f"{k}={v:.2%}" for k, v in sorted(scores.items()))
logger.info(f"LiveCodeBench scores: {scores_str}")
except Exception as e:
logger.warning(f"Failed to generate results.json: {e}")
def run_custom_eval(
@@ -411,19 +752,19 @@ def run_custom_eval(
model: str,
output_path: str | None,
dry_run: bool,
) -> int:
) -> tuple[int, dict[str, Any] | None, float | None]:
"""Run custom evaluation script."""
custom_config = config.get("custom", {})
script = custom_config.get("script")
if not script:
logger.error("No script specified in [custom] config section")
return 1
return 1, None, None
script_path = Path(script)
if not script_path.exists():
logger.error(f"Custom script not found: {script}")
return 1
return 1, None, None
script_args = custom_config.get("args", [])
if not isinstance(script_args, list):
@@ -442,10 +783,13 @@ def run_custom_eval(
if dry_run:
logger.info("[dry-run] Would execute the above command")
return 0
return 0, None, None
start_time = time.perf_counter()
result = subprocess.run(cmd, env=env, check=False)
return result.returncode
elapsed_seconds = time.perf_counter() - start_time
logger.info(f"Custom evaluation completed in {elapsed_seconds:.2f}s")
return result.returncode, None, elapsed_seconds
def write_results_metadata(
@@ -457,6 +801,8 @@ def write_results_metadata(
eval_type: EvalType,
return_code: int,
preview: dict[str, Any] | None,
usage: dict[str, Any] | None,
elapsed_seconds: float | None,
) -> None:
"""Write evaluation metadata to a JSON file."""
metadata: dict[str, Any] = {
@@ -468,6 +814,9 @@ def write_results_metadata(
"return_code": return_code,
}
if elapsed_seconds is not None:
metadata["elapsed_seconds"] = elapsed_seconds
if preview:
metadata["placement"] = {
"sharding": preview.get("sharding"),
@@ -477,6 +826,16 @@ def write_results_metadata(
else None,
}
if usage:
metadata["token_usage"] = {
"prompt_tokens": usage.get("total_prompt_tokens", 0),
"completion_tokens": usage.get("total_completion_tokens", 0),
"reasoning_tokens": usage.get("total_reasoning_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
"total_requests": usage.get("total_requests", 0),
"by_model": usage.get("by_model"),
}
output_dir = Path(output_path)
output_dir.mkdir(parents=True, exist_ok=True)
metadata_path = output_dir / "eval_metadata.json"
@@ -621,8 +980,10 @@ def main() -> int:
try:
# Run evaluation
usage: dict[str, Any] | None = None
elapsed_seconds: float | None = None
if eval_type == "lm_eval":
return_code = run_lm_eval(
return_code, usage, elapsed_seconds = run_lm_eval(
config,
args.host,
args.port,
@@ -632,7 +993,7 @@ def main() -> int:
args.dry_run,
)
elif eval_type == "swe_bench":
return_code = run_swe_bench(
return_code, usage, elapsed_seconds = run_swe_bench(
config,
args.host,
args.port,
@@ -640,8 +1001,18 @@ def main() -> int:
args.output,
args.dry_run,
)
elif eval_type == "livecodebench":
return_code, usage, elapsed_seconds = run_livecodebench(
config,
args.host,
args.port,
full_model_id,
args.output,
args.limit,
args.dry_run,
)
elif eval_type == "custom":
return_code = run_custom_eval(
return_code, usage, elapsed_seconds = run_custom_eval(
config,
args.host,
args.port,
@@ -665,6 +1036,8 @@ def main() -> int:
eval_type,
return_code,
preview,
usage,
elapsed_seconds,
)
return return_code
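Editor's note: run_lm_eval and run_livecodebench now share the same interrupt-safe subprocess pattern; isolated, it looks like the sketch below (the ["sleep", "3600"] command is a stand-in):

import os
import signal
import subprocess

proc = subprocess.Popen(["sleep", "3600"], start_new_session=True)  # own process group
try:
    proc.wait()
except KeyboardInterrupt:
    try:
        os.killpg(os.getpgid(proc.pid), signal.SIGTERM)  # signal the whole group
        proc.wait(timeout=5)
    except (ProcessLookupError, OSError):
        pass  # already dead
    except subprocess.TimeoutExpired:
        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)  # escalate if SIGTERM is ignored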

View File

@@ -0,0 +1,287 @@
#!/usr/bin/env python3
# pyright: reportAny=false, reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
"""
LiveCodeBench runner wrapper for exo.
This wrapper allows running LiveCodeBench with custom OpenAI-compatible endpoints
by dynamically registering models and configuring the OpenAI client.
Usage:
python -m bench.livecodebench_runner --model my-model --base-url http://localhost:52415/v1 [lcb args...]
The wrapper:
1. Registers the custom model in LiveCodeBench's model registry
2. Sets up environment variables for the OpenAI client
3. Runs the standard LiveCodeBench runner
Requires LiveCodeBench to be installed:
git clone https://github.com/LiveCodeBench/LiveCodeBench
cd LiveCodeBench && uv pip install -e .
"""
from __future__ import annotations
import argparse
import multiprocessing
import os
import signal
import sys
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, NoReturn
if TYPE_CHECKING:
from typing import Any
def _cleanup_and_exit(exit_code: int = 130) -> NoReturn:
"""Terminate all child processes and exit."""
# Terminate any active multiprocessing pools
for child in multiprocessing.active_children():
child.terminate()
child.join(timeout=1)
if child.is_alive():
child.kill()
# Force exit to avoid hanging on cleanup
os._exit(exit_code)
def _signal_handler(signum: int, frame: object) -> NoReturn:
"""Handle interrupt signals by terminating all child processes."""
_cleanup_and_exit(130)
def get_lcb_directory() -> Path | None:
"""Find the LiveCodeBench installation directory.
LiveCodeBench uses relative paths like 'lcb_runner/prompts/few_shot_examples/...'
which require running from the LiveCodeBench directory.
"""
# Check environment variable first
if env_path := os.environ.get("LIVECODEBENCH_DIR"):
lcb_path = Path(env_path)
if (lcb_path / "lcb_runner" / "prompts" / "few_shot_examples").exists():
return lcb_path
# Use importlib to find package location without executing module code
# This avoids triggering the relative path imports that would fail
try:
import importlib.util
spec = importlib.util.find_spec("lcb_runner")
if spec and spec.origin:
# spec.origin is the __init__.py path, go up two levels
lcb_path = Path(spec.origin).parent.parent
if (lcb_path / "lcb_runner" / "prompts" / "few_shot_examples").exists():
return lcb_path
except (ImportError, ModuleNotFoundError):
pass
# Check common locations relative to this script
script_dir = Path(__file__).parent.parent # exo/
common_locations = [
script_dir / "LiveCodeBench", # exo/LiveCodeBench
script_dir.parent / "LiveCodeBench", # sibling to exo
]
for loc in common_locations:
if (loc / "lcb_runner" / "prompts" / "few_shot_examples").exists():
return loc
return None
def setup_custom_model(model_name: str, base_url: str) -> None:
"""Register a custom model in LiveCodeBench's registry."""
try:
from lcb_runner.lm_styles import ( # pyright: ignore[reportMissingImports]
LanguageModel,
LanguageModelList,
LanguageModelStore,
LMStyle,
)
except ImportError as e:
print(
"Error: LiveCodeBench not installed. Install with:\n"
" git clone https://github.com/LiveCodeBench/LiveCodeBench\n"
" cd LiveCodeBench && uv pip install -e .",
file=sys.stderr,
)
raise SystemExit(1) from e
# Check if model already exists
if model_name in LanguageModelStore:
return
# Create a new model entry using OpenAIChat style
# This will route through the oai_runner which respects OPENAI_BASE_URL
custom_model = LanguageModel(
model_name=model_name,
model_repr=model_name,
model_style=LMStyle.OpenAIChat,
release_date=datetime.now(),
link=base_url,
)
# Add to the model list and store
LanguageModelList.append(custom_model)
LanguageModelStore[model_name] = custom_model
def patch_openai_client(base_url: str) -> None:
"""Patch the OpenAI client to use a custom base URL.
This patches the oai_runner module to use our custom base URL.
"""
try:
from lcb_runner.runner import oai_runner # noqa: I001 # pyright: ignore[reportMissingImports]
except ImportError as e:
print(f"Error importing required modules: {e}", file=sys.stderr)
raise SystemExit(1) from e
# Store original client creation
original_init = oai_runner.OpenAI
def patched_openai(*args: Any, **kwargs: Any) -> Any:
"""Create OpenAI client with custom base_url."""
# Inject base_url if not already set
if "base_url" not in kwargs:
kwargs["base_url"] = base_url
# Use dummy API key if not set (exo doesn't require auth)
if "api_key" not in kwargs and not os.getenv("OPENAI_KEY"):
kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "exo-local")
return original_init(*args, **kwargs)
# Apply the patch
oai_runner.OpenAI = patched_openai
def main() -> int:
"""Main entry point."""
# Set up signal handlers for clean exit
signal.signal(signal.SIGINT, _signal_handler)
signal.signal(signal.SIGTERM, _signal_handler)
parser = argparse.ArgumentParser(
description="LiveCodeBench runner wrapper for exo",
epilog="Additional arguments are passed to lcb_runner.runner.main",
)
parser.add_argument(
"--base-url",
default=os.environ.get("OPENAI_BASE_URL", "http://localhost:52415/v1"),
help="OpenAI-compatible API base URL (default: OPENAI_BASE_URL or localhost:52415/v1)",
)
parser.add_argument(
"--model",
required=True,
help="Model name to use",
)
parser.add_argument(
"--output-dir",
default=None,
help="Output directory for results (maps to LiveCodeBench's --custom_output_save_name)",
)
parser.add_argument(
"--limit",
type=int,
default=None,
help="Limit number of problems to evaluate (for testing)",
)
# Parse known args, pass rest to LiveCodeBench
args, remaining = parser.parse_known_args()
# Set up environment
os.environ["OPENAI_BASE_URL"] = args.base_url
if "OPENAI_API_KEY" not in os.environ and "OPENAI_KEY" not in os.environ:
os.environ["OPENAI_API_KEY"] = "exo-local"
os.environ["OPENAI_KEY"] = "exo-local"
# Save original directory for output path resolution
original_cwd = os.getcwd()
# Change to LiveCodeBench directory before imports that use relative paths
# LiveCodeBench uses paths like 'lcb_runner/prompts/few_shot_examples/...'
lcb_dir = get_lcb_directory()
if lcb_dir:
os.chdir(lcb_dir)
else:
print(
"Warning: Could not find LiveCodeBench directory. "
"Relative path imports may fail.",
file=sys.stderr,
)
# Setup custom model and patch client
setup_custom_model(args.model, args.base_url)
patch_openai_client(args.base_url)
# Build arguments for LiveCodeBench runner
lcb_args = ["--model", args.model]
# Resolve output directory to absolute path (relative to original cwd)
output_base: str | None = None
if args.output_dir:
output_base = str(Path(original_cwd) / args.output_dir)
lcb_args.extend(remaining)
# Run LiveCodeBench
try:
from lcb_runner.runner import main as lcb_main_module # noqa: I001 # pyright: ignore[reportMissingImports]
from lcb_runner.utils import path_utils # noqa: I001 # pyright: ignore[reportMissingImports]
# Patch output path to use our output directory
if output_base:
original_get_output_path = path_utils.get_output_path
def patched_get_output_path(model_repr: str, runner_args: Any) -> str:
# Get the original path and replace 'output/' with our base
original_path = original_get_output_path(model_repr, runner_args)
# Replace 'output/' prefix with our custom base
if original_path.startswith("output/"):
new_path = str(
Path(output_base) / original_path[7:]
) # Skip 'output/'
else:
new_path = str(Path(output_base) / original_path)
path_utils.ensure_dir(new_path)
print(f"Saving results to: {new_path}")
return new_path
path_utils.get_output_path = patched_get_output_path
# Also patch in main module since it may have imported directly
if hasattr(lcb_main_module, "get_output_path"):
lcb_main_module.get_output_path = patched_get_output_path
# Patch benchmark loading to support --limit
# Must patch in the main module since it imports the function directly
if args.limit is not None:
original_build = lcb_main_module.build_prompt_benchmark
def limited_build(*a: Any, **kw: Any) -> Any:
benchmark, format_prompt = original_build(*a, **kw)
if args.limit and len(benchmark) > args.limit:
print(
f"Limiting benchmark from {len(benchmark)} to {args.limit} problems"
)
benchmark = benchmark[: args.limit]
return benchmark, format_prompt
lcb_main_module.build_prompt_benchmark = limited_build
# Patch sys.argv for argparse in lcb_main
sys.argv = [sys.argv[0], *lcb_args]
lcb_main_module.main()
return 0
except KeyboardInterrupt:
print("\nInterrupted by user", file=sys.stderr)
_cleanup_and_exit(130)
except SystemExit as e:
return e.code if isinstance(e.code, int) else 1
except Exception as e:
print(f"Error running LiveCodeBench: {e}", file=sys.stderr)
return 1
if __name__ == "__main__":
raise SystemExit(main())
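Editor's note: a quick illustration of the flag split the wrapper relies on; parse_known_args() keeps wrapper-owned flags and forwards the remainder verbatim to lcb_runner (argument values here are made up):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model", required=True)
parser.add_argument("--base-url", default="http://localhost:52415/v1")
args, remaining = parser.parse_known_args(
    ["--model", "my-model", "--scenario", "codegeneration", "--n", "1"]
)
assert args.model == "my-model"
assert remaining == ["--scenario", "codegeneration", "--n", "1"]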

View File

@@ -18,9 +18,9 @@ dependencies = [
"loguru>=0.7.3",
"exo_pyo3_bindings", # rust bindings
"anyio==4.11.0",
"mlx==0.30.3; sys_platform == 'darwin'",
"mlx[cpu]==0.30.3; sys_platform == 'linux'",
"mlx-lm==0.30.5",
"mlx==0.30.4; sys_platform == 'darwin'",
"mlx[cpu]==0.30.4; sys_platform == 'linux'",
"mlx-lm",
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
@@ -55,6 +55,11 @@ dev = [
# ]
eval = [
"lm_eval[api]",
# LiveCodeBench dependencies (livecodebench itself must be installed manually due to packaging issues)
# Install with: git clone https://github.com/LiveCodeBench/LiveCodeBench && cd LiveCodeBench && uv pip install -e .
"openai>=1.59.6",
"datasets>=2.14.0,<4.0", # LiveCodeBench requires <4.0 due to dataset script deprecation
"pebble>=5.1.0",
]
###
@@ -68,6 +73,7 @@ members = [
[tool.uv.sources]
exo_pyo3_bindings = { workspace = true }
mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm.git", branch = "main" }
# Uncomment to use local mlx/mlx-lm development versions:
# mlx = { path = "/Users/Shared/mlx", editable=true }
# mlx-lm = { path = "/Users/Shared/mlx-lm", editable=true }

View File

@@ -155,13 +155,23 @@ async def seed_models(seed_dir: str | Path):
async def fetch_file_list_with_cache(
model_id: ModelId, revision: str = "main", recursive: bool = False
model_id: ModelId,
revision: str = "main",
recursive: bool = False,
cache_ttl_seconds: int = 3600,
) -> list[FileListEntry]:
target_dir = (await ensure_models_dir()) / "caches" / model_id.normalize()
await aios.makedirs(target_dir, exist_ok=True)
cache_file = target_dir / f"{model_id.normalize()}--{revision}--file_list.json"
# Always try fresh first
# Use cache if it exists and is fresh (< TTL seconds old)
if await aios.path.exists(cache_file):
cache_age = time.time() - (await aios.stat(cache_file)).st_mtime
if cache_age < cache_ttl_seconds:
async with aiofiles.open(cache_file, "r") as f:
return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
# Cache missing or stale - fetch fresh
try:
file_list = await fetch_file_list_with_retry(
model_id, revision, recursive=recursive
@@ -173,7 +183,7 @@ async def fetch_file_list_with_cache(
)
return file_list
except Exception as e:
# Fetch failed - try cache fallback
# Fetch failed - try cache fallback (even if stale)
if await aios.path.exists(cache_file):
logger.warning(
f"Failed to fetch file list for {model_id}, using cached data: {e}"

View File

@@ -21,7 +21,7 @@ def exo_shard_downloader(max_parallel_downloads: int = 8) -> ShardDownloader:
async def build_base_shard(model_id: ModelId) -> ShardMetadata:
model_card = await ModelCard.from_hf(model_id)
model_card = await ModelCard.load(model_id)
return PipelineShardMetadata(
model_card=model_card,
device_rank=0,

View File

@@ -267,6 +267,11 @@ def main():
os.environ["EXO_FAST_SYNCH"] = "off"
logger.info("FAST_SYNCH forced OFF")
# Set EXO_NO_BATCH env var for runner subprocesses
if args.no_batch:
os.environ["EXO_NO_BATCH"] = "1"
logger.info("Batch inference disabled (serial mode)")
node = anyio.run(Node.create, args)
anyio.run(node.run)
logger.info("EXO Shutdown complete")
@@ -282,6 +287,7 @@ class Args(CamelCaseModel):
no_worker: bool = False
no_downloads: bool = False
fast_synch: bool | None = None # None = auto, True = force on, False = force off
no_batch: bool = False
@classmethod
def parse(cls) -> Self:
@@ -342,6 +348,11 @@ class Args(CamelCaseModel):
dest="fast_synch",
help="Force MLX FAST_SYNCH off",
)
parser.add_argument(
"--no-batch",
action="store_true",
help="Disable batch inference (use serial processing for benchmarking)",
)
args = parser.parse_args()
return cls(**vars(args)) # pyright: ignore[reportAny] - We are intentionally validating here, we can't do it statically

View File

@@ -1,5 +1,5 @@
from enum import Enum
from typing import Annotated
from typing import Annotated, Any
import aiofiles
import aiofiles.os as aios
@@ -7,7 +7,14 @@ import tomlkit
from anyio import Path, open_file
from huggingface_hub import model_info
from loguru import logger
from pydantic import BaseModel, Field, PositiveInt, field_validator
from pydantic import (
AliasChoices,
BaseModel,
Field,
PositiveInt,
field_validator,
model_validator,
)
from exo.shared.constants import EXO_ENABLE_IMAGE_MODELS
from exo.shared.types.common import ModelId
@@ -121,6 +128,22 @@ MODEL_CARDS: dict[str, ModelCard] = {
supports_tensor=True,
tasks=[ModelTask.TextGeneration],
),
"kimi-k2.5": ModelCard(
model_id=ModelId("mlx-community/Kimi-K2.5"),
storage_size=Memory.from_gb(617),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
tasks=[ModelTask.TextGeneration],
),
"kimi-k2.5-4bit": ModelCard(
model_id=ModelId("mlx-community/Kimi-K2.5-4bit"),
storage_size=Memory.from_gb(606),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
tasks=[ModelTask.TextGeneration],
),
# llama-3.1
"llama-3.1-8b": ModelCard(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"),
@@ -703,15 +726,18 @@ if EXO_ENABLE_IMAGE_MODELS:
class ConfigData(BaseModel):
model_config = {"extra": "ignore"} # Allow unknown fields
# Common field names for number of layers across different architectures
num_hidden_layers: Annotated[int, Field(ge=0)] | None = None
num_layers: Annotated[int, Field(ge=0)] | None = None
n_layer: Annotated[int, Field(ge=0)] | None = None
n_layers: Annotated[int, Field(ge=0)] | None = None # Sometimes used
num_decoder_layers: Annotated[int, Field(ge=0)] | None = None # Transformer models
decoder_layers: Annotated[int, Field(ge=0)] | None = None # Some architectures
hidden_size: Annotated[int, Field(ge=0)] | None = None
architectures: list[str] | None = None
hidden_size: Annotated[int, Field(ge=0)] | None = None
layer_count: int = Field(
validation_alias=AliasChoices(
"num_hidden_layers",
"num_layers",
"n_layer",
"n_layers",
"num_decoder_layers",
"decoder_layers",
)
)
@property
def supports_tensor(self) -> bool:
@@ -726,25 +752,27 @@ class ConfigData(BaseModel):
["GptOssForCausalLM"],
]
@property
def layer_count(self) -> int:
# Check common field names for layer count
layer_fields = [
self.num_hidden_layers,
self.num_layers,
self.n_layer,
self.n_layers,
self.num_decoder_layers,
self.decoder_layers,
]
@model_validator(mode="before")
@classmethod
def defer_to_text_config(cls, data: dict[str, Any]):
text_config = data.get("text_config")
if text_config is None:
return data
for layer_count in layer_fields:
if layer_count is not None:
return layer_count
for field in [
"architectures",
"hidden_size",
"num_hidden_layers",
"num_layers",
"n_layer",
"n_layers",
"num_decoder_layers",
"decoder_layers",
]:
if (val := text_config.get(field)) is not None: # pyright: ignore[reportAny]
data[field] = val
raise ValueError(
f"No layer count found in config.json: {self.model_dump_json()}"
)
return data
async def get_config_data(model_id: ModelId) -> ConfigData:

View File

@@ -239,10 +239,12 @@ class ChatCompletionTaskParams(BaseModel):
tool_choice: str | dict[str, Any] | None = None
parallel_tool_calls: bool | None = None
user: str | None = None
# Internal flag for benchmark mode - set by API, preserved through serialization
bench: bool = False
class BenchChatCompletionTaskParams(ChatCompletionTaskParams):
pass
bench: bool = True
class PlaceInstanceParams(BaseModel):
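Editor's note: the point of moving bench onto the base class is that a plain field survives serialization, whereas subclass identity does not. A minimal sketch with abbreviated class names:

from pydantic import BaseModel

class TaskParams(BaseModel):        # stands in for ChatCompletionTaskParams
    bench: bool = False

class BenchTaskParams(TaskParams):  # stands in for BenchChatCompletionTaskParams
    bench: bool = True

# After a serialize/deserialize round-trip the subclass is gone, but the
# bench flag is preserved; an isinstance() check would have returned False.
restored = TaskParams.model_validate(BenchTaskParams().model_dump())
assert restored.bench is True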

View File

@@ -26,6 +26,7 @@ from mlx_lm.models.glm4_moe_lite import Glm4MoeLiteDecoderLayer, Glm4MoeLiteMLP
from mlx_lm.models.glm4_moe_lite import Model as GLM4MoeLiteModel
from mlx_lm.models.gpt_oss import GptOssMoeModel
from mlx_lm.models.gpt_oss import Model as GptOssModel
from mlx_lm.models.kimi_k25 import Model as KimiK25Model
from mlx_lm.models.llama import Model as LlamaModel
from mlx_lm.models.minimax import Model as MiniMaxModel
from mlx_lm.models.ministral3 import Model as Ministral3Model
@@ -216,6 +217,9 @@ def pipeline_auto_parallel(
device_rank, world_size = model_shard_meta.device_rank, model_shard_meta.world_size
layers = layers[start_layer:end_layer]
for layer in layers:
mx.eval(layer) # type: ignore
layers[0] = PipelineFirstLayer(layers[0], device_rank, group=group)
# Wrap intermediate layers with eval checkpoints to prevent GPU timeout
for i in range(1, len(layers) - 1):
@@ -367,7 +371,7 @@ def tensor_auto_parallel(
all_to_sharded_linear_in_place,
sharded_to_all_linear_in_place,
)
elif isinstance(model, (DeepseekV3Model, DeepseekV32Model)):
elif isinstance(model, (DeepseekV3Model, DeepseekV32Model, KimiK25Model)):
tensor_parallel_sharding_strategy = DeepSeekShardingStrategy(
group,
all_to_sharded_linear,
@@ -476,7 +480,7 @@ def _set_layers(model: nn.Module, layers: list[_LayerCallable]) -> None:
# Update DeepSeek V3 specific parameters when layers are shrunk
if isinstance(
model, (DeepseekV3Model, DeepseekV32Model, Glm4MoeModel)
model, (DeepseekV3Model, DeepseekV32Model, Glm4MoeModel, KimiK25Model)
) and hasattr(inner_model_instance, "num_layers"):
logger.info(
f"Setting num_layers to {len(layers)} for model {model.model.__class__.__name__}"
@@ -495,6 +499,66 @@ def _set_layers(model: nn.Module, layers: list[_LayerCallable]) -> None:
raise ValueError("Model must have either a 'layers' or 'h' attribute")
def _patch_deepseek_for_batching(model: nn.Module) -> None:
"""Patch DeepseekV3Model to handle batched total_context in __call__.
The upstream mlx-lm DeepseekV3Model has a bug where total_context becomes
an array (one value per batch item) when batching, but the comparison
`total_context >= self._mla_crossover` expects a scalar.
This patch fixes it by temporarily replacing the cache offset with a scalar
(max across batch) before calling the original __call__, then restoring it.
"""
# Get the inner model (DeepseekV3Model)
inner_model: Any = getattr(model, "model", None)
if inner_model is None:
inner_model = getattr(model, "language_model", None)
if inner_model is not None:
inner_model = getattr(inner_model, "model", None) # pyright: ignore[reportAny]
if inner_model is None:
return
# Get the inner model's class and patch __call__
inner_cls: Any = inner_model.__class__ # pyright: ignore[reportAny]
if hasattr(inner_cls, "_batching_patched"): # pyright: ignore[reportAny]
return # Already patched
original_call: Any = inner_cls.__call__ # pyright: ignore[reportAny]
def patched_inner_call(
self: Any, # pyright: ignore[reportAny]
x: mx.array,
cache: Any = None, # pyright: ignore[reportAny]
) -> mx.array:
# Fix the batching bug where cache[0].offset is an array but the
# comparison `total_context >= self._mla_crossover` expects a scalar.
# We temporarily replace the offset with a scalar (max across batch)
# for the crossover check, then restore it after.
if cache is not None and len(cache) > 0 and hasattr(self, "_mla_crossover"): # pyright: ignore[reportAny]
first_cache = cache[0]
original_offset: Any = first_cache.offset # pyright: ignore[reportAny]
# Check if offset is an array (batched) and needs fixing
if hasattr(original_offset, "shape") and original_offset.shape: # pyright: ignore[reportAny]
# Use max offset for the crossover decision (conservative choice)
scalar_offset = int(mx.max(original_offset).item()) # pyright: ignore[reportAny]
first_cache.offset = scalar_offset
try:
result: Any = original_call(self, x, cache) # pyright: ignore[reportAny]
finally:
# Restore original array offset
first_cache.offset = original_offset
return result # pyright: ignore[reportAny]
return original_call(self, x, cache) # pyright: ignore[reportAny]
inner_cls.__call__ = patched_inner_call
inner_cls._batching_patched = True
logger.info("Patched DeepseekV3Model for batched inference")
class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
def shard_model(
self,
@@ -520,6 +584,9 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
layer.self_attn.kv_b_proj
)
layer.self_attn.o_proj = self.sharded_to_all_linear(layer.self_attn.o_proj)
# Store pre-shard head count and group for context parallelism
layer.self_attn.context_parallel_total_heads = layer.self_attn.num_heads
layer.self_attn._cp_group = self.group
layer.self_attn.num_heads //= self.N
# Shard the MLP
@@ -542,6 +609,10 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
mx.eval(layer)
# Store group for context parallelism
if hasattr(model, "model"):
model.model._cp_group = self.group
return model
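Editor's note: the bug the DeepSeek patch works around, in miniature: with batching, the cache offset is a per-item array, so comparing it against the scalar _mla_crossover no longer yields a usable scalar truth value. A small illustration (the crossover value is made up; the mx calls are standard MLX):

import mlx.core as mx

offsets = mx.array([100, 900])  # per-batch-item cache offsets
crossover = 512                 # stand-in for self._mla_crossover
# offsets >= crossover is array([False, True]), ambiguous as an `if` condition.
# The patch collapses it to a scalar (max across the batch) before the check:
scalar_offset = int(mx.max(offsets).item())
use_mla = scalar_offset >= crossover  # conservative: True if any item crossed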

View File

@@ -8,7 +8,6 @@ from mlx_lm.sample_utils import make_sampler
from mlx_lm.tokenizer_utils import TokenizerWrapper
from exo.shared.types.api import (
BenchChatCompletionTaskParams,
ChatCompletionMessage,
FinishReason,
GenerationStats,
@@ -368,7 +367,7 @@ def mlx_generate(
) -> Generator[GenerationResponse]:
# Ensure that generation stats only contains peak memory for this generation
mx.reset_peak_memory()
is_bench: bool = isinstance(task, BenchChatCompletionTaskParams)
is_bench: bool = task.bench
# Currently we support chat-completion tasks only.
logger.debug(f"task_params: {task}")
@@ -417,6 +416,7 @@ def mlx_generate(
max_tokens = task.max_tokens or MAX_TOKENS
generated_text_parts: list[str] = []
generation_start_time = time.perf_counter()
total_prompt_tokens = len(prompt_tokens) + prefix_hit_length
for out in stream_generate(
model=model,
tokenizer=tokenizer,
@@ -438,7 +438,7 @@ def mlx_generate(
stats = GenerationStats(
prompt_tps=float(prefill_tps or out.prompt_tps),
generation_tps=float(out.generation_tps),
prompt_tokens=int(out.prompt_tokens),
prompt_tokens=total_prompt_tokens,
generation_tokens=int(out.generation_tokens),
peak_memory_usage=Memory.from_gb(out.peak_memory),
)

View File

@@ -165,12 +165,11 @@ def mlx_distributed_init(
jaccl_coordinator = jaccl_coordinators[bound_instance.bound_node_id]
# TODO: update once upstream fixes
logger.info(
f"rank {rank} MLX_JACCL_DEVICES: {coordination_file} with devices: {jaccl_devices_json}"
f"rank {rank} MLX_IBV_DEVICES: {coordination_file} with devices: {jaccl_devices_json}"
)
logger.info(f"rank {rank} MLX_JACCL_COORDINATOR: {jaccl_coordinator}")
os.environ["MLX_JACCL_DEVICES"] = coordination_file
os.environ["MLX_IBV_DEVICES"] = coordination_file
os.environ["MLX_RANK"] = str(rank)
os.environ["MLX_JACCL_COORDINATOR"] = jaccl_coordinator
group = mx.distributed.init(backend="jaccl", strict=True)
@@ -259,10 +258,10 @@ def shard_and_load(
logger.info(f"Group size: {group.size()}, group rank: {group.rank()}")
# Estimate timeout based on model size
base_timeout = float(os.environ.get("EXO_MODEL_LOAD_TIMEOUT", "60"))
# Estimate timeout based on model size (5x default for large queued workloads)
base_timeout = float(os.environ.get("EXO_MODEL_LOAD_TIMEOUT", "300"))
model_size_gb = get_weights_size(shard_metadata).in_bytes / (1024**3)
timeout_seconds = base_timeout + model_size_gb / 5
timeout_seconds = base_timeout + model_size_gb
logger.info(
f"Evaluating model parameters with timeout of {timeout_seconds:.0f}s "
f"(model size: {model_size_gb:.1f}GB)"
@@ -339,8 +338,35 @@ def load_tokenizer_for_model_id(
# Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer
if "kimi-k2" in model_id_lower:
import importlib.util
import types
sys.path.insert(0, str(model_path))
from tokenization_kimi import TikTokenTokenizer # type: ignore[import-not-found] # noqa: I001
# Load tool_declaration_ts first (tokenization_kimi imports it with relative import)
tool_decl_path = model_path / "tool_declaration_ts.py"
if tool_decl_path.exists():
spec = importlib.util.spec_from_file_location(
"tool_declaration_ts", tool_decl_path
)
if spec and spec.loader:
tool_decl_module = importlib.util.module_from_spec(spec)
sys.modules["tool_declaration_ts"] = tool_decl_module
spec.loader.exec_module(tool_decl_module)
# Load tokenization_kimi with patched source (convert relative to absolute import)
tok_path = model_path / "tokenization_kimi.py"
source = tok_path.read_text()
source = source.replace("from .tool_declaration_ts", "from tool_declaration_ts")
spec = importlib.util.spec_from_file_location("tokenization_kimi", tok_path)
if spec:
tok_module = types.ModuleType("tokenization_kimi")
tok_module.__file__ = str(tok_path)
sys.modules["tokenization_kimi"] = tok_module
exec(compile(source, tok_path, "exec"), tok_module.__dict__) # noqa: S102
TikTokenTokenizer = tok_module.TikTokenTokenizer # type: ignore[attr-defined] # noqa: N806
else:
from tokenization_kimi import TikTokenTokenizer # type: ignore[import-not-found] # noqa: I001
hf_tokenizer: Any = TikTokenTokenizer.from_pretrained(model_path) # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType]
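Editor's note: the tokenizer workaround above boils down to loading a module from a file after rewriting its package-relative import. A generic sketch of that trick (function name and arguments are illustrative):

import sys
import types
from pathlib import Path

def load_with_rewritten_import(name: str, path: Path, old: str, new: str):
    source = path.read_text().replace(old, new)  # e.g. "from .x" -> "from x"
    module = types.ModuleType(name)
    module.__file__ = str(path)
    sys.modules[name] = module  # register before exec so self-imports resolve
    exec(compile(source, str(path), "exec"), module.__dict__)
    return module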

View File

@@ -275,9 +275,10 @@ class Worker:
case ChatCompletion():
# Don't wait for acknowledgment for batchable inference tasks
# This allows multiple tasks to reach the runner for batching
await self.runners[self._task_to_runner_id(task)].start_task(
task, wait_for_ack=False
)
# For tensor parallel: all nodes send tasks to their runner
# so non-coordinator can participate in collective ops
runner_id = self._task_to_runner_id(task)
await self.runners[runner_id].start_task(task, wait_for_ack=False)
case task:
await self.runners[self._task_to_runner_id(task)].start_task(task)

View File

@@ -255,8 +255,12 @@ def _ready_to_warmup(
)
# Rank = 0
# For tensor parallel, warmup is skipped so other ranks go directly
# to RunnerReady. We need to accept both WarmingUp and Ready states.
connecting_rank_ready = device_rank == 0 and all(
isinstance(all_runners.get(global_runner_id, None), RunnerWarmingUp)
isinstance(
all_runners.get(global_runner_id, None), (RunnerWarmingUp, RunnerReady)
)
for global_runner_id in shard_assignments.runner_to_shard
if global_runner_id != runner_id
)

View File

@@ -81,6 +81,8 @@ class BatchedInferenceHandler:
device_rank: int,
world_size: int = 1,
max_batch_size: int = 32,
tensor_parallel_group: mx.distributed.Group | None = None,
is_coordinator: bool = True,
):
self.model = model
self.tokenizer = tokenizer
@@ -88,6 +90,8 @@ class BatchedInferenceHandler:
self.device_rank = device_rank
self.world_size = world_size
self.max_batch_size = max_batch_size
self.tensor_parallel_group = tensor_parallel_group
self.is_coordinator = is_coordinator
# Model-specific thinking/reasoning detection
self.is_gpt_oss = isinstance(model, GptOssModel)
@@ -112,6 +116,9 @@ class BatchedInferenceHandler:
# Pending requests waiting to be batched
self.pending: list[PendingRequest] = []
# Track active count for non-coordinators (they don't have uid_to_request)
self._non_coordinator_active_count: int = 0
# Active batch generator and request tracking
self.batch_generator: BatchGenerator | None = None
self.pipelined_generator: PipelinedGenerator | None = None
@@ -138,7 +145,12 @@ class BatchedInferenceHandler:
self.pipelined_generator is not None
and self.pipelined_generator.has_active
)
return self.batch_generator is not None and len(self.uid_to_request) > 0
if self.batch_generator is None:
return False
# For non-coordinators, use internal counter (they don't track uid_to_request)
if not self.is_coordinator:
return self._non_coordinator_active_count > 0
return len(self.uid_to_request) > 0
@property
def has_pending(self) -> bool:
@@ -187,28 +199,130 @@ class BatchedInferenceHandler:
f"Added request to batch queue (pending={len(self.pending)}, active={self.current_batch_size})"
)
def _broadcast_int(self, value: int) -> int:
"""Broadcast an integer from rank 0 to all ranks."""
if self.tensor_parallel_group is None:
return value
arr = mx.array([value if self.is_coordinator else 0], dtype=mx.int32)
synced = mx.distributed.all_sum(arr, group=self.tensor_parallel_group)
mx.eval(synced)
return int(synced.item())
def _broadcast_tokens(self, tokens_list: list[list[int]]) -> list[list[int]]:
"""Broadcast tokenized prompts from rank 0 to all ranks."""
if self.tensor_parallel_group is None:
return tokens_list
# Step 1: Broadcast number of sequences
num_seqs = self._broadcast_int(len(tokens_list))
if num_seqs == 0:
return []
# Step 2: Broadcast length of each sequence
lengths: list[int] = []
for i in range(num_seqs):
length = self._broadcast_int(
len(tokens_list[i])
if self.is_coordinator and i < len(tokens_list)
else 0
)
lengths.append(length)
# Step 3: Broadcast flattened tokens
total_tokens = sum(lengths)
if self.is_coordinator:
flat: list[int] = []
for seq in tokens_list:
flat.extend(seq)
flat_arr = mx.array(flat, dtype=mx.int32)
else:
flat_arr = mx.zeros((total_tokens,), dtype=mx.int32)
# Broadcast via all_sum (rank 0 contributes, others contribute zeros)
synced_flat = mx.distributed.all_sum(flat_arr, group=self.tensor_parallel_group)
mx.eval(synced_flat)
# Unflatten
result: list[list[int]] = []
offset = 0
for length in lengths:
seq_arr = synced_flat[offset : offset + length]
seq: list[int] = [int(x) for x in seq_arr.tolist()] # type: ignore[union-attr]
result.append(seq)
offset += length
return result
def flush(self) -> None:
"""Start processing pending requests by adding them to the batch/pipelined generator."""
if not self.has_pending:
return
# Declare variables with types
tokenized_prompts: list[list[int]]
max_tokens_list: list[int]
samplers: list[Callable[[mx.array], mx.array]]
prompt_token_counts: list[int]
requests_to_flush: list[PendingRequest]
# Determine how many requests to flush (up to available slots)
available_slots = self.max_batch_size - self.current_batch_size
requests_to_flush = self.pending[:available_slots]
self.pending = self.pending[available_slots:]
# For tensor parallel: rank 0 broadcasts batch info, others receive and sync
if self.tensor_parallel_group is not None:
# Broadcast how many to flush
available_slots = self.max_batch_size - self.current_batch_size
num_to_flush = self._broadcast_int(
min(len(self.pending), available_slots) if self.is_coordinator else 0
)
# Prepare batch data - tokenize prompts
tokenized_prompts: list[list[int]] = []
max_tokens_list: list[int] = []
samplers: list[Callable[[mx.array], mx.array]] = []
prompt_token_counts: list[int] = []
if num_to_flush == 0:
return
for req in requests_to_flush:
tokens = self.tokenizer.encode(req.prompt)
tokenized_prompts.append(tokens)
max_tokens_list.append(req.max_tokens)
samplers.append(req.sampler)
prompt_token_counts.append(len(tokens))
# Get requests and tokenize on rank 0
if self.is_coordinator:
requests_to_flush = self.pending[:num_to_flush]
self.pending = self.pending[num_to_flush:]
tokenized_prompts = [
self.tokenizer.encode(req.prompt) for req in requests_to_flush
]
max_tokens_list = [req.max_tokens for req in requests_to_flush]
else:
requests_to_flush = []
tokenized_prompts = []
max_tokens_list = []
# Broadcast tokenized prompts to all ranks
tokenized_prompts = self._broadcast_tokens(tokenized_prompts)
# Broadcast max_tokens
synced_max_tokens: list[int] = []
for i in range(num_to_flush):
mt = self._broadcast_int(
max_tokens_list[i]
if self.is_coordinator and i < len(max_tokens_list)
else 0
)
synced_max_tokens.append(mt)
max_tokens_list = synced_max_tokens
            # Create samplers on every rank (identical everywhere since we typically use temp=0)
samplers = [make_sampler(temp=0.0) for _ in range(num_to_flush)]
prompt_token_counts = [len(t) for t in tokenized_prompts]
else:
if not self.has_pending:
return
available_slots = self.max_batch_size - self.current_batch_size
requests_to_flush = self.pending[:available_slots]
self.pending = self.pending[available_slots:]
# Prepare batch data - tokenize prompts
tokenized_prompts = []
max_tokens_list = []
samplers = []
prompt_token_counts = []
for req in requests_to_flush:
tokens = self.tokenizer.encode(req.prompt)
tokenized_prompts.append(tokens)
max_tokens_list.append(req.max_tokens)
samplers.append(req.sampler)
prompt_token_counts.append(len(tokens))
if self.use_pipelined:
self._flush_pipelined(
@@ -314,28 +428,48 @@ class BatchedInferenceHandler:
samplers=samplers, # pyright: ignore[reportCallIssue]
)
for uid, req, prompt_tokens, tokens in zip(
uids, requests_to_flush, prompt_token_counts, tokenized_prompts, strict=True
): # pyright: ignore[reportUnknownArgumentType]
parser = None
if self.is_gpt_oss and self._harmony_encoding is not None:
parser = StreamableParser(self._harmony_encoding, role=Role.ASSISTANT) # pyright: ignore[reportAny]
# Check if prompt contains <think> token - if so, model is already in thinking mode
starts_in_thinking = (
self._think_start_token is not None
and self._think_start_token in tokens
)
self.uid_to_request[uid] = ActiveRequest(
command_id=req.task.command_id,
should_extract_logprobs=req.should_extract_logprobs,
top_k=req.top_k,
prompt_tokens=prompt_tokens,
harmony_parser=parser,
in_thinking=starts_in_thinking,
)
# Only coordinator tracks requests (non-coordinators don't have request objects)
if self.is_coordinator:
for uid, req, prompt_tokens, tokens in zip(
uids, # pyright: ignore[reportUnknownArgumentType]
requests_to_flush,
prompt_token_counts,
tokenized_prompts,
strict=True,
):
parser = None
if self.is_gpt_oss and self._harmony_encoding is not None:
parser = StreamableParser(
self._harmony_encoding, # pyright: ignore[reportAny]
role=Role.ASSISTANT,
)
# Check if prompt contains <think> token - if so, model is already in thinking mode
starts_in_thinking = (
self._think_start_token is not None
and self._think_start_token in tokens
)
self.uid_to_request[uid] = ActiveRequest(
command_id=req.task.command_id,
should_extract_logprobs=req.should_extract_logprobs,
top_k=req.top_k,
prompt_tokens=prompt_tokens,
harmony_parser=parser,
in_thinking=starts_in_thinking,
)
else:
# Non-coordinator: INCREMENT active count (not set) to track all active requests
# across multiple flushes. This ensures is_active remains True when new requests
# are added while existing ones are still generating.
self._non_coordinator_active_count += len(tokenized_prompts)
# Log the actual active count (different tracking for coordinator vs non-coordinator)
actual_active = (
self.current_batch_size
if self.is_coordinator
else self._non_coordinator_active_count
)
logger.info(
f"Flushed {len(requests_to_flush)} requests into batch (active={self.current_batch_size}, uids={list(self.uid_to_request.keys())})"
f"Flushed {len(tokenized_prompts)} requests into batch (active={actual_active}, is_coordinator={self.is_coordinator})"
)
def step(self) -> Generator[Event, None, None]:
@@ -348,7 +482,20 @@ class BatchedInferenceHandler:
yield from self._step_pipelined()
return
if self.batch_generator is None or not self.uid_to_request:
if self.batch_generator is None:
return
# Non-coordinators still need to call next() for model sync but don't emit events
if not self.is_coordinator:
if self._non_coordinator_active_count > 0:
nc_responses: list[Any] = self.batch_generator.next() # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
# Track completions to update active count
for nc_resp in nc_responses: # pyright: ignore[reportUnknownVariableType]
if nc_resp.finish_reason is not None: # pyright: ignore[reportUnknownMemberType]
self._non_coordinator_active_count -= 1
return
if not self.uid_to_request:
return
# Get next tokens for all active requests
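
The broadcast helpers above rely on a standard trick: MLX's distributed API exposes all_sum, and summing rank 0's payload against zeros contributed by every other rank reproduces rank 0's data everywhere. A self-contained sketch under the assumption of an initialized mx.distributed group (e.g. a job started with mlx.launch):

import mlx.core as mx

group = mx.distributed.init()
is_coordinator = group.rank() == 0

def broadcast_tokens(tokens: list[int]) -> list[int]:
    # Step 1: agree on the length. Rank 0 contributes the real value, others
    # contribute zero, so the all_sum equals rank 0's length on every rank.
    n = int(
        mx.distributed.all_sum(
            mx.array([len(tokens) if is_coordinator else 0], dtype=mx.int32),
            group=group,
        ).item()
    )
    # Step 2: sum the real payload against zeros to replicate it everywhere.
    payload = (
        mx.array(tokens, dtype=mx.int32)
        if is_coordinator
        else mx.zeros((n,), dtype=mx.int32)
    )
    synced = mx.distributed.all_sum(payload, group=group)
    mx.eval(synced)
    return [int(x) for x in synced.tolist()]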

View File

@@ -1,5 +1,6 @@
import base64
import json
import os
import time
from collections.abc import Generator
from functools import cache
@@ -71,7 +72,6 @@ from exo.worker.engines.image import (
warmup_image_generator,
)
from exo.worker.engines.mlx import Model
from exo.worker.engines.mlx.cache import KVPrefixCache
from exo.worker.engines.mlx.generator.generate import (
mlx_generate,
warmup_inference,
@@ -87,7 +87,7 @@ from exo.worker.runner.batched_handler import BatchedInferenceHandler
from exo.worker.runner.bootstrap import logger
# Batching configuration
BATCH_ENABLED = True
BATCH_ENABLED = os.environ.get("EXO_NO_BATCH") != "1"
BATCH_MAX_SIZE = 32
@@ -217,6 +217,11 @@ def main(
bound_instance.bound_shard,
)
device_rank = shard_metadata.device_rank
# Determine if this node is the coordinator for tensor parallel
# Use sorted node ordering for consistency with main.py
node_id = bound_instance.bound_node_id
sorted_nodes = sorted(instance.shard_assignments.node_to_runner.keys())
is_tp_coordinator = node_id == sorted_nodes[0]
logger.info("hello from the runner")
if getattr(shard_metadata, "immediate_exception", False):
raise Exception("Fake exception - runner failed to spin up.")
@@ -228,8 +233,8 @@ def main(
model: Model | DistributedImageModel | None = None
tokenizer: TokenizerWrapper | None = None
group = None
kv_prefix_cache: KVPrefixCache | None = None
batch_handler: BatchedInferenceHandler | None = None
is_tensor_parallel = False
current_status: RunnerStatus = RunnerIdle()
logger.info("runner created")
@@ -242,7 +247,13 @@ def main(
Process a single task. Returns True if the runner should continue,
False if it should shut down.
"""
nonlocal current_status, model, tokenizer, group, batch_handler
nonlocal \
current_status, \
model, \
tokenizer, \
group, \
batch_handler, \
is_tensor_parallel
event_sender.send(
TaskStatusUpdated(task_id=task.task_id, task_status=TaskStatus.Running)
)
@@ -297,26 +308,25 @@ def main(
logger.info(f"model has_tool_calling={tokenizer.has_tool_calling}")
# Initialize batch handler for text generation models
is_tensor_parallel = isinstance(shard_metadata, TensorShardMetadata)
if BATCH_ENABLED:
# For tensor parallelism, distributed ops are handled inside model layers
# so batch handler should use world_size=1 (no pipelining)
batch_world_size = (
1
if isinstance(shard_metadata, TensorShardMetadata)
else shard_metadata.world_size
)
batch_handler = BatchedInferenceHandler(
model=model,
tokenizer=tokenizer,
model_id=shard_metadata.model_card.model_id,
device_rank=device_rank,
world_size=batch_world_size,
world_size=1
if is_tensor_parallel
else shard_metadata.world_size,
max_batch_size=BATCH_MAX_SIZE,
tensor_parallel_group=group if is_tensor_parallel else None,
is_coordinator=is_tp_coordinator
if is_tensor_parallel
else True,
)
logger.info(
f"Batch handler initialized (max_batch_size={BATCH_MAX_SIZE}, world_size={batch_world_size})"
f"Batch handler initialized (max_batch_size={BATCH_MAX_SIZE}, tensor_parallel={is_tensor_parallel})"
)
kv_prefix_cache = KVPrefixCache(tokenizer)
elif (
ModelTask.TextToImage in shard_metadata.model_card.tasks
@@ -347,12 +357,22 @@ def main(
assert not isinstance(model, DistributedImageModel)
assert tokenizer
toks = warmup_inference(
model=model,
tokenizer=tokenizer,
# kv_prefix_cache=kv_prefix_cache, # supply for warmup-time prefix caching
)
logger.info(f"warmed up by generating {toks} tokens")
# For tensor parallel with batch handler, skip explicit warmup.
# The batch handler synchronizes all ranks via all_sum in flush(),
# so the first real request warms up the model on all ranks simultaneously.
# Without a batch handler, warmup must run normally to avoid GPU locks
# from mismatched send/recv in serial processing.
if is_tensor_parallel and batch_handler is not None:
logger.info(
"Tensor parallel: skipping warmup (first request will warm up through batch handler)"
)
toks = 0
else:
toks = warmup_inference(
model=model,
tokenizer=tokenizer,
)
logger.info(f"warmed up by generating {toks} tokens")
logger.info(
f"runner initialized in {time.time() - setup_start_time} seconds"
)
@@ -425,6 +445,12 @@ def main(
assert batch_handler is not None
try:
_check_for_debug_prompts(task_params.messages[0].content)
# Non-coordinator TP: don't add to batch handler.
# The batch handler syncs via all_sum in flush();
# non-coordinator participates through that, not through add_request.
if is_tensor_parallel and not is_tp_coordinator:
event_sender.send(TaskAcknowledged(task_id=task.task_id))
return True
batch_handler.add_request(task)
# Update status to running if not already
@@ -599,11 +625,16 @@ def main(
with task_receiver as tasks:
while True:
# Check if batch handler is active and needs processing
# For tensor parallel: both coordinator and non-coordinator go through
# the same loop, but only coordinator receives tasks. This ensures
# flush() all_sum calls are synchronized.
if batch_handler is not None and (
batch_handler.is_active or batch_handler.has_pending
batch_handler.is_active
or batch_handler.has_pending
or is_tensor_parallel
):
# Drain all available tasks before stepping
# Non-coordinator won't receive any (main.py doesn't send to it)
should_break = False
while True:
try:
@@ -627,11 +658,13 @@ def main(
if should_break:
break
# Flush all pending requests before stepping
if batch_handler.has_pending:
logger.info(
f"Flushing batch (pending={len(batch_handler.pending)}, active={batch_handler.current_batch_size})"
)
# Flush: for tensor parallel, always call so all ranks sync via all_sum
# For non-TP, only call when has_pending
if batch_handler.has_pending or is_tensor_parallel:
if batch_handler.has_pending:
logger.info(
f"Flushing batch (pending={len(batch_handler.pending)}, active={batch_handler.current_batch_size})"
)
batch_handler.flush()
# Step generation and emit events
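
The loop change encodes a collective-call invariant: mx.distributed.all_sum blocks until every rank in the group calls it, so if the coordinator enters flush() while a non-coordinator skips it, both sides hang. Keeping every tensor-parallel rank on the same flush()/step() cadence, work or not, is what keeps the collectives paired. A reduced sketch of that invariant (drain_incoming_tasks and emit are placeholders, not the runner's actual helpers):

def runner_loop(handler, is_tensor_parallel: bool) -> None:
    while True:
        # Only the coordinator actually receives tasks; peers drain nothing.
        drain_incoming_tasks(handler)
        # TP invariant: flush() performs all_sum calls, so all ranks must enter
        # it in lockstep even when they have nothing pending.
        if handler.has_pending or is_tensor_parallel:
            handler.flush()
        # step() likewise advances the model on every rank; non-coordinators
        # run it for synchronization but emit no events.
        for event in handler.step():
            emit(event)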

View File

@@ -5,6 +5,7 @@ from exo.shared.types.worker.runners import (
RunnerIdle,
RunnerLoaded,
RunnerLoading,
RunnerReady,
RunnerWarmingUp,
)
from exo.worker.tests.constants import (

uv.lock generated
View File

@@ -413,36 +413,46 @@ wheels = [
[[package]]
name = "datasets"
version = "4.5.0"
version = "2.21.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "fsspec", extra = ["http"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", version = "3.0.0rc2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
{ name = "pandas", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
{ name = "pyarrow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/55/bf/bb927bde63d649296c83e883171ae77074717c1b80fe2868b328bd0dbcbb/datasets-4.5.0.tar.gz", hash = "sha256:00c698ce1c2452e646cc5fad47fef39d3fe78dd650a8a6eb205bb45eb63cd500", size = 588384, upload-time = "2026-01-14T18:27:54.297Z" }
sdist = { url = "https://files.pythonhosted.org/packages/e5/a5/38719e5cff7aa0537a6be37d21cc1fdd7096e9565e8fce2d46a822e10b5b/datasets-2.21.0.tar.gz", hash = "sha256:998f85a8460f1bd982e5bd058f8a0808eef424249e3df1e8cdd594ccd0dc8ba2", size = 2215317, upload-time = "2024-08-14T06:40:44.314Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/d5/0d563ea3c205eee226dc8053cf7682a8ac588db8acecd0eda2b587987a0b/datasets-4.5.0-py3-none-any.whl", hash = "sha256:b5d7e08096ffa407dd69e58b1c0271c9b2506140839b8d99af07375ad31b6726", size = 515196, upload-time = "2026-01-14T18:27:52.419Z" },
{ url = "https://files.pythonhosted.org/packages/72/b3/33c4ad44fa020e3757e9b2fad8a5de53d9079b501e6bbc45bdd18f82f893/datasets-2.21.0-py3-none-any.whl", hash = "sha256:25e4e097110ce28824b746a107727ada94024cba11db8bc588d468414692b65a", size = 527251, upload-time = "2024-08-14T06:40:39.612Z" },
]
[[package]]
name = "dill"
version = "0.4.0"
version = "0.3.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847, upload-time = "2024-01-27T23:42:16.145Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" },
{ url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252, upload-time = "2024-01-27T23:42:14.239Z" },
]
[[package]]
name = "distro"
version = "1.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
]
[[package]]
@@ -466,7 +476,8 @@ dependencies = [
{ name = "multiprocess", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pandas", version = "3.0.0rc2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
{ name = "pandas", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "xxhash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -509,7 +520,10 @@ dependencies = [
[package.optional-dependencies]
eval = [
{ name = "datasets", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "lm-eval", extra = ["api"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pebble", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
[package.dev-dependencies]
@@ -527,6 +541,7 @@ requires-dist = [
{ name = "aiofiles", specifier = ">=24.1.0" },
{ name = "aiohttp", specifier = ">=3.12.14" },
{ name = "anyio", specifier = "==4.11.0" },
{ name = "datasets", marker = "extra == 'eval'", specifier = ">=2.14.0,<4.0" },
{ name = "exo-pyo3-bindings", editable = "rust/exo_pyo3_bindings" },
{ name = "fastapi", specifier = ">=0.116.1" },
{ name = "filelock", specifier = ">=3.18.0" },
@@ -536,10 +551,12 @@ requires-dist = [
{ name = "lm-eval", extras = ["api"], marker = "extra == 'eval'" },
{ name = "loguru", specifier = ">=0.7.3" },
{ name = "mflux", specifier = "==0.15.4" },
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.3" },
{ name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.3" },
{ name = "mlx-lm", specifier = "==0.30.5" },
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.4" },
{ name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.4" },
{ name = "mlx-lm", git = "https://github.com/davidmcc73/mlx-lm.git?branch=main" },
{ name = "openai", marker = "extra == 'eval'", specifier = ">=1.59.6" },
{ name = "openai-harmony", specifier = ">=0.0.8" },
{ name = "pebble", marker = "extra == 'eval'", specifier = ">=5.1.0" },
{ name = "pillow", specifier = ">=11.0,<12.0" },
{ name = "psutil", specifier = ">=7.0.0" },
{ name = "pydantic", specifier = ">=2.11.7" },
@@ -697,11 +714,11 @@ wheels = [
[[package]]
name = "fsspec"
version = "2025.10.0"
version = "2024.6.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
sdist = { url = "https://files.pythonhosted.org/packages/90/b6/eba5024a9889fcfff396db543a34bef0ab9d002278f163129f9f01005960/fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49", size = 284584, upload-time = "2024-06-27T14:35:45.467Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
{ url = "https://files.pythonhosted.org/packages/5e/44/73bea497ac69bafde2ee4269292fa3b41f1198f4bb7bbaaabde30ad29d4a/fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e", size = 177561, upload-time = "2024-06-27T14:35:42.023Z" },
]
[package.optional-dependencies]
@@ -926,6 +943,46 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
]
[[package]]
name = "jiter"
version = "0.13.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/9c/7ee5a6ff4b9991e1a45263bfc46731634c4a2bde27dfda6c8251df2d958c/jiter-0.13.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf", size = 306897, upload-time = "2026-02-02T12:36:16.748Z" },
{ url = "https://files.pythonhosted.org/packages/7c/02/be5b870d1d2be5dd6a91bdfb90f248fbb7dcbd21338f092c6b89817c3dbf/jiter-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a", size = 317507, upload-time = "2026-02-02T12:36:18.351Z" },
{ url = "https://files.pythonhosted.org/packages/da/92/b25d2ec333615f5f284f3a4024f7ce68cfa0604c322c6808b2344c7f5d2b/jiter-0.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb", size = 350560, upload-time = "2026-02-02T12:36:19.746Z" },
{ url = "https://files.pythonhosted.org/packages/be/ec/74dcb99fef0aca9fbe56b303bf79f6bd839010cb18ad41000bf6cc71eec0/jiter-0.13.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2", size = 363232, upload-time = "2026-02-02T12:36:21.243Z" },
{ url = "https://files.pythonhosted.org/packages/1b/37/f17375e0bb2f6a812d4dd92d7616e41917f740f3e71343627da9db2824ce/jiter-0.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f", size = 483727, upload-time = "2026-02-02T12:36:22.688Z" },
{ url = "https://files.pythonhosted.org/packages/77/d2/a71160a5ae1a1e66c1395b37ef77da67513b0adba73b993a27fbe47eb048/jiter-0.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159", size = 370799, upload-time = "2026-02-02T12:36:24.106Z" },
{ url = "https://files.pythonhosted.org/packages/01/99/ed5e478ff0eb4e8aa5fd998f9d69603c9fd3f32de3bd16c2b1194f68361c/jiter-0.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663", size = 359120, upload-time = "2026-02-02T12:36:25.519Z" },
{ url = "https://files.pythonhosted.org/packages/16/be/7ffd08203277a813f732ba897352797fa9493faf8dc7995b31f3d9cb9488/jiter-0.13.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa", size = 390664, upload-time = "2026-02-02T12:36:26.866Z" },
{ url = "https://files.pythonhosted.org/packages/d1/84/e0787856196d6d346264d6dcccb01f741e5f0bd014c1d9a2ebe149caf4f3/jiter-0.13.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820", size = 513543, upload-time = "2026-02-02T12:36:28.217Z" },
{ url = "https://files.pythonhosted.org/packages/65/50/ecbd258181c4313cf79bca6c88fb63207d04d5bf5e4f65174114d072aa55/jiter-0.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68", size = 547262, upload-time = "2026-02-02T12:36:29.678Z" },
{ url = "https://files.pythonhosted.org/packages/49/19/a929ec002ad3228bc97ca01dbb14f7632fffdc84a95ec92ceaf4145688ae/jiter-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10", size = 316616, upload-time = "2026-02-02T12:36:36.579Z" },
{ url = "https://files.pythonhosted.org/packages/52/56/d19a9a194afa37c1728831e5fb81b7722c3de18a3109e8f282bfc23e587a/jiter-0.13.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef", size = 346850, upload-time = "2026-02-02T12:36:38.058Z" },
{ url = "https://files.pythonhosted.org/packages/36/4a/94e831c6bf287754a8a019cb966ed39ff8be6ab78cadecf08df3bb02d505/jiter-0.13.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6", size = 358551, upload-time = "2026-02-02T12:36:39.417Z" },
{ url = "https://files.pythonhosted.org/packages/6e/f5/f1997e987211f6f9bd71b8083047b316208b4aca0b529bb5f8c96c89ef3e/jiter-0.13.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0", size = 308804, upload-time = "2026-02-02T12:36:43.496Z" },
{ url = "https://files.pythonhosted.org/packages/cd/8f/5482a7677731fd44881f0204981ce2d7175db271f82cba2085dd2212e095/jiter-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91", size = 318787, upload-time = "2026-02-02T12:36:45.071Z" },
{ url = "https://files.pythonhosted.org/packages/f3/b9/7257ac59778f1cd025b26a23c5520a36a424f7f1b068f2442a5b499b7464/jiter-0.13.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09", size = 353880, upload-time = "2026-02-02T12:36:47.365Z" },
{ url = "https://files.pythonhosted.org/packages/c3/87/719eec4a3f0841dad99e3d3604ee4cba36af4419a76f3cb0b8e2e691ad67/jiter-0.13.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607", size = 366702, upload-time = "2026-02-02T12:36:48.871Z" },
{ url = "https://files.pythonhosted.org/packages/d2/65/415f0a75cf6921e43365a1bc227c565cb949caca8b7532776e430cbaa530/jiter-0.13.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66", size = 486319, upload-time = "2026-02-02T12:36:53.006Z" },
{ url = "https://files.pythonhosted.org/packages/54/a2/9e12b48e82c6bbc6081fd81abf915e1443add1b13d8fc586e1d90bb02bb8/jiter-0.13.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2", size = 372289, upload-time = "2026-02-02T12:36:54.593Z" },
{ url = "https://files.pythonhosted.org/packages/4e/c1/e4693f107a1789a239c759a432e9afc592366f04e901470c2af89cfd28e1/jiter-0.13.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad", size = 360165, upload-time = "2026-02-02T12:36:56.112Z" },
{ url = "https://files.pythonhosted.org/packages/17/08/91b9ea976c1c758240614bd88442681a87672eebc3d9a6dde476874e706b/jiter-0.13.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d", size = 389634, upload-time = "2026-02-02T12:36:57.495Z" },
{ url = "https://files.pythonhosted.org/packages/18/23/58325ef99390d6d40427ed6005bf1ad54f2577866594bcf13ce55675f87d/jiter-0.13.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df", size = 514933, upload-time = "2026-02-02T12:36:58.909Z" },
{ url = "https://files.pythonhosted.org/packages/5b/25/69f1120c7c395fd276c3996bb8adefa9c6b84c12bb7111e5c6ccdcd8526d/jiter-0.13.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d", size = 548842, upload-time = "2026-02-02T12:37:00.433Z" },
{ url = "https://files.pythonhosted.org/packages/62/92/1661d8b9fd6a3d7a2d89831db26fe3c1509a287d83ad7838831c7b7a5c7e/jiter-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0", size = 318423, upload-time = "2026-02-02T12:37:05.806Z" },
{ url = "https://files.pythonhosted.org/packages/4f/3b/f77d342a54d4ebcd128e520fc58ec2f5b30a423b0fd26acdfc0c6fef8e26/jiter-0.13.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40", size = 351438, upload-time = "2026-02-02T12:37:07.189Z" },
{ url = "https://files.pythonhosted.org/packages/76/b3/ba9a69f0e4209bd3331470c723c2f5509e6f0482e416b612431a5061ed71/jiter-0.13.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202", size = 364774, upload-time = "2026-02-02T12:37:08.579Z" },
{ url = "https://files.pythonhosted.org/packages/b3/16/6cdb31fa342932602458dbb631bfbd47f601e03d2e4950740e0b2100b570/jiter-0.13.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0", size = 487238, upload-time = "2026-02-02T12:37:10.066Z" },
{ url = "https://files.pythonhosted.org/packages/ed/b1/956cc7abaca8d95c13aa8d6c9b3f3797241c246cd6e792934cc4c8b250d2/jiter-0.13.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95", size = 372892, upload-time = "2026-02-02T12:37:11.656Z" },
{ url = "https://files.pythonhosted.org/packages/26/c4/97ecde8b1e74f67b8598c57c6fccf6df86ea7861ed29da84629cdbba76c4/jiter-0.13.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59", size = 360309, upload-time = "2026-02-02T12:37:13.244Z" },
{ url = "https://files.pythonhosted.org/packages/4b/d7/eabe3cf46715854ccc80be2cd78dd4c36aedeb30751dbf85a1d08c14373c/jiter-0.13.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe", size = 389607, upload-time = "2026-02-02T12:37:14.881Z" },
{ url = "https://files.pythonhosted.org/packages/df/2d/03963fc0804e6109b82decfb9974eb92df3797fe7222428cae12f8ccaa0c/jiter-0.13.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939", size = 514986, upload-time = "2026-02-02T12:37:16.326Z" },
{ url = "https://files.pythonhosted.org/packages/f6/6c/8c83b45eb3eb1c1e18d841fe30b4b5bc5619d781267ca9bc03e005d8fd0a/jiter-0.13.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9", size = 548756, upload-time = "2026-02-02T12:37:17.736Z" },
]
[[package]]
name = "joblib"
version = "1.5.3"
@@ -1275,22 +1332,22 @@ wheels = [
[[package]]
name = "mlx"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mlx-metal", marker = "sys_platform == 'darwin'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/22/42935d593fe82d3b98eb9d60e4620ed99703886635106f89d407c68f33bc/mlx-0.30.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:743fac1e4f9e8e46c8262943c643a31139c255cdb256c99ad496958215ccac1e", size = 569344, upload-time = "2026-01-14T01:16:54.847Z" },
{ url = "https://files.pythonhosted.org/packages/7d/27/f2e7a5236289d45315d0215e8553b4dd7e2faaba3bcb5025b34b25d5ab66/mlx-0.30.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:3b04ae81655aa0e63a6e8f2c749de3bbce64cf5b168ae10f39ed086dfa99e7f8", size = 569345, upload-time = "2026-01-14T01:16:56.564Z" },
{ url = "https://files.pythonhosted.org/packages/01/41/06b042457f51952456e9bb46b2c6e205ab3a28fc52d6751b5787fdb762b2/mlx-0.30.3-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:ba9b5bdb1e929cc130af72efd7f73508c0f4e526d224489af7ec1c6419564659", size = 569213, upload-time = "2026-01-14T05:52:10.86Z" },
{ url = "https://files.pythonhosted.org/packages/ec/1e/f62c98fc0d2d878ee4235671f9d406b13cc9240493ba6fcfde2f72c2ff83/mlx-0.30.3-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:dfe5c5b64e55398a22100804abbf9681996b03129e720e36b1727ed704db12b5", size = 617309, upload-time = "2026-01-14T01:16:57.58Z" },
{ url = "https://files.pythonhosted.org/packages/e9/62/811f064693449de740350d27793ce39343a460305ec8d878c318b80921d0/mlx-0.30.3-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:a3364924610929936e6aaf13c71106161258e5a5d3f7813a64c07cc2435f9f55", size = 659521, upload-time = "2026-01-14T01:16:58.719Z" },
{ url = "https://files.pythonhosted.org/packages/82/e2/6e551bd48fb350fbf0ee4cc5cd09485437d260b8f4937f22d8623e14687a/mlx-0.30.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2c27fd8daaae14ca6cf407fcd236006a6e968f7708c8f61a2709116f2e754852", size = 571920, upload-time = "2026-01-14T01:16:59.683Z" },
{ url = "https://files.pythonhosted.org/packages/82/c0/561d1c9d3d12830b0e7fdcbd807585ef20909e398d4bcdbf25e4367543eb/mlx-0.30.3-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:b755fd4ed4b6a2ae4dee3766b5a2ea52fcbe83ebd1cf018458e18b74139409f3", size = 571921, upload-time = "2026-01-14T01:17:00.868Z" },
{ url = "https://files.pythonhosted.org/packages/42/1a/fb573fc2edc22a777fa254ff5c0c886ffd2c88aeb1f21c45778ef170f990/mlx-0.30.3-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:7e352c0369a2f7e54d4f317b434eab3333918ea9edde1c43c61d36386b6f76bf", size = 571732, upload-time = "2026-01-14T05:52:11.893Z" },
{ url = "https://files.pythonhosted.org/packages/9e/db/d0083e8f2205b3b2dcd9670eb6f0d6c1b7cbfea6b01a1f8bff39142edf44/mlx-0.30.3-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:00ac867f3d003c1477a66a579442c2040ba7ea43ce3c174490d1f8bf379606bd", size = 619635, upload-time = "2026-01-14T01:17:01.812Z" },
{ url = "https://files.pythonhosted.org/packages/ab/90/ab0b93ff0e76da4fe0e878722c76a308cfb950b044a4676e9617276d8ccd/mlx-0.30.3-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:5be7d0329036f09c6ed003ea3e307e97e3144f20a3e4711b01810d7d5013cf2c", size = 659652, upload-time = "2026-01-14T01:17:02.915Z" },
{ url = "https://files.pythonhosted.org/packages/eb/59/b6d138f5598bcd13d8e1d029a207cb8b18b14d5ded43533aef16d2e3852b/mlx-0.30.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e4b1ff6584ddcadcadbd7236f3ec6fe30abd918bcd75e51dd7693c113ab7d5f6", size = 572585, upload-time = "2026-01-27T22:53:26.236Z" },
{ url = "https://files.pythonhosted.org/packages/10/57/72604531d02471c54dd1c71caeb77479297f37ab6aaa1125b457edfce9ee/mlx-0.30.4-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:1f367534078b10dcb660393a554f97732c194977ac8318bb389a76a6307757f8", size = 572587, upload-time = "2026-01-27T22:53:27.828Z" },
{ url = "https://files.pythonhosted.org/packages/e9/5c/1a340ccc5051d222ceb58aa00c42ea5d11f4ae0bd0fc97673bef5d6ff24b/mlx-0.30.4-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:5344d195ac60dcdb871afb3ebb15c22112408f54c91ef507bd16e3928dfff38d", size = 572571, upload-time = "2026-01-27T22:53:29.268Z" },
{ url = "https://files.pythonhosted.org/packages/f3/18/538c13fa6821459d8d2b6db1ac96f60679ef995f373c68be1d743055ba47/mlx-0.30.4-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:6879c7262c8f8f7a1a9ee6f27cbf5fe174d0863189a7672c9eb71cd8611bbaa7", size = 621260, upload-time = "2026-01-27T22:53:30.696Z" },
{ url = "https://files.pythonhosted.org/packages/16/2c/e8aa0847ec97436443a78e87cc3fb95c94a2fe8b4b6ebb65cbaa67b6306c/mlx-0.30.4-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:367ba287ceb5b93a624b560ce8ce02378c03d1d60cc630b57efaf38061596d9b", size = 662522, upload-time = "2026-01-27T22:53:32.975Z" },
{ url = "https://files.pythonhosted.org/packages/98/ab/d0a6303bf0f978e394036841089d58d2c8c305e3efbcce9e4351724b6f5c/mlx-0.30.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f026f3a30013e16034419caef0b0293ba84e69252fc1676d5d8becc92bb5a304", size = 574119, upload-time = "2026-01-27T22:53:34.304Z" },
{ url = "https://files.pythonhosted.org/packages/1e/58/f5ac415a1781877b21e88f9257c7071e48ee91c34ca461e880b74677758a/mlx-0.30.4-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:96ad421cfe62a6fe7fc98521f8af9a530d7d7b6ded402ba6f4eb81a4a3087d1f", size = 574120, upload-time = "2026-01-27T22:53:36.161Z" },
{ url = "https://files.pythonhosted.org/packages/bf/12/9eb62ebf0ca7989efa6dec92e79630ef70e54202b756523bdeadf3c009eb/mlx-0.30.4-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:dfafd24144d91f6b4bd5ef6711458c566fdf507aee6417567fc2da0469619878", size = 574112, upload-time = "2026-01-27T22:53:37.831Z" },
{ url = "https://files.pythonhosted.org/packages/b7/f3/ada2b2126fc7a2634bd30c07418c6ae9657530d4534249c6949dbcc0013d/mlx-0.30.4-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:f016e16ff43dff6240ee91a8ba32226db1d55797a81a64d7af84e0e4409852ba", size = 622977, upload-time = "2026-01-27T22:53:39.885Z" },
{ url = "https://files.pythonhosted.org/packages/c1/8d/fc498b847f9ed8459ee89fb5b06f7237541192a9e6cd965bed9f61114f5c/mlx-0.30.4-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:962f99d637a99058b7d7659b66570f988815f26f2ae9af52c4cd0359fab928e2", size = 662314, upload-time = "2026-01-27T22:53:41.415Z" },
]
[package.optional-dependencies]
@@ -1303,16 +1360,16 @@ cuda13 = [
[[package]]
name = "mlx-cpu"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/8c/65bdb167141572ae0fa5d2da445368f454af52c0b93d8440b75df6472eef/mlx_cpu-0.30.3-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:85ba5601153e034d3ddb18b929147396737e74798c8453d7e078f53f35ae1512", size = 9000768, upload-time = "2026-01-14T01:16:14.144Z" },
{ url = "https://files.pythonhosted.org/packages/24/eb/e31b1edf9bb907add8fc2e386c6683004908acc71a0394e22856f139a9f1/mlx_cpu-0.30.3-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:5363249de88ab0f54af6ac00cf62d40c64f8e2ecd867598195d9baa9de8f6edb", size = 10569189, upload-time = "2026-01-14T01:16:15.995Z" },
{ url = "https://files.pythonhosted.org/packages/7f/71/8a2f3598d628c6e5fd6ca4c58e080311dc39c558561d8f7fb2d91865f0e6/mlx_cpu-0.30.4-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:d3de6152e38f8a884d7cadb5e633bcf5fb346434867195709b4f6db8450e3f91", size = 8684835, upload-time = "2026-01-27T22:40:39.919Z" },
{ url = "https://files.pythonhosted.org/packages/ab/c4/eae335cf6859c4a45be52888b754bfceb0ad5363bd05ae0ce3e67fac1dec/mlx_cpu-0.30.4-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:1b7f076587d1bd028a6f8197fe35721a39b8202e36b05e3aba89d29d79ab6764", size = 10257054, upload-time = "2026-01-27T22:40:42.689Z" },
]
[[package]]
name = "mlx-cuda-13"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cublas", marker = "sys_platform == 'linux'" },
@@ -1321,14 +1378,14 @@ dependencies = [
{ name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/63/a0ffea24ee9b1f8967c8ea685c1c7d943112130aad633a3bd780bb381966/mlx_cuda_13-0.30.3-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:eaecb79cc8ce4c0ddbc802b8f244ecdee38ba1de32aa8105477df9936f35bc7e", size = 65780796, upload-time = "2026-01-14T01:16:27.699Z" },
{ url = "https://files.pythonhosted.org/packages/07/3c/fa9baab3c148fe0811c7f1d9d10966036e596480f4c54e9863cfdfd41970/mlx_cuda_13-0.30.3-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:9d02d95435bda50ad52572d4a3873479eb3072895c98053ee3efa7f476e64cb6", size = 68127545, upload-time = "2026-01-14T01:16:31.641Z" },
{ url = "https://files.pythonhosted.org/packages/2b/79/e0aec1bf713eb6f6cbda69e1f4d145429e0477c3087aa41755078caafcb7/mlx_cuda_13-0.30.4-py3-none-manylinux_2_35_aarch64.whl", hash = "sha256:4edc42cb2e00a7e51621afd4c0b43154f7e2a15a3c6516878207cda9f85ee133", size = 66771153, upload-time = "2026-01-27T23:38:44.61Z" },
{ url = "https://files.pythonhosted.org/packages/f6/ff/1793ec5ec7f486bc44356ac5d355a42577bf5c4c72c42feb9b237bc00838/mlx_cuda_13-0.30.4-py3-none-manylinux_2_35_x86_64.whl", hash = "sha256:23a4b617e8bcd5581e6d257ac09fee85f0195114523a863ba84118cfac4abb26", size = 69665061, upload-time = "2026-01-27T23:38:48.653Z" },
]
[[package]]
name = "mlx-lm"
version = "0.30.5"
source = { registry = "https://pypi.org/simple" }
version = "0.30.6"
source = { git = "https://github.com/davidmcc73/mlx-lm.git?branch=main#9b4c6b7aff64cf64d2ff6ce027b35b56a68c4182" }
dependencies = [
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mlx", marker = "sys_platform == 'darwin'" },
@@ -1338,19 +1395,15 @@ dependencies = [
{ name = "sentencepiece", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0b/90/4469d9f75f196e6255f59a89441abe0079925d30a001462e1c1c4bc4e6a1/mlx_lm-0.30.5.tar.gz", hash = "sha256:9e6cb258c65b766c6af25cb90958aef40acab67139f05839eef19864cb3154f6", size = 262367, upload-time = "2026-01-25T15:29:30.125Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/89/ba/66db6e1e5f1ef506655b562932f6bd8f72600116d5f31f92d71c1f200b3f/mlx_lm-0.30.5-py3-none-any.whl", hash = "sha256:a80bc8e3efdebe81813b0f6eb403fb66a7a15071e256f4e7102ada986acb75bb", size = 366716, upload-time = "2026-01-25T15:29:28.29Z" },
]
[[package]]
name = "mlx-metal"
version = "0.30.3"
version = "0.30.4"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/63/4d8f6fefb507c028df4454dabfe8d8e0ad2961bb06510b6aca23d2d5b2be/mlx_metal-0.30.3-py3-none-macosx_14_0_arm64.whl", hash = "sha256:6276312b02353714c7c6515169569fe1c4bebe3229c8ecf1fdb375a13e78c966", size = 37716245, upload-time = "2026-01-14T01:16:34.838Z" },
{ url = "https://files.pythonhosted.org/packages/35/91/1d452e48a4bb4958844fd3bb28ae31b8de110549c009ebec5024ce27ebf3/mlx_metal-0.30.3-py3-none-macosx_15_0_arm64.whl", hash = "sha256:c096c0a3428f3f96a06220f97a36f9528b18bc05173f821eb05bc8458e723fa8", size = 37712125, upload-time = "2026-01-14T01:16:38.619Z" },
{ url = "https://files.pythonhosted.org/packages/fe/36/7a3cbca85542b5ca4faf871e35927f43aa0e3fc830ae5b699780fe723677/mlx_metal-0.30.3-py3-none-macosx_26_0_arm64.whl", hash = "sha256:69068533bd1ee8b0379ce5de57ed5fd313577a10ecab58e1332fd1ff7248a75e", size = 46488962, upload-time = "2026-01-14T05:52:04.523Z" },
{ url = "https://files.pythonhosted.org/packages/95/b1/a50b84aaa76a60605606df49196456f31871148485ede7cbe3267a25a51e/mlx_metal-0.30.4-py3-none-macosx_14_0_arm64.whl", hash = "sha256:10c417f86778ac5529ecd2180f90de35f2d3a0fcad4d5176d211d651504c4922", size = 38260996, upload-time = "2026-01-27T22:52:50.172Z" },
{ url = "https://files.pythonhosted.org/packages/b6/f0/6cce9e0ea545f61d0fa27dc6cd30ffa0e44f17bf859e5d75a34a9ba0da56/mlx_metal-0.30.4-py3-none-macosx_15_0_arm64.whl", hash = "sha256:f48f52490f0fcb2be924312d50c3a12625249d396a2a119ce4f7b0d388543ca9", size = 38255657, upload-time = "2026-01-27T22:52:53.683Z" },
{ url = "https://files.pythonhosted.org/packages/07/fc/345f627bb88479cb53c3f37ad1947f865830060a3d792eec05954f53384d/mlx_metal-0.30.4-py3-none-macosx_26_0_arm64.whl", hash = "sha256:9a9fb6f9169eeb38a7f78389fe78306a1b5167fa489096bc50f9ca72074d7a95", size = 47541040, upload-time = "2026-01-27T22:52:57.059Z" },
]
[[package]]
@@ -1442,19 +1495,18 @@ wheels = [
[[package]]
name = "multiprocess"
version = "0.70.18"
version = "0.70.16"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "dill", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603, upload-time = "2024-01-28T18:52:34.85Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" },
{ url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" },
{ url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" },
{ url = "https://files.pythonhosted.org/packages/ee/25/7d7e78e750bc1aecfaf0efbf826c69a791d2eeaf29cf20cba93ff4cced78/multiprocess-0.70.18-py313-none-any.whl", hash = "sha256:871743755f43ef57d7910a38433cfe41319e72be1bbd90b79c7a5ac523eb9334", size = 151917, upload-time = "2025-04-17T03:11:24.044Z" },
{ url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" },
{ url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
{ url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824, upload-time = "2024-01-28T18:52:26.062Z" },
{ url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519, upload-time = "2024-01-28T18:52:28.115Z" },
{ url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload-time = "2024-01-28T18:52:29.395Z" },
{ url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload-time = "2024-01-28T18:52:30.853Z" },
{ url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" },
]
[[package]]
@@ -1559,40 +1611,26 @@ wheels = [
[[package]]
name = "numpy"
version = "2.4.1"
version = "2.2.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload-time = "2026-01-10T06:44:59.619Z" }
sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495, upload-time = "2026-01-10T06:43:06.283Z" },
{ url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657, upload-time = "2026-01-10T06:43:09.094Z" },
{ url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256, upload-time = "2026-01-10T06:43:13.634Z" },
{ url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212, upload-time = "2026-01-10T06:43:15.661Z" },
{ url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871, upload-time = "2026-01-10T06:43:17.324Z" },
{ url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305, upload-time = "2026-01-10T06:43:19.376Z" },
{ url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909, upload-time = "2026-01-10T06:43:21.808Z" },
{ url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380, upload-time = "2026-01-10T06:43:23.957Z" },
{ url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156, upload-time = "2026-01-10T06:43:34.237Z" },
{ url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663, upload-time = "2026-01-10T06:43:36.211Z" },
{ url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224, upload-time = "2026-01-10T06:43:37.884Z" },
{ url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352, upload-time = "2026-01-10T06:43:39.479Z" },
{ url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279, upload-time = "2026-01-10T06:43:41.93Z" },
{ url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316, upload-time = "2026-01-10T06:43:44.121Z" },
{ url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884, upload-time = "2026-01-10T06:43:46.613Z" },
{ url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046, upload-time = "2026-01-10T06:43:54.797Z" },
{ url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858, upload-time = "2026-01-10T06:43:57.099Z" },
{ url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417, upload-time = "2026-01-10T06:43:59.037Z" },
{ url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643, upload-time = "2026-01-10T06:44:01.852Z" },
{ url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963, upload-time = "2026-01-10T06:44:04.047Z" },
{ url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811, upload-time = "2026-01-10T06:44:06.207Z" },
{ url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643, upload-time = "2026-01-10T06:44:08.33Z" },
{ url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601, upload-time = "2026-01-10T06:44:10.841Z" },
{ url = "https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774, upload-time = "2026-01-10T06:44:19.467Z" },
{ url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274, upload-time = "2026-01-10T06:44:23.189Z" },
{ url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306, upload-time = "2026-01-10T06:44:25.012Z" },
{ url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653, upload-time = "2026-01-10T06:44:26.706Z" },
{ url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144, upload-time = "2026-01-10T06:44:29.378Z" },
{ url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425, upload-time = "2026-01-10T06:44:31.721Z" },
{ url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053, upload-time = "2026-01-10T06:44:34.617Z" },
{ url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" },
{ url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" },
{ url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" },
{ url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" },
{ url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" },
{ url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" },
{ url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" },
{ url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" },
{ url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" },
{ url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" },
{ url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" },
{ url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" },
{ url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" },
{ url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" },
{ url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" },
{ url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" },
]
[[package]]
@@ -1729,10 +1767,10 @@ wheels = [
[[package]]
name = "nvidia-nccl-cu12"
version = "2.27.5"
version = "2.27.3"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" },
{ url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" },
]
[[package]]
@@ -1752,14 +1790,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" },
]
-[[package]]
-name = "nvidia-nvshmem-cu12"
-version = "3.3.20"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
-{ url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" },
-]
[[package]]
name = "nvidia-nvtx-cu12"
version = "12.8.90"
@@ -1768,6 +1798,25 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" },
]
+[[package]]
+name = "openai"
+version = "2.16.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+{ name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "distro", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "httpx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "jiter", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "sniffio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/6c/e4c964fcf1d527fdf4739e7cc940c60075a4114d50d03871d5d5b1e13a88/openai-2.16.0.tar.gz", hash = "sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12", size = 629649, upload-time = "2026-01-27T23:28:02.579Z" }
+wheels = [
+{ url = "https://files.pythonhosted.org/packages/16/83/0315bf2cfd75a2ce8a7e54188e9456c60cec6c0cf66728ed07bd9859ff26/openai-2.16.0-py3-none-any.whl", hash = "sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b", size = 1068612, upload-time = "2026-01-27T23:28:00.356Z" },
+]
[[package]]
name = "openai-harmony"
version = "0.0.8"
@@ -1814,13 +1863,57 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/35/ddf3a6e8fc754fb939e2ea36fde96c28189184d6115afcf60011bb438ae5/packaging-26.0rc1-py3-none-any.whl", hash = "sha256:ecf921b33c620e357b1eed2ac3bc6313b1582874b0282d0773b6797b79cb0786", size = 74021, upload-time = "2026-01-09T17:41:17.134Z" },
]
+[[package]]
+name = "pandas"
+version = "3.0.0rc2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+"python_full_version >= '3.14' and sys_platform == 'darwin'",
+"python_full_version >= '3.14' and sys_platform == 'linux'",
+]
+dependencies = [
+{ name = "numpy", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
+{ name = "python-dateutil", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/11/1f/08274a71f6198b47fa6abb92c99ab87bfa088c3fca71b467a874c7a8db47/pandas-3.0.0rc2.tar.gz", hash = "sha256:729cd21b8387d9cba48ab357d2eea72215af6b631440fa00b2cb8ea5f83c7dbc", size = 4611940, upload-time = "2026-01-14T22:43:44.022Z" }
+wheels = [
+{ url = "https://files.pythonhosted.org/packages/09/b7/9d31c2e0ef9950e6ca6238683a265867264ddc2ed25cd1643a102fe6488f/pandas-3.0.0rc2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:683ecc5e0f1080bdf2f5cb8d707b1dd506bdd77b199394151d7864f5e9da0a0b", size = 10276423, upload-time = "2026-01-14T22:42:33.95Z" },
+{ url = "https://files.pythonhosted.org/packages/08/8f/435edcea6bec50778ccccf0bd271f7d0eebccca6d04384e1f8e413b2c9f8/pandas-3.0.0rc2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:673e4f7550a61f994a8f1835f71187657953e970077d74fb0723d1fcc98d4b80", size = 9820608, upload-time = "2026-01-14T22:42:35.865Z" },
+{ url = "https://files.pythonhosted.org/packages/e4/43/5cd6162ad4b311dc8340442d7d71d79c4ff0e1b009e349790099a10f98fd/pandas-3.0.0rc2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55adad848bb154cdca4a6501c5c961979a1788c4e8e2aa14bd42ea748264ee0e", size = 10323489, upload-time = "2026-01-14T22:42:38.13Z" },
+{ url = "https://files.pythonhosted.org/packages/ab/5a/f4370b79eb4212abadea8905e3df058268043260f9fa97d86c0df54aa500/pandas-3.0.0rc2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:358185abd60c12e78ecec4affaa86f7775e847b77547bffff5f15f6992149d99", size = 10831839, upload-time = "2026-01-14T22:42:40.484Z" },
+{ url = "https://files.pythonhosted.org/packages/f5/11/c953c00093be4ea3d2b1846a8c1f2f260bcae90ff61c3898da5406fa7ea2/pandas-3.0.0rc2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:361f57da30ad16486d51410e957033864171930316e7f28caf8ded2fd00eea79", size = 11330676, upload-time = "2026-01-14T22:42:42.933Z" },
+{ url = "https://files.pythonhosted.org/packages/65/96/e4bca78cfee5b8b7b921b609673768f8752a282bdbde669a43591a27e2b8/pandas-3.0.0rc2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d1408423ab06a278ae30aabdc3339cadff3dfe1b4d6ce77cf34e17810c00907c", size = 11883758, upload-time = "2026-01-14T22:42:45.444Z" },
+{ url = "https://files.pythonhosted.org/packages/9c/b0/1bf38be28e25be6d07ceedeccf9b2cf9d7e6553598915f2e5e96a123fbf9/pandas-3.0.0rc2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e5ba7f285c4ca98f6d2f136657239a57dee7f992e1401776287ff61232c43e4d", size = 10711017, upload-time = "2026-01-14T22:42:51.954Z" },
+{ url = "https://files.pythonhosted.org/packages/4d/c2/fb57d65a4583fb7cadd75040f02ceeea8922dc1c0ae3b9ba9f713d27f518/pandas-3.0.0rc2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:629d2050db9427ca875ad9b36569deff91625c8ae8440ab8aa31d0648cc735d8", size = 10362629, upload-time = "2026-01-14T22:42:54.231Z" },
+{ url = "https://files.pythonhosted.org/packages/d1/64/44bdc4a30dd7a00510ffeb13fbb1dfd66df4761b4abd88805aff53b50cab/pandas-3.0.0rc2-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f765354b411f9b1a2fb376d3efc03b9bd09034d55a6ec154969ca765c32713ad", size = 10283691, upload-time = "2026-01-14T22:42:56.292Z" },
+{ url = "https://files.pythonhosted.org/packages/26/fb/4ac312b87a08eb5f8bba574fa09cb4d599c90bcf15e8331d0ad496d1023f/pandas-3.0.0rc2-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e9dc4fa215c768b6e92b1c589458dd0b071b8b362d276f189d491666f79e97e1", size = 10671727, upload-time = "2026-01-14T22:42:58.241Z" },
+{ url = "https://files.pythonhosted.org/packages/70/95/095b4fd1fbfea415b0a3728e94876ed1c4e5887bbc0e76817f96de641403/pandas-3.0.0rc2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d74eb4e76cf84ef0ce8225d02ebdc41c432b286f2ce46e482193108579230a78", size = 11296898, upload-time = "2026-01-14T22:43:00.575Z" },
+{ url = "https://files.pythonhosted.org/packages/ca/32/c843f435b4b2873d1f9ad413019f755390fb823ebfcbff297fc546ecd189/pandas-3.0.0rc2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d669ff45857a6b7db9ba8020ec4443d4a93b919da87f67b8bd71938848495dcb", size = 11743350, upload-time = "2026-01-14T22:43:03.348Z" },
+{ url = "https://files.pythonhosted.org/packages/78/54/dcd99e9856271a0f0546c96a564c43d1445344a8e8f1ae3e7aa11b941007/pandas-3.0.0rc2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9e156705991758d892a6d008a76e2638d0d4f076deb2753a38034f64b7f4597f", size = 10280879, upload-time = "2026-01-14T22:43:07.574Z" },
+{ url = "https://files.pythonhosted.org/packages/d4/a0/52ff58f8f4f210c41e2b7b777da1b6f754006576c34d130843147b663a44/pandas-3.0.0rc2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c26d22710d5b5f65e0e36ed1a73d08e96d8866b58bd56b45a93901c8ceffa152", size = 9870217, upload-time = "2026-01-14T22:43:10.306Z" },
+{ url = "https://files.pythonhosted.org/packages/2a/9b/4a472aae89afd5c93d2ccb6e74ac94eef8b37c5fb2c56c647b1fd3e08813/pandas-3.0.0rc2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5000fcee5565e41053ff5d59e0d3d1b0784b492aa0f33b89972648de8615baee", size = 10399840, upload-time = "2026-01-14T22:43:12.702Z" },
+{ url = "https://files.pythonhosted.org/packages/a0/2f/81c647c3f905023d06826762d86d76cc27435fe66396e1cebcddede5ea83/pandas-3.0.0rc2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddd8ac07a9b30039a38512c8b917a52fb8b87f3bec110dd349801ebc5c128661", size = 10854606, upload-time = "2026-01-14T22:43:14.751Z" },
+{ url = "https://files.pythonhosted.org/packages/b9/e2/2eabf98e251e0becaf58c97f6b4e4150d1ba013814059583688a281ecacb/pandas-3.0.0rc2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:650a812ad0164548c91181306b3ebd8c41acc44b8f03bc447725c6ec84b61751", size = 11402310, upload-time = "2026-01-14T22:43:16.735Z" },
+{ url = "https://files.pythonhosted.org/packages/5b/7c/c8c224a49de3f5a6e82af813c19055b551abb3456757bed825f8ebf3be62/pandas-3.0.0rc2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c7ac831248102b92f4decafb5970fe0eed0c42d788b599485361aa225b26f2b4", size = 11922515, upload-time = "2026-01-14T22:43:18.861Z" },
+{ url = "https://files.pythonhosted.org/packages/a8/9e/ea3f5c1ccd292fd1d31b92c5fc3ab03364e63b54e3f24ba9a27dc4350253/pandas-3.0.0rc2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b31021fa98cdffe6cd6745edc53fece4b0f853290fd4a28f41bdf6a0dc644aa6", size = 10731009, upload-time = "2026-01-14T22:43:27.253Z" },
+{ url = "https://files.pythonhosted.org/packages/f7/8c/9f11cb31727589ada2d333a1c174fbe92b32de935d2efc354e0fa63184ff/pandas-3.0.0rc2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:59c707013d019efba0af7cf731179d788f4d7309f367dc495082d61d201ad487", size = 10379214, upload-time = "2026-01-14T22:43:29.399Z" },
+{ url = "https://files.pythonhosted.org/packages/53/43/dfd47f186323205197204616f21bb833b18cbbdb817f32d63222fe249639/pandas-3.0.0rc2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8376665f907f6ee142987ebcd3f2e78f40833a7b03cd9a7c9a72774f9f0d4759", size = 10301411, upload-time = "2026-01-14T22:43:31.627Z" },
+{ url = "https://files.pythonhosted.org/packages/99/10/28cb0417e80dddc82b69c8bc3e5d3b64c62102a50e4cbe8716ddaeb62b23/pandas-3.0.0rc2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c22c471eec0a7c3bbfdbfe2a5cac0a2dc74ea24711534e0d69e0c308b3ad35ce", size = 10701696, upload-time = "2026-01-14T22:43:33.842Z" },
+{ url = "https://files.pythonhosted.org/packages/62/7d/51edfecfbe95a83b472acd0ca676690dba7bdc86936c82ab485a6d6a0d47/pandas-3.0.0rc2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4ac9bf1a92277599eb3f6fa2f30028c26861d48c736a77b6c098c56b46e53b92", size = 11318395, upload-time = "2026-01-14T22:43:35.802Z" },
+{ url = "https://files.pythonhosted.org/packages/5b/f7/b962099d9706f14afcdc4aafe61f7710a37577eae6ac2098838e9fd3a52f/pandas-3.0.0rc2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:47966fb3d7b236a91e4f26bc6fb1a27ed09c95ead6d147958052c350abd545e3", size = 11772875, upload-time = "2026-01-14T22:43:37.949Z" },
+]
[[package]]
name = "pandas"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+"python_full_version < '3.14' and sys_platform == 'darwin'",
+"python_full_version < '3.14' and sys_platform == 'linux'",
+]
dependencies = [
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "numpy", marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
{ name = "python-dateutil", marker = "(python_full_version < '3.14' and sys_platform == 'darwin') or (python_full_version < '3.14' and sys_platform == 'linux')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/de/da/b1dc0481ab8d55d0f46e343cfe67d4551a0e14fcee52bd38ca1bd73258d8/pandas-3.0.0.tar.gz", hash = "sha256:0facf7e87d38f721f0af46fe70d97373a37701b1c09f7ed7aeeb292ade5c050f", size = 4633005, upload-time = "2026-01-21T15:52:04.726Z" }
wheels = [
@@ -1859,6 +1952,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" },
]
+[[package]]
+name = "pebble"
+version = "5.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/3b/7debef984e227a70798963cf2e5ea90882f62bca659b33cbd421a453abd1/pebble-5.2.0.tar.gz", hash = "sha256:8e0a5f6a1cfdd0ac1bfc4a789e20d2b4b895de976e547d23b7de23b71ef39b34", size = 39811, upload-time = "2026-01-25T12:05:11.422Z" }
+wheels = [
+{ url = "https://files.pythonhosted.org/packages/b5/de/1cce5274efcb921484998864820f2ba41679ea472daef748a7bc03fc0bb7/pebble-5.2.0-py3-none-any.whl", hash = "sha256:6237a792a78524648857ec6d2dae069c91a45bdef18daf957078a56e2dd8e0a8", size = 34881, upload-time = "2026-01-25T12:05:09.714Z" },
+]
[[package]]
name = "peft"
version = "0.18.1"
@@ -2662,11 +2764,11 @@ wheels = [
[[package]]
name = "setuptools"
version = "80.9.0"
version = "79.0.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
sdist = { url = "https://files.pythonhosted.org/packages/bb/71/b6365e6325b3290e14957b2c3a804a529968c77a049b2ed40c095f749707/setuptools-79.0.1.tar.gz", hash = "sha256:128ce7b8f33c3079fd1b067ecbb4051a66e8526e7b65f6cec075dfc650ddfa88", size = 1367909, upload-time = "2025-04-23T22:20:59.241Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
{ url = "https://files.pythonhosted.org/packages/0d/6d/b4752b044bf94cb802d88a888dc7d288baaf77d7910b7dedda74b5ceea0c/setuptools-79.0.1-py3-none-any.whl", hash = "sha256:e147c0549f27767ba362f9da434eab9c5dc0045d5304feb602a0af001089fc51", size = 1256281, upload-time = "2025-04-23T22:20:56.768Z" },
]
[[package]]
@@ -2854,7 +2956,7 @@ wheels = [
[[package]]
name = "torch"
version = "2.9.1"
version = "2.8.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2874,7 +2976,6 @@ dependencies = [
{ name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2882,18 +2983,12 @@ dependencies = [
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/60/8fc5e828d050bddfab469b3fe78e5ab9a7e53dda9c3bdc6a43d17ce99e63/torch-2.9.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c29455d2b910b98738131990394da3e50eea8291dfeb4b12de71ecf1fdeb21cb", size = 104135743, upload-time = "2025-11-12T15:21:34.936Z" },
{ url = "https://files.pythonhosted.org/packages/f2/b7/6d3f80e6918213babddb2a37b46dbb14c15b14c5f473e347869a51f40e1f/torch-2.9.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:524de44cd13931208ba2c4bde9ec7741fd4ae6bfd06409a604fc32f6520c2bc9", size = 899749493, upload-time = "2025-11-12T15:24:36.356Z" },
{ url = "https://files.pythonhosted.org/packages/28/0e/2a37247957e72c12151b33a01e4df651d9d155dd74d8cfcbfad15a79b44a/torch-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5be4bf7496f1e3ffb1dd44b672adb1ac3f081f204c5ca81eba6442f5f634df8e", size = 74830751, upload-time = "2025-11-12T15:21:43.792Z" },
{ url = "https://files.pythonhosted.org/packages/4b/f7/7a18745edcd7b9ca2381aa03353647bca8aace91683c4975f19ac233809d/torch-2.9.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:30a3e170a84894f3652434b56d59a64a2c11366b0ed5776fab33c2439396bf9a", size = 104142929, upload-time = "2025-11-12T15:21:48.319Z" },
{ url = "https://files.pythonhosted.org/packages/f4/dd/f1c0d879f2863ef209e18823a988dc7a1bf40470750e3ebe927efdb9407f/torch-2.9.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8301a7b431e51764629208d0edaa4f9e4c33e6df0f2f90b90e261d623df6a4e2", size = 899748978, upload-time = "2025-11-12T15:23:04.568Z" },
{ url = "https://files.pythonhosted.org/packages/40/60/71c698b466dd01e65d0e9514b5405faae200c52a76901baf6906856f17e4/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:2c14b3da5df416cf9cb5efab83aa3056f5b8cd8620b8fde81b4987ecab730587", size = 74480347, upload-time = "2025-11-12T15:21:57.648Z" },
{ url = "https://files.pythonhosted.org/packages/48/50/c4b5112546d0d13cc9eaa1c732b823d676a9f49ae8b6f97772f795874a03/torch-2.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1edee27a7c9897f4e0b7c14cfc2f3008c571921134522d5b9b5ec4ebbc69041a", size = 74433245, upload-time = "2025-11-12T15:22:39.027Z" },
{ url = "https://files.pythonhosted.org/packages/81/c9/2628f408f0518b3bae49c95f5af3728b6ab498c8624ab1e03a43dd53d650/torch-2.9.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:19d144d6b3e29921f1fc70503e9f2fc572cde6a5115c0c0de2f7ca8b1483e8b6", size = 104134804, upload-time = "2025-11-12T15:22:35.222Z" },
{ url = "https://files.pythonhosted.org/packages/28/fc/5bc91d6d831ae41bf6e9e6da6468f25330522e92347c9156eb3f1cb95956/torch-2.9.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:c432d04376f6d9767a9852ea0def7b47a7bbc8e7af3b16ac9cf9ce02b12851c9", size = 899747132, upload-time = "2025-11-12T15:23:36.068Z" },
{ url = "https://files.pythonhosted.org/packages/bd/b2/2d15a52516b2ea3f414643b8de68fa4cb220d3877ac8b1028c83dc8ca1c4/torch-2.9.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cb10896a1f7fedaddbccc2017ce6ca9ecaaf990f0973bdfcf405439750118d2c", size = 74823558, upload-time = "2025-11-12T15:22:43.392Z" },
{ url = "https://files.pythonhosted.org/packages/86/5c/5b2e5d84f5b9850cd1e71af07524d8cbb74cba19379800f1f9f7c997fc70/torch-2.9.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0a2bd769944991c74acf0c4ef23603b9c777fdf7637f115605a4b2d8023110c7", size = 104145788, upload-time = "2025-11-12T15:23:52.109Z" },
{ url = "https://files.pythonhosted.org/packages/a9/8c/3da60787bcf70add986c4ad485993026ac0ca74f2fc21410bc4eb1bb7695/torch-2.9.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:07c8a9660bc9414c39cac530ac83b1fb1b679d7155824144a40a54f4a47bfa73", size = 899735500, upload-time = "2025-11-12T15:24:08.788Z" },
{ url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload-time = "2025-08-06T14:54:01.526Z" },
{ url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload-time = "2025-08-06T14:55:50.78Z" },
{ url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload-time = "2025-08-06T14:54:34.769Z" },
{ url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload-time = "2025-08-06T14:54:39.047Z" },
{ url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload-time = "2025-08-06T14:56:18.286Z" },
{ url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload-time = "2025-08-06T14:55:28.645Z" },
]
[[package]]
@@ -2920,7 +3015,7 @@ wheels = [
[[package]]
name = "transformers"
version = "5.0.0rc3"
version = "5.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2929,26 +3024,26 @@ dependencies = [
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typer-slim", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3f/a3/7c116a8d85f69ea7749cf4c2df79e64c35d028e5fc7ea0168f299d03b8c7/transformers-5.0.0rc3.tar.gz", hash = "sha256:a0315b92b7e087617ade42ec9e6e92ee7620541cc5d6a3331886c52cbe306f5c", size = 8388520, upload-time = "2026-01-14T16:49:02.952Z" }
sdist = { url = "https://files.pythonhosted.org/packages/bc/79/845941711811789c85fb7e2599cea425a14a07eda40f50896b9d3fda7492/transformers-5.0.0.tar.gz", hash = "sha256:5f5634efed6cf76ad068cc5834c7adbc32db78bbd6211fb70df2325a9c37dec8", size = 8424830, upload-time = "2026-01-26T10:46:46.813Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/f2/ae2b8968764253bdf38a48dee3c299b8d0bedf7c8ffbe3449fca9bd95338/transformers-5.0.0rc3-py3-none-any.whl", hash = "sha256:383fad27f4f73092d330e45fae384681e5c8521e1dc1cf6cb1a297780e68bf2d", size = 10107087, upload-time = "2026-01-14T16:48:59.393Z" },
{ url = "https://files.pythonhosted.org/packages/52/f3/ac976fa8e305c9e49772527e09fbdc27cc6831b8a2f6b6063406626be5dd/transformers-5.0.0-py3-none-any.whl", hash = "sha256:587086f249ce64c817213cf36afdb318d087f790723e9b3d4500b97832afd52d", size = 10142091, upload-time = "2026-01-26T10:46:43.88Z" },
]
[[package]]
name = "triton"
version = "3.5.1"
version = "3.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "setuptools", marker = "sys_platform == 'linux'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/27/46/8c3bbb5b0a19313f50edcaa363b599e5a1a5ac9683ead82b9b80fe497c8d/triton-3.5.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3f4346b6ebbd4fad18773f5ba839114f4826037c9f2f34e0148894cd5dd3dba", size = 170470410, upload-time = "2025-11-11T17:41:06.319Z" },
{ url = "https://files.pythonhosted.org/packages/37/92/e97fcc6b2c27cdb87ce5ee063d77f8f26f19f06916aa680464c8104ef0f6/triton-3.5.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0b4d2c70127fca6a23e247f9348b8adde979d2e7a20391bfbabaac6aebc7e6a8", size = 170579924, upload-time = "2025-11-11T17:41:12.455Z" },
{ url = "https://files.pythonhosted.org/packages/a4/e6/c595c35e5c50c4bc56a7bac96493dad321e9e29b953b526bbbe20f9911d0/triton-3.5.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0637b1efb1db599a8e9dc960d53ab6e4637db7d4ab6630a0974705d77b14b60", size = 170480488, upload-time = "2025-11-11T17:41:18.222Z" },
{ url = "https://files.pythonhosted.org/packages/16/b5/b0d3d8b901b6a04ca38df5e24c27e53afb15b93624d7fd7d658c7cd9352a/triton-3.5.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bac7f7d959ad0f48c0e97d6643a1cc0fd5786fe61cb1f83b537c6b2d54776478", size = 170582192, upload-time = "2025-11-11T17:41:23.963Z" },
{ url = "https://files.pythonhosted.org/packages/30/7b/0a685684ed5322d2af0bddefed7906674f67974aa88b0fae6e82e3b766f6/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb", size = 155569223, upload-time = "2025-07-30T19:58:44.017Z" },
{ url = "https://files.pythonhosted.org/packages/20/63/8cb444ad5cdb25d999b7d647abac25af0ee37d292afc009940c05b82dda0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d", size = 155659780, upload-time = "2025-07-30T19:58:51.171Z" },
]
[[package]]