mirror of
https://github.com/exo-explore/exo.git
synced 2026-05-19 04:05:23 -04:00
Fix Gemma 4 E2B TP + DeepSeek V32 thinking parsing (#1967)
This commit is contained in:
@@ -1370,9 +1370,11 @@ class Gemma4ShardingStrategy(TensorParallelShardingStrategy):
|
||||
|
||||
attn = layer.self_attn
|
||||
attn.q_proj = self.all_to_sharded_linear(attn.q_proj)
|
||||
attn.k_proj = self.all_to_sharded_linear(attn.k_proj)
|
||||
if not attn.use_k_eq_v:
|
||||
attn.v_proj = self.all_to_sharded_linear(attn.v_proj)
|
||||
has_kv: bool = cast(bool, attn.has_kv)
|
||||
if has_kv:
|
||||
attn.k_proj = self.all_to_sharded_linear(attn.k_proj)
|
||||
if not attn.use_k_eq_v:
|
||||
attn.v_proj = self.all_to_sharded_linear(attn.v_proj)
|
||||
attn.o_proj = self.sharded_to_all_linear(attn.o_proj)
|
||||
attn.n_heads //= self.N
|
||||
attn.n_kv_heads //= self.N
|
||||
|
||||
@@ -17,6 +17,13 @@ TOOL_CALLS_START = f"<{DSML_TOKEN}function_calls>"
|
||||
TOOL_CALLS_END = f"</{DSML_TOKEN}function_calls>"
|
||||
_ORPHAN_THINK_END = ASSISTANT_TOKEN + THINKING_END
|
||||
_FIXED_THINK_BLOCK = ASSISTANT_TOKEN + THINKING_START + "\n" + THINKING_END
|
||||
_FUNCTION_RESULTS_CLOSE = "</function_results>"
|
||||
_ORPHAN_TOOL_RESULT_SUFFIX = _FUNCTION_RESULTS_CLOSE + "\n\n" + THINKING_END
|
||||
_EMPTY_THINK_BLOCKS = (
|
||||
THINKING_START + "\n\n" + THINKING_END,
|
||||
THINKING_START + "\n" + THINKING_END,
|
||||
THINKING_START + THINKING_END,
|
||||
)
|
||||
|
||||
|
||||
def encode_messages(
|
||||
@@ -35,7 +42,11 @@ def encode_messages(
|
||||
add_default_bos_token=add_default_bos_token,
|
||||
tools=tools,
|
||||
)
|
||||
return prompt.replace(_ORPHAN_THINK_END, _FIXED_THINK_BLOCK)
|
||||
prompt = prompt.replace(_ORPHAN_TOOL_RESULT_SUFFIX, _FUNCTION_RESULTS_CLOSE)
|
||||
prompt = prompt.replace(_ORPHAN_THINK_END, _FIXED_THINK_BLOCK)
|
||||
for empty in _EMPTY_THINK_BLOCKS:
|
||||
prompt = prompt.replace(empty, "")
|
||||
return prompt
|
||||
|
||||
|
||||
_INVOKE_PATTERN = re.compile(
|
||||
|
||||
@@ -547,7 +547,6 @@ def render_chat_template(
|
||||
)
|
||||
if partial_assistant_content:
|
||||
prompt += partial_assistant_content
|
||||
logger.info(prompt)
|
||||
return prompt
|
||||
|
||||
for msg in formatted_messages:
|
||||
|
||||
@@ -79,6 +79,13 @@ def apply_all_parsers(
|
||||
issubclass(model_type, DeepseekV32Model)
|
||||
and "deepseek" in model_id.normalize().lower()
|
||||
):
|
||||
if tokenizer.has_thinking:
|
||||
generator = parse_thinking_models(
|
||||
generator,
|
||||
tokenizer.think_start,
|
||||
tokenizer.think_end,
|
||||
starts_in_thinking=detect_thinking_prompt_suffix(prompt, tokenizer),
|
||||
)
|
||||
generator = parse_deepseek_v32(generator)
|
||||
else:
|
||||
if tokenizer.has_thinking:
|
||||
@@ -210,11 +217,10 @@ def parse_deepseek_v32(
|
||||
|
||||
Uses accumulated-text matching (not per-token marker checks) because
|
||||
DSML markers like <|DSML|function_calls> may span multiple tokens.
|
||||
Also handles <think>...</think> blocks for thinking mode.
|
||||
Thinking tag handling is delegated to parse_thinking_models, which
|
||||
wraps this parser in apply_all_parsers.
|
||||
"""
|
||||
from exo.worker.engines.mlx.dsml_encoding import (
|
||||
THINKING_END,
|
||||
THINKING_START,
|
||||
TOOL_CALLS_END,
|
||||
TOOL_CALLS_START,
|
||||
parse_dsml_output,
|
||||
@@ -222,7 +228,6 @@ def parse_deepseek_v32(
|
||||
|
||||
accumulated = ""
|
||||
in_tool_call = False
|
||||
thinking = False
|
||||
# Tokens buffered while we detect the start of a DSML block
|
||||
pending_buffer: list[GenerationResponse] = []
|
||||
# Text accumulated during a tool call block
|
||||
@@ -264,29 +269,6 @@ def parse_deepseek_v32(
|
||||
yield response
|
||||
break
|
||||
|
||||
# ── Handle thinking tags ──
|
||||
if not thinking and THINKING_START in response.text:
|
||||
thinking = True
|
||||
# Yield any text before the <think> tag
|
||||
before = response.text[: response.text.index(THINKING_START)]
|
||||
if before:
|
||||
yield response.model_copy(update={"text": before})
|
||||
continue
|
||||
|
||||
if thinking and THINKING_END in response.text:
|
||||
thinking = False
|
||||
# Yield any text after the </think> tag
|
||||
after = response.text[
|
||||
response.text.index(THINKING_END) + len(THINKING_END) :
|
||||
]
|
||||
if after:
|
||||
yield response.model_copy(update={"text": after, "is_thinking": False})
|
||||
continue
|
||||
|
||||
if thinking:
|
||||
yield response.model_copy(update={"is_thinking": True})
|
||||
continue
|
||||
|
||||
# ── Handle tool call accumulation ──
|
||||
if in_tool_call:
|
||||
tool_call_text += response.text
|
||||
|
||||
@@ -96,7 +96,12 @@ def run_gpt_oss_pipeline_device(
|
||||
n_layers=24,
|
||||
)
|
||||
|
||||
model, tokenizer = shard_and_load(shard_meta, group, on_layer_loaded=None)
|
||||
gen = shard_and_load(shard_meta, group)
|
||||
try:
|
||||
while True:
|
||||
next(gen)
|
||||
except StopIteration as stop:
|
||||
model, tokenizer = stop.value
|
||||
model = cast(Model, model)
|
||||
|
||||
# Generate a prompt of exact token length
|
||||
@@ -172,7 +177,12 @@ def run_gpt_oss_tensor_parallel_device(
|
||||
n_layers=24,
|
||||
)
|
||||
|
||||
model, tokenizer = shard_and_load(shard_meta, group, on_layer_loaded=None)
|
||||
gen = shard_and_load(shard_meta, group)
|
||||
try:
|
||||
while True:
|
||||
next(gen)
|
||||
except StopIteration as stop:
|
||||
model, tokenizer = stop.value
|
||||
model = cast(Model, model)
|
||||
|
||||
base_text = "The quick brown fox jumps over the lazy dog. "
|
||||
|
||||
@@ -174,7 +174,12 @@ def _run_pipeline_device(
|
||||
n_layers=TOTAL_LAYERS,
|
||||
)
|
||||
|
||||
model, tokenizer = shard_and_load(shard_meta, group, on_layer_loaded=None)
|
||||
gen = shard_and_load(shard_meta, group)
|
||||
try:
|
||||
while True:
|
||||
next(gen)
|
||||
except StopIteration as stop:
|
||||
model, tokenizer = stop.value
|
||||
model = cast(Any, model)
|
||||
|
||||
prompt, task = _build_prompt(tokenizer, prompt_tokens)
|
||||
|
||||
@@ -20,7 +20,25 @@ from exo.worker.engines.mlx.dsml_encoding import (
|
||||
encode_messages,
|
||||
parse_dsml_output,
|
||||
)
|
||||
from exo.worker.runner.llm_inference.model_output_parsers import parse_deepseek_v32
|
||||
from exo.worker.runner.llm_inference.model_output_parsers import (
|
||||
parse_deepseek_v32,
|
||||
parse_thinking_models,
|
||||
)
|
||||
|
||||
|
||||
def _parse_deepseek_with_thinking(
|
||||
source: Generator[GenerationResponse | None],
|
||||
starts_in_thinking: bool = False,
|
||||
) -> Generator[GenerationResponse | ToolCallResponse | None]:
|
||||
return parse_deepseek_v32(
|
||||
parse_thinking_models(
|
||||
source,
|
||||
think_start=THINKING_START,
|
||||
think_end=THINKING_END,
|
||||
starts_in_thinking=starts_in_thinking,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# ── Shared fixtures ──────────────────────────────────────────────
|
||||
|
||||
@@ -333,9 +351,7 @@ class TestE2EThinkingAndToolCall:
|
||||
assert prompt.endswith(THINKING_START)
|
||||
|
||||
# Simulate: model outputs <think>, thinks, closes thinking, then tool call.
|
||||
# In the full pipeline, parse_thinking_models handles the case where
|
||||
# <think> is in the prompt. Here we test parse_deepseek_v32 directly,
|
||||
# which detects <think>/<think> markers in the stream.
|
||||
# Use the full production chain (parse_thinking_models → parse_deepseek_v32).
|
||||
model_tokens = [
|
||||
THINKING_START,
|
||||
"The user wants weather",
|
||||
@@ -353,7 +369,7 @@ class TestE2EThinkingAndToolCall:
|
||||
TOOL_CALLS_END,
|
||||
]
|
||||
|
||||
results = list(parse_deepseek_v32(_simulate_tokens(model_tokens)))
|
||||
results = list(_parse_deepseek_with_thinking(_simulate_tokens(model_tokens)))
|
||||
|
||||
gen_results = [r for r in results if isinstance(r, GenerationResponse)]
|
||||
tool_results = [r for r in results if isinstance(r, ToolCallResponse)]
|
||||
@@ -387,7 +403,7 @@ class TestE2EThinkingAndToolCall:
|
||||
prompt_no_think = encode_messages(
|
||||
messages, tools=_WEATHER_TOOLS, thinking_mode="chat"
|
||||
)
|
||||
assert prompt_no_think.endswith(THINKING_END)
|
||||
assert not prompt_no_think.endswith(THINKING_START)
|
||||
|
||||
# Both should have the same tool definitions
|
||||
assert "get_weather" in prompt_think
|
||||
@@ -597,7 +613,9 @@ class TestE2EFullRoundTrip:
|
||||
f"</{DSML_TOKEN}invoke>\n",
|
||||
TOOL_CALLS_END,
|
||||
]
|
||||
results_1 = list(parse_deepseek_v32(_simulate_tokens(model_tokens_1)))
|
||||
results_1 = list(
|
||||
_parse_deepseek_with_thinking(_simulate_tokens(model_tokens_1))
|
||||
)
|
||||
|
||||
# Verify: thinking tokens + tool call
|
||||
gen_1 = [r for r in results_1 if isinstance(r, GenerationResponse)]
|
||||
@@ -660,7 +678,9 @@ class TestE2EFullRoundTrip:
|
||||
THINKING_END,
|
||||
"The weather in Hangzhou is currently cloudy with temperatures between 7°C and 13°C.",
|
||||
]
|
||||
results_2 = list(parse_deepseek_v32(_simulate_tokens(model_tokens_2)))
|
||||
results_2 = list(
|
||||
_parse_deepseek_with_thinking(_simulate_tokens(model_tokens_2))
|
||||
)
|
||||
|
||||
gen_2 = [r for r in results_2 if isinstance(r, GenerationResponse)]
|
||||
tool_2 = [r for r in results_2 if isinstance(r, ToolCallResponse)]
|
||||
|
||||
@@ -380,6 +380,110 @@ class TestGenericToolCallsFinishReason:
|
||||
# ── Double parser chain (parse_thinking_models → parse_deepseek_v32) ──
|
||||
|
||||
|
||||
class TestDeepSeekV32StartsInThinking:
|
||||
"""Regression tests for deepseek v3.2 where the chat template appends
|
||||
<think> to the prompt so the model starts already inside a thinking block.
|
||||
"""
|
||||
|
||||
def test_reasoning_tagged_when_starts_in_thinking(self):
|
||||
tokens = [
|
||||
_make_response("let me", 0),
|
||||
_make_response(" think", 1),
|
||||
_make_response(THINKING_END, 2),
|
||||
_make_response("\n", 3),
|
||||
_make_response("42", 4, finish_reason="stop"),
|
||||
]
|
||||
thinking = parse_thinking_models(
|
||||
_queue_source(tokens),
|
||||
think_start=THINKING_START,
|
||||
think_end=THINKING_END,
|
||||
starts_in_thinking=True,
|
||||
)
|
||||
results = _step_until_finish(parse_deepseek_v32(thinking))
|
||||
gens = [
|
||||
r
|
||||
for r in results
|
||||
if isinstance(r, GenerationResponse) and r.finish_reason is None
|
||||
]
|
||||
texts = [(r.text, r.is_thinking) for r in gens]
|
||||
assert texts == [("let me", True), (" think", True), ("\n", False)]
|
||||
final = [
|
||||
r
|
||||
for r in results
|
||||
if isinstance(r, GenerationResponse) and r.finish_reason is not None
|
||||
]
|
||||
assert len(final) == 1
|
||||
assert final[0].text == "42"
|
||||
assert final[0].is_thinking is False
|
||||
|
||||
def test_starts_in_thinking_then_tool_call(self):
|
||||
tokens = [
|
||||
_make_response("need weather", 0),
|
||||
_make_response(THINKING_END, 1),
|
||||
_make_response("\n\n", 2),
|
||||
_make_response(TOOL_CALLS_START, 3),
|
||||
_make_response("\n", 4),
|
||||
_make_response(f'<{DSML_TOKEN}invoke name="get_weather">\n', 5),
|
||||
_make_response(
|
||||
f'<{DSML_TOKEN}parameter name="city" string="true">NYC</{DSML_TOKEN}parameter>\n',
|
||||
6,
|
||||
),
|
||||
_make_response(f"</{DSML_TOKEN}invoke>\n", 7),
|
||||
_make_response(TOOL_CALLS_END, 8, finish_reason="stop"),
|
||||
]
|
||||
thinking = parse_thinking_models(
|
||||
_queue_source(tokens),
|
||||
think_start=THINKING_START,
|
||||
think_end=THINKING_END,
|
||||
starts_in_thinking=True,
|
||||
)
|
||||
results = _step_until_finish(parse_deepseek_v32(thinking))
|
||||
reasoning_gens = [
|
||||
r
|
||||
for r in results
|
||||
if isinstance(r, GenerationResponse)
|
||||
and r.finish_reason is None
|
||||
and r.is_thinking
|
||||
]
|
||||
assert [r.text for r in reasoning_gens] == ["need weather"]
|
||||
tool_results = [r for r in results if isinstance(r, ToolCallResponse)]
|
||||
assert len(tool_results) == 1
|
||||
assert tool_results[0].tool_calls[0].name == "get_weather"
|
||||
|
||||
def test_reasoning_tokens_counted_starts_in_thinking(self):
|
||||
usage = Usage(
|
||||
prompt_tokens=10,
|
||||
completion_tokens=5,
|
||||
total_tokens=15,
|
||||
prompt_tokens_details=PromptTokensDetails(cached_tokens=0),
|
||||
completion_tokens_details=CompletionTokensDetails(reasoning_tokens=0),
|
||||
)
|
||||
tokens = [
|
||||
_make_response("reasoning", 0),
|
||||
_make_response(" more", 1),
|
||||
_make_response(THINKING_END, 2),
|
||||
_make_response("\n", 3),
|
||||
GenerationResponse(text="42", token=4, finish_reason="stop", usage=usage),
|
||||
]
|
||||
thinking = parse_thinking_models(
|
||||
_queue_source(tokens),
|
||||
think_start=THINKING_START,
|
||||
think_end=THINKING_END,
|
||||
starts_in_thinking=True,
|
||||
)
|
||||
results = _step_until_finish(
|
||||
count_reasoning_tokens(parse_deepseek_v32(thinking))
|
||||
)
|
||||
final = [
|
||||
r
|
||||
for r in results
|
||||
if isinstance(r, GenerationResponse) and r.finish_reason is not None
|
||||
]
|
||||
assert len(final) == 1
|
||||
assert final[0].usage is not None
|
||||
assert final[0].usage.completion_tokens_details.reasoning_tokens == 2
|
||||
|
||||
|
||||
class TestBatchGeneratorSingleNext:
|
||||
def test_finish_reason_with_buffered_tokens_drain_loop(self):
|
||||
from exo.worker.runner.llm_inference.batch_generator import GeneratorQueue
|
||||
|
||||
32
uv.lock
generated
32
uv.lock
generated
@@ -395,7 +395,7 @@ dependencies = [
|
||||
{ name = "mflux", marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx", version = "0.31.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx", version = "0.31.2.dev20260422+ec49d18e", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#ec49d18ec4cfba0e0c7a37f20d1cf4d75fe56731" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#62bbd5b2ef8d29d02dd08af303c22518341a46b0" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#c7010341e1f41ac15815feb5dc55134f44e3b044" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-vlm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
@@ -418,19 +418,19 @@ cpu = [
|
||||
{ name = "mlx", version = "0.31.1", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-3-exo-cpu') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx", version = "0.31.2.dev20260422+ec49d18e", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#ec49d18ec4cfba0e0c7a37f20d1cf4d75fe56731" }, marker = "(sys_platform == 'darwin' and extra == 'extra-3-exo-cpu') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-cpu", marker = "sys_platform == 'linux'" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#62bbd5b2ef8d29d02dd08af303c22518341a46b0" }, marker = "sys_platform == 'linux'" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#c7010341e1f41ac15815feb5dc55134f44e3b044" }, marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
cuda12 = [
|
||||
{ name = "mlx", version = "0.31.1", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx", version = "0.31.2.dev20260422+ec49d18e", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#ec49d18ec4cfba0e0c7a37f20d1cf4d75fe56731" }, marker = "(sys_platform == 'darwin' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-cuda-12", marker = "sys_platform == 'linux'" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#62bbd5b2ef8d29d02dd08af303c22518341a46b0" }, marker = "sys_platform == 'linux'" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#c7010341e1f41ac15815feb5dc55134f44e3b044" }, marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
cuda13 = [
|
||||
{ name = "mlx", version = "0.31.1", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx", version = "0.31.2.dev20260422+ec49d18e", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#ec49d18ec4cfba0e0c7a37f20d1cf4d75fe56731" }, marker = "(sys_platform == 'darwin' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-cuda-13", marker = "sys_platform == 'linux'" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#62bbd5b2ef8d29d02dd08af303c22518341a46b0" }, marker = "sys_platform == 'linux'" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#c7010341e1f41ac15815feb5dc55134f44e3b044" }, marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
[package.dev-dependencies]
|
||||
@@ -1326,7 +1326,7 @@ wheels = [
|
||||
[[package]]
|
||||
name = "mlx-lm"
|
||||
version = "0.31.3"
|
||||
source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#62bbd5b2ef8d29d02dd08af303c22518341a46b0" }
|
||||
source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#c7010341e1f41ac15815feb5dc55134f44e3b044" }
|
||||
resolution-markers = [
|
||||
"sys_platform == 'darwin'",
|
||||
"sys_platform == 'linux'",
|
||||
@@ -1353,7 +1353,7 @@ dependencies = [
|
||||
{ name = "mlx", version = "0.31.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx", version = "0.31.2.dev20260422+ec49d18e", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git?branch=address-rdma-gpu-locks#ec49d18ec4cfba0e0c7a37f20d1cf4d75fe56731" }, marker = "sys_platform == 'darwin' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra != 'extra-3-exo-cpu' and extra != 'extra-3-exo-cuda12' and extra != 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#62bbd5b2ef8d29d02dd08af303c22518341a46b0" }, marker = "sys_platform == 'darwin' or (sys_platform == 'linux' and extra == 'extra-3-exo-cpu') or (sys_platform == 'linux' and extra == 'extra-3-exo-cuda12') or (sys_platform == 'linux' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "mlx-lm", version = "0.31.3", source = { git = "https://github.com/rltakashige/mlx-lm?branch=leo%2Ffix-arrayscache-leak#c7010341e1f41ac15815feb5dc55134f44e3b044" }, marker = "sys_platform == 'darwin' or (sys_platform == 'linux' and extra == 'extra-3-exo-cpu') or (sys_platform == 'linux' and extra == 'extra-3-exo-cuda12') or (sys_platform == 'linux' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
@@ -2634,7 +2634,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "transformers"
|
||||
version = "5.3.0"
|
||||
version = "5.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
@@ -2645,11 +2645,11 @@ dependencies = [
|
||||
{ name = "safetensors", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "tokenizers", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "typer", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
{ name = "typer-slim", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/bd/7e/8a0c57d562015e5b16c97c1f0b8e0e92ead2c7c20513225dc12c2043ba9f/transformers-5.2.0.tar.gz", hash = "sha256:0088b8b46ccc9eff1a1dca72b5d618a5ee3b1befc3e418c9512b35dea9f9a650", size = 8618176, upload-time = "2026-02-16T18:54:02.867Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/93/79754b0ca486e556c2b95d4f5afc66aaf4b260694f3d6e1b51da2d036691/transformers-5.2.0-py3-none-any.whl", hash = "sha256:9ecaf243dc45bee11a7d93f8caf03746accc0cb069181bbf4ad8566c53e854b4", size = 10403304, upload-time = "2026-02-16T18:53:59.699Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2706,6 +2706,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typer-slim"
|
||||
version = "0.24.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "typer", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda12') or (extra == 'extra-3-exo-cpu' and extra == 'extra-3-exo-cuda13') or (extra == 'extra-3-exo-cuda12' and extra == 'extra-3-exo-cuda13')" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a7/a7/e6aecc4b4eb59598829a3b5076a93aff291b4fdaa2ded25efc4e1f4d219c/typer_slim-0.24.0.tar.gz", hash = "sha256:f0ed36127183f52ae6ced2ecb2521789995992c521a46083bfcdbb652d22ad34", size = 4776, upload-time = "2026-02-16T22:08:51.2Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/24/5480c20380dfd18cf33d14784096dca45a24eae6102e91d49a718d3b6855/typer_slim-0.24.0-py3-none-any.whl", hash = "sha256:d5d7ee1ee2834d5020c7c616ed5e0d0f29b9a4b1dd283bdebae198ec09778d0e", size = 3394, upload-time = "2026-02-16T22:08:49.92Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-aiofiles"
|
||||
version = "25.1.0.20251011"
|
||||
|
||||
Reference in New Issue
Block a user