Compare commits

Alex Cheema
c84b565a32 feat: disable RDMA instance type when no Thunderbolt 5 hardware detected
When the cluster has fewer than 2 nodes with Thunderbolt 5 hardware,
the MLX RDMA button is now disabled with a tooltip explaining that
RDMA requires Thunderbolt 5. This prevents users from selecting an
instance type that will fail at placement time.

Closes #1351

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:01:24 -08:00
4 changed files with 62 additions and 87 deletions
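
The gating predicate is a two-node threshold: count the nodes that report at least one Thunderbolt 5 interface, and only offer RDMA when there are two or more, since an RDMA link needs a TB5 peer on both ends. Below is a minimal Python sketch of the same check the Svelte $derived.by in the first file performs; the type and function names are illustrative, not exo's actual API.

from dataclasses import dataclass, field

# Hypothetical re-expression of the frontend's hasRdmaCapableNodes check;
# exo's real placement-side validation and type names will differ.

@dataclass
class NodeTbInfo:
    # Thunderbolt interfaces detected on this node; empty if no TB5 hardware.
    interfaces: list[str] = field(default_factory=list)

def has_rdma_capable_nodes(tb_identifiers: dict[str, NodeTbInfo] | None) -> bool:
    if not tb_identifiers:
        return False
    tb5_nodes = sum(1 for node in tb_identifiers.values() if node.interfaces)
    return tb5_nodes >= 2

# One TB5-capable node is not enough to form an RDMA pair.
assert not has_rdma_capable_nodes({"a": NodeTbInfo(["tb5-0"])})
assert has_rdma_capable_nodes({"a": NodeTbInfo(["tb5-0"]), "b": NodeTbInfo(["tb5-1"])})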

View File

@@ -114,6 +114,16 @@
 });
 let tb5InfoDismissed = $state(false);
+// Whether the cluster has >=2 nodes with RDMA-capable (TB5) hardware
+const hasRdmaCapableNodes = $derived.by(() => {
+  const ids = tbIdentifiers;
+  if (!ids) return false;
+  const tb5NodeCount = Object.values(ids).filter(
+    (node) => node.interfaces.length > 0,
+  ).length;
+  return tb5NodeCount >= 2;
+});
+
 // Helper to get friendly node name from node ID
 function getNodeName(nodeId: string): string {
   const node = data?.nodes?.[nodeId];
@@ -2882,28 +2892,48 @@
     </span>
     MLX Ring
   </button>
-  <button
-    onclick={() => {
-      selectedInstanceType = "MlxIbv";
-      saveLaunchDefaults();
-    }}
-    class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 cursor-pointer {selectedInstanceType ===
-    'MlxIbv'
-      ? 'bg-transparent text-exo-yellow border-exo-yellow'
-      : 'bg-transparent text-white/70 border-exo-medium-gray/50 hover:border-exo-yellow/50'}"
-  >
-    <span
-      class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedInstanceType ===
-      'MlxIbv'
-        ? 'border-exo-yellow'
-        : 'border-exo-medium-gray'}"
-    >
-      {#if selectedInstanceType === "MlxIbv"}
-        <span class="w-2 h-2 rounded-full bg-exo-yellow"></span>
-      {/if}
-    </span>
-    MLX RDMA
-  </button>
+  <div class="group relative">
+    <button
+      onclick={() => {
+        if (!hasRdmaCapableNodes) return;
+        selectedInstanceType = "MlxIbv";
+        saveLaunchDefaults();
+      }}
+      disabled={!hasRdmaCapableNodes}
+      class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 {hasRdmaCapableNodes
+        ? 'cursor-pointer'
+        : 'cursor-not-allowed opacity-40'} {selectedInstanceType ===
+      'MlxIbv' && hasRdmaCapableNodes
+        ? 'bg-transparent text-exo-yellow border-exo-yellow'
+        : 'bg-transparent text-white/70 border-exo-medium-gray/50'} {hasRdmaCapableNodes &&
+      selectedInstanceType !== 'MlxIbv'
+        ? 'hover:border-exo-yellow/50'
+        : ''}"
+    >
+      <span
+        class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedInstanceType ===
+        'MlxIbv' && hasRdmaCapableNodes
+          ? 'border-exo-yellow'
+          : 'border-exo-medium-gray'}"
+      >
+        {#if selectedInstanceType === "MlxIbv" && hasRdmaCapableNodes}
+          <span class="w-2 h-2 rounded-full bg-exo-yellow"
+          ></span>
+        {/if}
+      </span>
+      MLX RDMA
+    </button>
+    {#if !hasRdmaCapableNodes}
+      <div
+        class="absolute top-full left-0 mt-2 w-64 p-3 rounded border border-blue-400/30 bg-exo-dark-gray/95 backdrop-blur-sm opacity-0 invisible group-hover:opacity-100 group-hover:visible transition-all duration-200 z-50 shadow-lg"
+      >
+        <p class="text-xs text-white/80">
+          RDMA requires Thunderbolt 5 hardware on at least 2
+          nodes. Your devices do not have Thunderbolt 5.
+        </p>
+      </div>
+    {/if}
+  </div>
 </div>
 </div>

View File

@@ -165,7 +165,6 @@ def is_custom_card(model_id: ModelId) -> bool:
 class ConfigData(BaseModel):
     model_config = {"extra": "ignore"}  # Allow unknown fields
 
     model_type: str | None = None
-    architectures: list[str] | None = None
     hidden_size: Annotated[int, Field(ge=0)] | None = None
     layer_count: int = Field(
@@ -201,7 +200,6 @@ class ConfigData(BaseModel):
             return data
         for field in [
             "model_type",
-            "architectures",
             "hidden_size",
             "num_hidden_layers",

View File

@@ -269,52 +269,19 @@ def get_tokenizer(model_path: Path, shard_metadata: ShardMetadata) -> TokenizerWrapper:
     return load_tokenizer_for_model_id(shard_metadata.model_card.model_id, model_path)
 
 
-def _read_model_type_from_config(model_path: Path) -> str | None:
-    """Read the model_type field from config.json at the given model path.
-
-    Returns None if config.json doesn't exist or doesn't contain model_type.
-    """
-    config_path = model_path / "config.json"
-    if not config_path.exists():
-        return None
-    try:
-        with open(config_path) as f:
-            config: dict[str, Any] = json.load(f)  # pyright: ignore[reportAny]
-        model_type: Any = config.get("model_type")
-        if model_type is None:
-            text_config: Any = config.get("text_config")
-            if isinstance(text_config, dict):
-                model_type = text_config.get("model_type")  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
-        return model_type if isinstance(model_type, str) else None
-    except (json.JSONDecodeError, OSError):
-        return None
-
-
-def get_eos_token_ids_for_model(
-    model_id: ModelId, model_type: str | None = None
-) -> list[int] | None:
-    """Get the EOS token IDs for a model based on its architecture type.
-
-    Uses model_type from config.json when available, falls back to model_id
-    string matching for backward compatibility.
+def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
+    """
+    Get the EOS token IDs for a model based on its ID.
 
     Some models require explicit EOS token configuration that isn't in their
     tokenizer config. This function returns the known EOS token IDs for such models.
 
-    Args:
-        model_id: The HuggingFace model ID
-        model_type: The model_type field from config.json (e.g., "kimi", "glm4")
-
     Returns:
         List of EOS token IDs, or None if the model uses standard tokenizer config
     """
-    if model_type is not None:
-        if model_type == "kimi":
-            return [163586]
-        elif model_type == "glm4_moe_lite":
-            # 154820: <|endoftext|>, 154827: <|user|>, 154829: <|observation|>
-            return [154820, 154827, 154829]
-        elif model_type.startswith("glm"):
-            return [151336, 151329, 151338]
-
-    # Fallback: string matching on model_id
     model_id_lower = model_id.lower()
     if "kimi-k2" in model_id_lower:
         return [163586]
@@ -329,10 +296,11 @@ def get_eos_token_ids_for_model(
 def load_tokenizer_for_model_id(
     model_id: ModelId, model_path: Path
 ) -> TokenizerWrapper:
-    """Load tokenizer for a model given its ID and local path.
+    """
+    Load tokenizer for a model given its ID and local path.
 
-    Uses model_type from config.json for architecture detection when available,
-    falling back to model_id string matching for backward compatibility.
+    This is the core tokenizer loading logic, handling special cases for different
+    model families (Kimi, GLM, etc.) and transformers 5.x compatibility.
 
     Args:
         model_id: The HuggingFace model ID (e.g., "moonshotai/Kimi-K2-Instruct")
@@ -341,21 +309,11 @@ def load_tokenizer_for_model_id(
     Returns:
         TokenizerWrapper instance configured for the model
     """
-    model_type = _read_model_type_from_config(model_path)
     model_id_lower = model_id.lower()
-    eos_token_ids = get_eos_token_ids_for_model(model_id, model_type=model_type)
-    is_kimi = (
-        model_type == "kimi" if model_type is not None else "kimi-k2" in model_id_lower
-    )
-    is_gemma3 = (
-        model_type == "gemma3"
-        if model_type is not None
-        else "gemma-3" in model_id_lower
-    )
+    eos_token_ids = get_eos_token_ids_for_model(model_id)
 
     # Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer
-    if is_kimi:
+    if "kimi-k2" in model_id_lower:
         import importlib.util
         import types
@@ -409,7 +367,7 @@ def load_tokenizer_for_model_id(
         eos_token_ids=eos_token_ids,
     )
 
-    if is_gemma3:
+    if "gemma-3" in model_id_lower:
         gemma_3_eos_id = 1
         gemma_3_end_of_turn_id = 106
         if tokenizer.eos_token_ids is not None:
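
With the model_type plumbing removed, EOS overrides are once again resolved purely by substring matching on the model ID. The following condensed sketch shows the post-revert behavior; the "glm" match condition is an assumption (this diff elides the actual GLM branch), and only the cases visible above are covered.

def get_eos_token_ids_sketch(model_id: str) -> list[int] | None:
    # Condensed sketch of the reverted lookup; the real function may handle
    # more model families than the two visible in this diff.
    model_id_lower = model_id.lower()
    if "kimi-k2" in model_id_lower:
        # Kimi-K2 ships a custom tokenizer, so the EOS id is pinned explicitly.
        return [163586]
    if "glm" in model_id_lower:  # assumed match condition, not shown in the diff
        return [151336, 151329, 151338]
    # None means: defer to the tokenizer's own EOS configuration.
    return None

assert get_eos_token_ids_sketch("moonshotai/Kimi-K2-Instruct") == [163586]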

View File

@@ -24,7 +24,6 @@ from exo.worker.engines.mlx.utils_mlx import (
 # Files needed for tokenizer functionality
 TOKENIZER_FILE_PATTERNS = [
-    "config.json",
     "tokenizer.json",
     "tokenizer_config.json",
     "special_tokens_map.json",
@@ -339,9 +338,6 @@ async def test_kimi_tokenizer_specifically():
     # Verify EOS token is set
     assert eos_token_ids == [163586], "Kimi EOS token should be [163586]"
 
-    # Verify architecture-based detection gives same result
-    assert get_eos_token_ids_for_model(model_id, model_type="kimi") == [163586]
-
 
 # Test GLM tokenizer since it also has special handling
 @pytest.mark.asyncio
@@ -382,10 +378,3 @@ async def test_glm_tokenizer_specifically():
         151329,
         151338,
     ], "GLM EOS tokens should be correct"
-
-    # Verify architecture-based detection gives same result
-    assert get_eos_token_ids_for_model(model_id, model_type="glm4") == [
-        151336,
-        151329,
-        151338,
-    ]
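
With the model_type parameter gone, the surviving assertions reduce to the single-argument call. The snippet below is roughly what the trimmed tests still exercise; the import path is assumed from the utils_mlx import at the top of this test file.

from exo.worker.engines.mlx.utils_mlx import get_eos_token_ids_for_model

# Post-revert call sites pass only the model ID; no model_type keyword exists.
assert get_eos_token_ids_for_model("moonshotai/Kimi-K2-Instruct") == [163586]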