Compare commits

Alex Cheema
c84b565a32 feat: disable RDMA instance type when no Thunderbolt 5 hardware detected
When the cluster has fewer than 2 nodes with Thunderbolt 5 hardware,
the MLX RDMA button is now disabled with a tooltip explaining that
RDMA requires Thunderbolt 5. This prevents users from selecting an
instance type that will fail at placement time.

Closes #1351

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:01:24 -08:00
4 changed files with 62 additions and 87 deletions
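
The gating predicate is a two-node threshold: count the nodes that report at least one Thunderbolt 5 interface, and only offer RDMA when there are two or more, since an RDMA link needs a TB5 peer on both ends. Below is a minimal Python sketch of the same check the Svelte $derived.by in the first file performs; the type and function names are illustrative, not exo's actual API.

from dataclasses import dataclass, field

# Hypothetical re-expression of the frontend's hasRdmaCapableNodes check;
# exo's real placement-side validation and type names will differ.

@dataclass
class NodeTbInfo:
    # Thunderbolt interfaces detected on this node; empty if no TB5 hardware.
    interfaces: list[str] = field(default_factory=list)

def has_rdma_capable_nodes(tb_identifiers: dict[str, NodeTbInfo] | None) -> bool:
    if not tb_identifiers:
        return False
    tb5_nodes = sum(1 for node in tb_identifiers.values() if node.interfaces)
    return tb5_nodes >= 2

# One TB5-capable node is not enough to form an RDMA pair.
assert not has_rdma_capable_nodes({"a": NodeTbInfo(["tb5-0"])})
assert has_rdma_capable_nodes({"a": NodeTbInfo(["tb5-0"]), "b": NodeTbInfo(["tb5-1"])})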

View File

@@ -114,6 +114,16 @@
 });
 let tb5InfoDismissed = $state(false);
+// Whether the cluster has >=2 nodes with RDMA-capable (TB5) hardware
+const hasRdmaCapableNodes = $derived.by(() => {
+  const ids = tbIdentifiers;
+  if (!ids) return false;
+  const tb5NodeCount = Object.values(ids).filter(
+    (node) => node.interfaces.length > 0,
+  ).length;
+  return tb5NodeCount >= 2;
+});
+
 // Helper to get friendly node name from node ID
 function getNodeName(nodeId: string): string {
   const node = data?.nodes?.[nodeId];
@@ -2882,28 +2892,48 @@
     </span>
     MLX Ring
   </button>
-  <button
-    onclick={() => {
-      selectedInstanceType = "MlxIbv";
-      saveLaunchDefaults();
-    }}
-    class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 cursor-pointer {selectedInstanceType ===
-    'MlxIbv'
-      ? 'bg-transparent text-exo-yellow border-exo-yellow'
-      : 'bg-transparent text-white/70 border-exo-medium-gray/50 hover:border-exo-yellow/50'}"
-  >
-    <span
-      class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedInstanceType ===
-      'MlxIbv'
-        ? 'border-exo-yellow'
-        : 'border-exo-medium-gray'}"
-    >
-      {#if selectedInstanceType === "MlxIbv"}
-        <span class="w-2 h-2 rounded-full bg-exo-yellow"></span>
-      {/if}
-    </span>
-    MLX RDMA
-  </button>
+  <div class="group relative">
+    <button
+      onclick={() => {
+        if (!hasRdmaCapableNodes) return;
+        selectedInstanceType = "MlxIbv";
+        saveLaunchDefaults();
+      }}
+      disabled={!hasRdmaCapableNodes}
+      class="flex items-center gap-2 py-2 px-4 text-sm font-mono border rounded transition-all duration-200 {hasRdmaCapableNodes
+        ? 'cursor-pointer'
+        : 'cursor-not-allowed opacity-40'} {selectedInstanceType ===
+      'MlxIbv' && hasRdmaCapableNodes
+        ? 'bg-transparent text-exo-yellow border-exo-yellow'
+        : 'bg-transparent text-white/70 border-exo-medium-gray/50'} {hasRdmaCapableNodes &&
+      selectedInstanceType !== 'MlxIbv'
+        ? 'hover:border-exo-yellow/50'
+        : ''}"
+    >
+      <span
+        class="w-4 h-4 rounded-full border-2 flex items-center justify-center {selectedInstanceType ===
+        'MlxIbv' && hasRdmaCapableNodes
+          ? 'border-exo-yellow'
+          : 'border-exo-medium-gray'}"
+      >
+        {#if selectedInstanceType === "MlxIbv" && hasRdmaCapableNodes}
+          <span class="w-2 h-2 rounded-full bg-exo-yellow"
+          ></span>
+        {/if}
+      </span>
+      MLX RDMA
+    </button>
+    {#if !hasRdmaCapableNodes}
+      <div
+        class="absolute top-full left-0 mt-2 w-64 p-3 rounded border border-blue-400/30 bg-exo-dark-gray/95 backdrop-blur-sm opacity-0 invisible group-hover:opacity-100 group-hover:visible transition-all duration-200 z-50 shadow-lg"
+      >
+        <p class="text-xs text-white/80">
+          RDMA requires Thunderbolt 5 hardware on at least 2
+          nodes. Your devices do not have Thunderbolt 5.
+        </p>
+      </div>
+    {/if}
+  </div>
 </div>
 </div>

View File

@@ -165,7 +165,6 @@ def is_custom_card(model_id: ModelId) -> bool:
 class ConfigData(BaseModel):
     model_config = {"extra": "ignore"}  # Allow unknown fields
 
     model_type: str | None = None
-    architectures: list[str] | None = None
     hidden_size: Annotated[int, Field(ge=0)] | None = None
     layer_count: int = Field(
@@ -201,7 +200,6 @@ class ConfigData(BaseModel):
             return data
         for field in [
             "model_type",
-            "architectures",
             "hidden_size",
             "num_hidden_layers",

View File

@@ -269,52 +269,19 @@ def get_tokenizer(model_path: Path, shard_metadata: ShardMetadata) -> TokenizerWrapper:
     return load_tokenizer_for_model_id(shard_metadata.model_card.model_id, model_path)
 
 
-def _read_model_type_from_config(model_path: Path) -> str | None:
-    """Read the model_type field from config.json at the given model path.
-
-    Returns None if config.json doesn't exist or doesn't contain model_type.
-    """
-    config_path = model_path / "config.json"
-    if not config_path.exists():
-        return None
-    try:
-        with open(config_path) as f:
-            config: dict[str, Any] = json.load(f)  # pyright: ignore[reportAny]
-        model_type: Any = config.get("model_type")
-        if model_type is None:
-            text_config: Any = config.get("text_config")
-            if isinstance(text_config, dict):
-                model_type = text_config.get("model_type")  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
-        return model_type if isinstance(model_type, str) else None
-    except (json.JSONDecodeError, OSError):
-        return None
-
-
-def get_eos_token_ids_for_model(
-    model_id: ModelId, model_type: str | None = None
-) -> list[int] | None:
-    """Get the EOS token IDs for a model based on its architecture type.
-
-    Uses model_type from config.json when available, falls back to model_id
-    string matching for backward compatibility.
+def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
+    """
+    Get the EOS token IDs for a model based on its ID.
 
     Some models require explicit EOS token configuration that isn't in their
     tokenizer config. This function returns the known EOS token IDs for such models.
 
-    Args:
-        model_id: The HuggingFace model ID
-        model_type: The model_type field from config.json (e.g., "kimi", "glm4")
-
     Returns:
         List of EOS token IDs, or None if the model uses standard tokenizer config
     """
-    if model_type is not None:
-        if model_type == "kimi":
-            return [163586]
-        elif model_type == "glm4_moe_lite":
-            # 154820: <|endoftext|>, 154827: <|user|>, 154829: <|observation|>
-            return [154820, 154827, 154829]
-        elif model_type.startswith("glm"):
-            return [151336, 151329, 151338]
-
-    # Fallback: string matching on model_id
     model_id_lower = model_id.lower()
     if "kimi-k2" in model_id_lower:
         return [163586]
@@ -329,10 +296,11 @@ def get_eos_token_ids_for_model(
 def load_tokenizer_for_model_id(
     model_id: ModelId, model_path: Path
 ) -> TokenizerWrapper:
-    """Load tokenizer for a model given its ID and local path.
+    """
+    Load tokenizer for a model given its ID and local path.
 
-    Uses model_type from config.json for architecture detection when available,
-    falling back to model_id string matching for backward compatibility.
+    This is the core tokenizer loading logic, handling special cases for different
+    model families (Kimi, GLM, etc.) and transformers 5.x compatibility.
 
     Args:
         model_id: The HuggingFace model ID (e.g., "moonshotai/Kimi-K2-Instruct")
@@ -341,21 +309,11 @@ def load_tokenizer_for_model_id(
     Returns:
         TokenizerWrapper instance configured for the model
     """
-    model_type = _read_model_type_from_config(model_path)
     model_id_lower = model_id.lower()
-    eos_token_ids = get_eos_token_ids_for_model(model_id, model_type=model_type)
-    is_kimi = (
-        model_type == "kimi" if model_type is not None else "kimi-k2" in model_id_lower
-    )
-    is_gemma3 = (
-        model_type == "gemma3"
-        if model_type is not None
-        else "gemma-3" in model_id_lower
-    )
+    eos_token_ids = get_eos_token_ids_for_model(model_id)
 
     # Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer
-    if is_kimi:
+    if "kimi-k2" in model_id_lower:
         import importlib.util
         import types
@@ -409,7 +367,7 @@ def load_tokenizer_for_model_id(
         eos_token_ids=eos_token_ids,
     )
 
-    if is_gemma3:
+    if "gemma-3" in model_id_lower:
         gemma_3_eos_id = 1
         gemma_3_end_of_turn_id = 106
         if tokenizer.eos_token_ids is not None:
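
With the model_type plumbing removed, EOS overrides are once again resolved purely by substring matching on the model ID. The following condensed sketch shows the post-revert behavior; the "glm" match condition is an assumption (this diff elides the actual GLM branch), and only the cases visible above are covered.

def get_eos_token_ids_sketch(model_id: str) -> list[int] | None:
    # Condensed sketch of the reverted lookup; the real function may handle
    # more model families than the two visible in this diff.
    model_id_lower = model_id.lower()
    if "kimi-k2" in model_id_lower:
        # Kimi-K2 ships a custom tokenizer, so the EOS id is pinned explicitly.
        return [163586]
    if "glm" in model_id_lower:  # assumed match condition, not shown in the diff
        return [151336, 151329, 151338]
    # None means: defer to the tokenizer's own EOS configuration.
    return None

assert get_eos_token_ids_sketch("moonshotai/Kimi-K2-Instruct") == [163586]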

View File

@@ -24,7 +24,6 @@ from exo.worker.engines.mlx.utils_mlx import (
 # Files needed for tokenizer functionality
 TOKENIZER_FILE_PATTERNS = [
-    "config.json",
     "tokenizer.json",
     "tokenizer_config.json",
     "special_tokens_map.json",
@@ -339,9 +338,6 @@ async def test_kimi_tokenizer_specifically():
     # Verify EOS token is set
     assert eos_token_ids == [163586], "Kimi EOS token should be [163586]"
 
-    # Verify architecture-based detection gives same result
-    assert get_eos_token_ids_for_model(model_id, model_type="kimi") == [163586]
-
 
 # Test GLM tokenizer since it also has special handling
 @pytest.mark.asyncio
@@ -382,10 +378,3 @@ async def test_glm_tokenizer_specifically():
         151329,
         151338,
     ], "GLM EOS tokens should be correct"
-
-    # Verify architecture-based detection gives same result
-    assert get_eos_token_ids_for_model(model_id, model_type="glm4") == [
-        151336,
-        151329,
-        151338,
-    ]
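
With the model_type parameter gone, the surviving assertions reduce to the single-argument call. The snippet below is roughly what the trimmed tests still exercise; the import path is assumed from the utils_mlx import at the top of this test file.

from exo.worker.engines.mlx.utils_mlx import get_eos_token_ids_for_model

# Post-revert call sites pass only the model ID; no model_type keyword exists.
assert get_eos_token_ids_for_model("moonshotai/Kimi-K2-Instruct") == [163586]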