Handle MessageTooLarge error in router to prevent node crash

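When a message exceeds gossipsub's 1 MiB limit, the resulting RuntimeError("MessageTooLarge") was unhandled in _networking_publish, causing the entire TaskGroup to fail and crash the node. This error is now caught and logged, and the oversized message is dropped gracefully.

Note: this change also removes the config.json model_type-based architecture detection from utils_mlx.py (along with the ConfigData.model_type field and the related test assertions), leaving model_id string matching as the only detection path.

Fixes #1296

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>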
2026-02-19 23:36:30 -05:00 · 2026-02-17 10:34:22 -08:00
4 changed files with 19 additions and 66 deletions
--- a/src/exo/routing/router.py
+++ b/src/exo/routing/router.py
@@ -211,6 +211,14 @@ class Router:
                    pass
                except AllQueuesFullError:
                    logger.warning(f"All peer queues full, dropping message on {topic}")
+                except RuntimeError as e:
+                    if "MessageTooLarge" in str(e):
+                        logger.error(
+                            f"Message too large for gossipsub on topic {topic} "
+                            f"({len(data)} bytes), dropping message"
+                        )
+                    else:
+                        raise


 def get_node_id_keypair(
--- a/src/exo/shared/models/model_cards.py
+++ b/src/exo/shared/models/model_cards.py
@@ -165,7 +165,6 @@ def is_custom_card(model_id: ModelId) -> bool:
 class ConfigData(BaseModel):
    model_config = {"extra": "ignore"}  # Allow unknown fields

-    model_type: str | None = None
    architectures: list[str] | None = None
    hidden_size: Annotated[int, Field(ge=0)] | None = None
    layer_count: int = Field(
@@ -201,7 +200,6 @@ class ConfigData(BaseModel):
            return data

        for field in [
-            "model_type",
            "architectures",
            "hidden_size",
            "num_hidden_layers",
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -269,52 +269,19 @@ def get_tokenizer(model_path: Path, shard_metadata: ShardMetadata) -> TokenizerW
    return load_tokenizer_for_model_id(shard_metadata.model_card.model_id, model_path)


-def _read_model_type_from_config(model_path: Path) -> str | None:
-    """Read the model_type field from config.json at the given model path.
-
-    Returns None if config.json doesn't exist or doesn't contain model_type.
+def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
    """
-    config_path = model_path / "config.json"
-    if not config_path.exists():
-        return None
-    try:
-        with open(config_path) as f:
-            config: dict[str, Any] = json.load(f)  # pyright: ignore[reportAny]
-        model_type: Any = config.get("model_type")
-        if model_type is None:
-            text_config: Any = config.get("text_config")
-            if isinstance(text_config, dict):
-                model_type = text_config.get("model_type")  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
-        return model_type if isinstance(model_type, str) else None
-    except (json.JSONDecodeError, OSError):
-        return None
+    Get the EOS token IDs for a model based on its ID.

-
-def get_eos_token_ids_for_model(
-    model_id: ModelId, model_type: str | None = None
-) -> list[int] | None:
-    """Get the EOS token IDs for a model based on its architecture type.
-
-    Uses model_type from config.json when available, falls back to model_id
-    string matching for backward compatibility.
+    Some models require explicit EOS token configuration that isn't in their
+    tokenizer config. This function returns the known EOS token IDs for such models.

    Args:
        model_id: The HuggingFace model ID
-        model_type: The model_type field from config.json (e.g., "kimi", "glm4")

    Returns:
        List of EOS token IDs, or None if the model uses standard tokenizer config
    """
-    if model_type is not None:
-        if model_type == "kimi":
-            return [163586]
-        elif model_type == "glm4_moe_lite":
-            # 154820: <|endoftext|>, 154827: <|user|>, 154829: <|observation|>
-            return [154820, 154827, 154829]
-        elif model_type.startswith("glm"):
-            return [151336, 151329, 151338]
-
-    # Fallback: string matching on model_id
    model_id_lower = model_id.lower()
    if "kimi-k2" in model_id_lower:
        return [163586]
@@ -329,10 +296,11 @@ def get_eos_token_ids_for_model(
 def load_tokenizer_for_model_id(
    model_id: ModelId, model_path: Path
 ) -> TokenizerWrapper:
-    """Load tokenizer for a model given its ID and local path.
+    """
+    Load tokenizer for a model given its ID and local path.

-    Uses model_type from config.json for architecture detection when available,
-    falling back to model_id string matching for backward compatibility.
+    This is the core tokenizer loading logic, handling special cases for different
+    model families (Kimi, GLM, etc.) and transformers 5.x compatibility.

    Args:
        model_id: The HuggingFace model ID (e.g., "moonshotai/Kimi-K2-Instruct")
@@ -341,21 +309,11 @@ def load_tokenizer_for_model_id(
    Returns:
        TokenizerWrapper instance configured for the model
    """
-    model_type = _read_model_type_from_config(model_path)
    model_id_lower = model_id.lower()
-    eos_token_ids = get_eos_token_ids_for_model(model_id, model_type=model_type)
-
-    is_kimi = (
-        model_type == "kimi" if model_type is not None else "kimi-k2" in model_id_lower
-    )
-    is_gemma3 = (
-        model_type == "gemma3"
-        if model_type is not None
-        else "gemma-3" in model_id_lower
-    )
+    eos_token_ids = get_eos_token_ids_for_model(model_id)

    # Kimi uses a custom TikTokenTokenizer that transformers 5.x can't load via AutoTokenizer
-    if is_kimi:
+    if "kimi-k2" in model_id_lower:
        import importlib.util
        import types

@@ -409,7 +367,7 @@ def load_tokenizer_for_model_id(
        eos_token_ids=eos_token_ids,
    )

-    if is_gemma3:
+    if "gemma-3" in model_id_lower:
        gemma_3_eos_id = 1
        gemma_3_end_of_turn_id = 106
        if tokenizer.eos_token_ids is not None:
--- a/src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py
+++ b/src/exo/worker/tests/unittests/test_mlx/test_tokenizers.py
@@ -24,7 +24,6 @@ from exo.worker.engines.mlx.utils_mlx import (

 # Files needed for tokenizer functionality
 TOKENIZER_FILE_PATTERNS = [
-    "config.json",
    "tokenizer.json",
    "tokenizer_config.json",
    "special_tokens_map.json",
@@ -339,9 +338,6 @@ async def test_kimi_tokenizer_specifically():
    # Verify EOS token is set
    assert eos_token_ids == [163586], "Kimi EOS token should be [163586]"

-    # Verify architecture-based detection gives same result
-    assert get_eos_token_ids_for_model(model_id, model_type="kimi") == [163586]
-

 # Test GLM tokenizer since it also has special handling
@pytest.mark.asyncio
@@ -382,10 +378,3 @@ async def test_glm_tokenizer_specifically():
        151329,
        151338,
    ], "GLM EOS tokens should be correct"
-
-    # Verify architecture-based detection gives same result
-    assert get_eos_token_ids_for_model(model_id, model_type="glm4") == [
-        151336,
-        151329,
-        151338,
-    ]