Compare commits

..

6 Commits

Author SHA1 Message Date
Evan
e8078f5a0e wow 2026-01-20 15:07:03 +00:00
rltakashige
8b709e68b2 Mark slow tests as slow (#1220)
2026-01-20 15:03:46 +00:00
Evan Quiney
4da6eeb11f fix a test broken by #1204 (#1219)
A bad merge broke a test - fix it
2026-01-20 14:56:20 +00:00
Evan
3d2eee4884 quiet localhost log
this log is just noise - remove it
2026-01-20 14:51:26 +00:00
Evan
116558839e don't clear mdns discovered connections
Pingers currently remove mDNS-discovered connections - these systems should be independent
2026-01-20 14:46:20 +00:00
Evan Quiney
d4f551c602 Simplify model cards (#1204)
## Motivation

We have a lot of unneeded data in the model card - let's just keep the
necessary stuff and add back more data when we need it

## Test Plan

EXO still runs! (pipeline on 2)

Co-authored-by: rltakashige <rl.takashige@gmail.com>
2026-01-20 11:01:19 +00:00
55 changed files with 552 additions and 656 deletions

View File

@@ -10,7 +10,6 @@ PROJECT_ROOT = Path.cwd()
SOURCE_ROOT = PROJECT_ROOT / "src"
ENTRYPOINT = SOURCE_ROOT / "exo" / "__main__.py"
DASHBOARD_DIR = PROJECT_ROOT / "dashboard" / "build"
RESOURCES_DIR = PROJECT_ROOT / "resources"
EXO_SHARED_MODELS_DIR = SOURCE_ROOT / "exo" / "shared" / "models"
if not ENTRYPOINT.is_file():
@@ -19,9 +18,6 @@ if not ENTRYPOINT.is_file():
if not DASHBOARD_DIR.is_dir():
raise SystemExit(f"Dashboard assets are missing: {DASHBOARD_DIR}")
if not RESOURCES_DIR.is_dir():
raise SystemExit(f"Resources are missing: {RESOURCES_DIR}")
if not EXO_SHARED_MODELS_DIR.is_dir():
raise SystemExit(f"Shared model assets are missing: {EXO_SHARED_MODELS_DIR}")
@@ -62,7 +58,6 @@ HIDDEN_IMPORTS = sorted(
DATAS: list[tuple[str, str]] = [
(str(DASHBOARD_DIR), "dashboard"),
(str(RESOURCES_DIR), "resources"),
(str(MLX_LIB_DIR), "mlx/lib"),
(str(EXO_SHARED_MODELS_DIR), "exo/shared/models"),
]

View File

@@ -24,6 +24,7 @@ dependencies = [
"hypercorn>=0.18.0",
"openai-harmony>=0.0.8",
"httpx>=0.28.1",
"tomlkit>=0.14.0",
]
[project.scripts]
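
tomlkit is added here as a direct dependency; the model_cards.py hunks further down still serialize cards to and from TOML with it. A minimal, self-contained sketch of the style-preserving round-trip it provides (nothing below uses exo's own helpers):

import tomlkit

# Parse, edit in place, and dump back out; tomlkit keeps the original
# layout, unlike the read-only stdlib tomllib.
doc = tomlkit.parse('model_id = "mlx-community/Qwen3-0.6B-4bit"\nn_layers = 28\n')
doc["n_layers"] = 29
text = tomlkit.dumps(doc)
assert 'model_id = "mlx-community/Qwen3-0.6B-4bit"' in text
assert tomlkit.loads(text)["n_layers"] == 29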

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/DeepSeek-V3.1-4bit"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[storage_size]
in_bytes = 405874409472

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/DeepSeek-V3.1-8bit"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[storage_size]
in_bytes = 765577920512

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/GLM-4.5-Air-8bit"
n_layers = 46
hidden_size = 4096
supports_tensor = false
[storage_size]
in_bytes = 122406567936

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/GLM-4.5-Air-bf16"
n_layers = 46
hidden_size = 4096
supports_tensor = true
[storage_size]
in_bytes = 229780750336

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/GLM-4.7-4bit"
n_layers = 91
hidden_size = 5120
supports_tensor = true
[storage_size]
in_bytes = 198556925568

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/GLM-4.7-6bit"
n_layers = 91
hidden_size = 5120
supports_tensor = true
[storage_size]
in_bytes = 286737579648

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/GLM-4.7-8bit-gs32"
n_layers = 91
hidden_size = 5120
supports_tensor = true
[storage_size]
in_bytes = 396963397248

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Kimi-K2-Instruct-4bit"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[storage_size]
in_bytes = 620622774272

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Kimi-K2-Thinking"
n_layers = 61
hidden_size = 7168
supports_tensor = true
[storage_size]
in_bytes = 706522120192

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
n_layers = 16
hidden_size = 2048
supports_tensor = true
[storage_size]
in_bytes = 729808896

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
n_layers = 28
hidden_size = 3072
supports_tensor = true
[storage_size]
in_bytes = 1863319552

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
n_layers = 28
hidden_size = 3072
supports_tensor = true
[storage_size]
in_bytes = 3501195264

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[storage_size]
in_bytes = 40652242944

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[storage_size]
in_bytes = 76799803392

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[storage_size]
in_bytes = 40652242944

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
n_layers = 32
hidden_size = 4096
supports_tensor = true
[storage_size]
in_bytes = 4637851648

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
n_layers = 32
hidden_size = 4096
supports_tensor = true
[storage_size]
in_bytes = 8954839040

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
n_layers = 32
hidden_size = 4096
supports_tensor = true
[storage_size]
in_bytes = 16882073600

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/MiniMax-M2.1-3bit"
n_layers = 61
hidden_size = 3072
supports_tensor = true
[storage_size]
in_bytes = 100086644736

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/MiniMax-M2.1-8bit"
n_layers = 61
hidden_size = 3072
supports_tensor = true
[storage_size]
in_bytes = 242986745856

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-0.6B-4bit"
n_layers = 28
hidden_size = 1024
supports_tensor = false
[storage_size]
in_bytes = 342884352

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-0.6B-8bit"
n_layers = 28
hidden_size = 1024
supports_tensor = false
[storage_size]
in_bytes = 698351616

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
n_layers = 94
hidden_size = 4096
supports_tensor = true
[storage_size]
in_bytes = 141733920768

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
n_layers = 94
hidden_size = 4096
supports_tensor = true
[storage_size]
in_bytes = 268435456000

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-30B-A3B-4bit"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[storage_size]
in_bytes = 17612931072

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-30B-A3B-8bit"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[storage_size]
in_bytes = 33279705088

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
n_layers = 62
hidden_size = 6144
supports_tensor = true
[storage_size]
in_bytes = 289910292480

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
n_layers = 62
hidden_size = 6144
supports_tensor = true
[storage_size]
in_bytes = 579820584960

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[storage_size]
in_bytes = 46976204800

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[storage_size]
in_bytes = 88814387200

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[storage_size]
in_bytes = 88814387200

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
n_layers = 48
hidden_size = 2048
supports_tensor = true
[storage_size]
in_bytes = 88814387200

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
n_layers = 36
hidden_size = 2880
supports_tensor = true
[storage_size]
in_bytes = 70652212224

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/gpt-oss-20b-MXFP4-Q8"
n_layers = 24
hidden_size = 2880
supports_tensor = true
[storage_size]
in_bytes = 12025908224

View File

@@ -1,7 +0,0 @@
model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
n_layers = 80
hidden_size = 8192
supports_tensor = true
[storage_size]
in_bytes = 144383672320

View File

@@ -1,6 +1,5 @@
import time
from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
from http import HTTPStatus
from typing import cast
@@ -20,7 +19,12 @@ from exo.master.placement import place_instance as get_instance_placements
from exo.shared.apply import apply
from exo.shared.election import ElectionMessage
from exo.shared.logging import InterceptLogger
from exo.shared.models.model_cards import ModelCard, ModelId, get_model_cards
from exo.shared.models.model_cards import (
MODEL_CARDS,
ModelCard,
ModelId,
get_model_card,
)
from exo.shared.types.api import (
BenchChatCompletionResponse,
BenchChatCompletionTaskParams,
@@ -65,7 +69,7 @@ from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
from exo.shared.types.worker.shards import Sharding
from exo.utils.banner import print_startup_banner
from exo.utils.channels import Receiver, Sender, channel
from exo.utils.dashboard_path import RuntimeResources, find_directory
from exo.utils.dashboard_path import find_dashboard
from exo.utils.event_buffer import OrderedBuffer
@@ -86,52 +90,57 @@ def chunk_to_response(
)
@dataclass(eq=False)
class API:
node_id: NodeId
session_id: SessionId
port: int
app: FastAPI
global_event_receiver: Receiver[ForwarderEvent]
command_sender: Sender[ForwarderCommand]
election_receiver: Receiver[ElectionMessage]
state = field(init=False, default_factory=State)
_event_log: list[Event] = field(init=False, default_factory=list)
event_buffer: OrderedBuffer[Event] = field(init=False, default_factory=OrderedBuffer)
_chat_completion_queues: dict[CommandId, Sender[TokenChunk]] = field(init=False, default_factory=dict)
_tg: TaskGroup = field(init=False, default_factory=create_task_group)
last_completed_election: int = field(init=False, default=0)
paused: bool = field(init=False, default = False)
paused_ev: anyio.Event = field(init=False, default_factory=anyio.Event)
async def resolve_model_card(model_id: ModelId) -> ModelCard:
if model_id in MODEL_CARDS:
model_card = MODEL_CARDS[model_id]
return model_card
else:
return await get_model_card(model_id)
@classmethod
async def create(
cls,
class API:
def __init__(
self,
node_id: NodeId,
session_id: SessionId,
*,
port: int,
# Ideally this would be a MasterForwarderEvent but type system says no :(
global_event_receiver: Receiver[ForwarderEvent],
command_sender: Sender[ForwarderCommand],
# This lets us pause the API if an election is running
election_receiver: Receiver[ElectionMessage],
) -> None:
app = FastAPI()
app.mount(
self.state = State()
self._event_log: list[Event] = []
self.command_sender = command_sender
self.global_event_receiver = global_event_receiver
self.election_receiver = election_receiver
self.event_buffer: OrderedBuffer[Event] = OrderedBuffer[Event]()
self.node_id: NodeId = node_id
self.session_id: SessionId = session_id
self.last_completed_election: int = 0
self.port = port
self.paused: bool = False
self.paused_ev: anyio.Event = anyio.Event()
self.app = FastAPI()
self._setup_exception_handlers()
self._setup_cors()
self._setup_routes()
self.app.mount(
"/",
StaticFiles(
directory=await find_directory(RuntimeResources.Dashboard),
directory=find_dashboard(),
html=True,
),
name="dashboard",
)
cls(node_id, session_id, port, app, global_event_receiver, command_sender, election_receiver)
def __post_init__(self) -> None:
self._setup_exception_handlers()
self._setup_cors()
self._setup_routes()
self._chat_completion_queues: dict[CommandId, Sender[TokenChunk]] = {}
self._tg: TaskGroup | None = None
def reset(self, new_session_id: SessionId, result_clock: int):
logger.info("Resetting API State")
@@ -208,7 +217,7 @@ class API:
self, payload: CreateInstanceParams
) -> CreateInstanceResponse:
instance = payload.instance
model_card = await ModelCard.from_hf(instance.shard_assignments.model_id)
model_card = await resolve_model_card(instance.shard_assignments.model_id)
required_memory = model_card.storage_size
available_memory = self._calculate_total_available_memory()
@@ -231,7 +240,7 @@ class API:
async def get_placement(
self,
model_id: str,
model_id: ModelId,
sharding: Sharding = Sharding.Pipeline,
instance_meta: InstanceMeta = InstanceMeta.MlxRing,
min_nodes: int = 1,
@@ -274,7 +283,7 @@ class API:
if len(list(self.state.topology.list_nodes())) == 0:
return PlacementPreviewResponse(previews=[])
cards = [card for card in await get_model_cards() if card.short_id == model_id]
cards = [card for card in MODEL_CARDS.values() if card.model_id == model_id]
if not cards:
raise HTTPException(status_code=404, detail=f"Model {model_id} not found")
@@ -546,7 +555,7 @@ class API:
self, payload: ChatCompletionTaskParams
) -> ChatCompletionResponse | StreamingResponse:
"""Handle chat completions, supporting both streaming and non-streaming responses."""
model_card = await resolve_model_card(payload.model)
model_card = await resolve_model_card(ModelId(payload.model))
payload.model = model_card.model_id
if not any(
@@ -573,7 +582,7 @@ class API:
async def bench_chat_completions(
self, payload: BenchChatCompletionTaskParams
) -> BenchChatCompletionResponse:
model_card = await resolve_model_card(payload.model)
model_card = await resolve_model_card(ModelId(payload.model))
payload.model = model_card.model_id
if not any(
@@ -615,7 +624,7 @@ class API:
storage_size_megabytes=int(card.storage_size.in_mb),
supports_tensor=card.supports_tensor,
)
for card in model_cards()
for card in MODEL_CARDS.values()
]
)
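
The api.py hunks above replace the per-request ModelCard.from_hf calls with a resolve_model_card helper that consults the static MODEL_CARDS registry first and only falls back to a Hugging Face lookup for unknown ids. A stripped-down sketch of that lookup pattern, with simplified stand-in types rather than exo's real ones:

from dataclasses import dataclass

@dataclass(frozen=True)
class Card:
    model_id: str
    n_layers: int

# Static registry, consulted first - mirrors MODEL_CARDS in the diff.
REGISTRY: dict[str, Card] = {
    "mlx-community/Qwen3-0.6B-4bit": Card("mlx-community/Qwen3-0.6B-4bit", 28),
}

async def fetch_card_from_hub(model_id: str) -> Card:
    # Stand-in for the get_model_card fallback that hits Hugging Face.
    raise LookupError(f"unknown model: {model_id}")

async def resolve_card(model_id: str) -> Card:
    # Known models resolve with no network I/O; everything else falls back.
    if model_id in REGISTRY:
        return REGISTRY[model_id]
    return await fetch_card_from_hub(model_id)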

View File

@@ -276,9 +276,7 @@ def test_placement_selects_leaf_nodes(
# arrange
topology = Topology()
# Model requires more than any single node but fits within a 3-node cycle
model_card.storage_size.in_bytes = 1500
model_card.n_layers = 12
model_card.storage_size = Memory.from_bytes(1000)
node_id_a = NodeId()
node_id_b = NodeId()

View File

@@ -6,29 +6,14 @@ import tomlkit
from anyio import Path, open_file
from huggingface_hub import model_info
from loguru import logger
from pydantic import BaseModel, Field, PositiveInt, ValidationError
from tomlkit.exceptions import TOMLKitError
from pydantic import BaseModel, Field, PositiveInt
from exo.shared.models.model_cards import ModelCard, ModelId
from exo.shared.types.common import Id
from exo.shared.types.common import ModelId
from exo.shared.types.memory import Memory
from exo.utils.dashboard_path import RuntimeResources, find_directory
from exo.utils.pydantic_ext import CamelCaseModel
from exo.worker.download.download_utils import (
ModelSafetensorsIndex,
download_file_with_retry,
ensure_models_dir,
)
_card_cache: dict[str, "ModelCard"] = {}
class ModelId(Id):
def normalize(self) -> str:
return self.replace("/", "--")
def short(self) -> str:
return self.split("/")[-1]
_card_cache: dict[str, ModelCard] = {}
class ModelCard(CamelCaseModel):
model_id: ModelId
@@ -43,32 +28,22 @@ class ModelCard(CamelCaseModel):
data = tomlkit.dumps(py) # pyright: ignore[reportUnknownMemberType]
await f.write(data)
async def save_to_default_path(self) -> None:
dir = await find_directory(RuntimeResources.Resources)
await self.save(dir / self.model_id.normalize())
@staticmethod
async def load_from_path(path: Path) -> ModelCard:
async def load_from_path(path: Path) -> "ModelCard":
async with await open_file(path, "r") as f:
py = tomlkit.loads(await f.read())
return ModelCard.model_validate(py)
@staticmethod
async def load_from_default_path(model_id: ModelId) -> ModelCard:
return await ModelCard.load_from_path(await find_directory(RuntimeResources.Resources) / model_id.normalize())
async def load(model_id: ModelId) -> "ModelCard":
if model_id in MODEL_CARDS:
return MODEL_CARDS[model_id]
return await ModelCard.from_hf(model_id)
@staticmethod
async def load(model_id: ModelId) -> ModelCard:
try:
return await ModelCard.load_from_default_path(model_id)
except (ValidationError, TOMLKitError, FileNotFoundError):
return await ModelCard.from_hf(model_id)
@staticmethod
async def from_hf(model_id: ModelId) -> ModelCard:
async def from_hf(model_id: ModelId) -> "ModelCard":
"""Fetches storage size and number of layers for a Hugging Face model, returns Pydantic ModelMeta."""
if (mc := _card_cache.get(model_id, None)) is not None:
if (mc := _card_cache.get(model_id)) is not None:
return mc
config_data = await get_config_data(model_id)
num_layers = config_data.layer_count
@@ -85,19 +60,249 @@ class ModelCard(CamelCaseModel):
_card_cache[model_id] = mc
return mc
# TODO: should we cache this? how do we check for changes
async def get_model_cards() -> list[ModelCard]:
dir = await find_directory(RuntimeResources.Resources)
cards: list[ModelCard] = []
async for file in dir.glob("*.toml"):
try:
cards.append(await ModelCard.load_from_path(file))
except (TOMLKitError, ValidationError):
continue
return cards
MODEL_CARDS: dict[str, ModelCard] = {
# deepseek v3
"deepseek-v3.1-4bit": ModelCard(
model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"),
storage_size=Memory.from_gb(378),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
"deepseek-v3.1-8bit": ModelCard(
model_id=ModelId("mlx-community/DeepSeek-V3.1-8bit"),
storage_size=Memory.from_gb(713),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
# kimi k2
"kimi-k2-instruct-4bit": ModelCard(
model_id=ModelId("mlx-community/Kimi-K2-Instruct-4bit"),
storage_size=Memory.from_gb(578),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
"kimi-k2-thinking": ModelCard(
model_id=ModelId("mlx-community/Kimi-K2-Thinking"),
storage_size=Memory.from_gb(658),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
# llama-3.1
"llama-3.1-8b": ModelCard(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"),
storage_size=Memory.from_mb(4423),
n_layers=32,
hidden_size=4096,
supports_tensor=True,
),
"llama-3.1-8b-8bit": ModelCard(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"),
storage_size=Memory.from_mb(8540),
n_layers=32,
hidden_size=4096,
supports_tensor=True,
),
"llama-3.1-8b-bf16": ModelCard(
model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"),
storage_size=Memory.from_mb(16100),
n_layers=32,
hidden_size=4096,
supports_tensor=True,
),
"llama-3.1-70b": ModelCard(
model_id=ModelId("mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"),
storage_size=Memory.from_mb(38769),
n_layers=80,
hidden_size=8192,
supports_tensor=True,
),
# llama-3.2
"llama-3.2-1b": ModelCard(
model_id=ModelId("mlx-community/Llama-3.2-1B-Instruct-4bit"),
storage_size=Memory.from_mb(696),
n_layers=16,
hidden_size=2048,
supports_tensor=True,
),
"llama-3.2-3b": ModelCard(
model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-4bit"),
storage_size=Memory.from_mb(1777),
n_layers=28,
hidden_size=3072,
supports_tensor=True,
),
"llama-3.2-3b-8bit": ModelCard(
model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-8bit"),
storage_size=Memory.from_mb(3339),
n_layers=28,
hidden_size=3072,
supports_tensor=True,
),
# llama-3.3
"llama-3.3-70b": ModelCard(
model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-4bit"),
storage_size=Memory.from_mb(38769),
n_layers=80,
hidden_size=8192,
supports_tensor=True,
),
"llama-3.3-70b-8bit": ModelCard(
model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-8bit"),
storage_size=Memory.from_mb(73242),
n_layers=80,
hidden_size=8192,
supports_tensor=True,
),
"llama-3.3-70b-fp16": ModelCard(
model_id=ModelId("mlx-community/llama-3.3-70b-instruct-fp16"),
storage_size=Memory.from_mb(137695),
n_layers=80,
hidden_size=8192,
supports_tensor=True,
),
# qwen3
"qwen3-0.6b": ModelCard(
model_id=ModelId("mlx-community/Qwen3-0.6B-4bit"),
storage_size=Memory.from_mb(327),
n_layers=28,
hidden_size=1024,
supports_tensor=False,
),
"qwen3-0.6b-8bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-0.6B-8bit"),
storage_size=Memory.from_mb(666),
n_layers=28,
hidden_size=1024,
supports_tensor=False,
),
"qwen3-30b": ModelCard(
model_id=ModelId("mlx-community/Qwen3-30B-A3B-4bit"),
storage_size=Memory.from_mb(16797),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
"qwen3-30b-8bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-30B-A3B-8bit"),
storage_size=Memory.from_mb(31738),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
"qwen3-80b-a3B-4bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"),
storage_size=Memory.from_mb(44800),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
"qwen3-80b-a3B-8bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"),
storage_size=Memory.from_mb(84700),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
"qwen3-80b-a3B-thinking-4bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"),
storage_size=Memory.from_mb(84700),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
"qwen3-80b-a3B-thinking-8bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"),
storage_size=Memory.from_mb(84700),
n_layers=48,
hidden_size=2048,
supports_tensor=True,
),
"qwen3-235b-a22b-4bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"),
storage_size=Memory.from_gb(132),
n_layers=94,
hidden_size=4096,
supports_tensor=True,
),
"qwen3-235b-a22b-8bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"),
storage_size=Memory.from_gb(250),
n_layers=94,
hidden_size=4096,
supports_tensor=True,
),
"qwen3-coder-480b-a35b-4bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"),
storage_size=Memory.from_gb(270),
n_layers=62,
hidden_size=6144,
supports_tensor=True,
),
"qwen3-coder-480b-a35b-8bit": ModelCard(
model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"),
storage_size=Memory.from_gb(540),
n_layers=62,
hidden_size=6144,
supports_tensor=True,
),
# gpt-oss
"gpt-oss-120b-MXFP4-Q8": ModelCard(
model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
storage_size=Memory.from_kb(68_996_301),
n_layers=36,
hidden_size=2880,
supports_tensor=True,
),
"gpt-oss-20b-MXFP4-Q8": ModelCard(
model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q8"),
storage_size=Memory.from_kb(11_744_051),
n_layers=24,
hidden_size=2880,
supports_tensor=True,
),
# glm 4.5
"glm-4.5-air-8bit": ModelCard(
# Needs to be quantized g32 or g16 to work with tensor parallel
model_id=ModelId("mlx-community/GLM-4.5-Air-8bit"),
storage_size=Memory.from_gb(114),
n_layers=46,
hidden_size=4096,
supports_tensor=False,
),
"glm-4.5-air-bf16": ModelCard(
model_id=ModelId("mlx-community/GLM-4.5-Air-bf16"),
storage_size=Memory.from_gb(214),
n_layers=46,
hidden_size=4096,
supports_tensor=True,
),
# glm 4.7
"glm-4.7-4bit": ModelCard(
model_id=ModelId("mlx-community/GLM-4.7-4bit"),
storage_size=Memory.from_bytes(198556925568),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
"glm-4.7-6bit": ModelCard(
model_id=ModelId("mlx-community/GLM-4.7-6bit"),
storage_size=Memory.from_bytes(286737579648),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
"glm-4.7-8bit-gs32": ModelCard(
model_id=ModelId("mlx-community/GLM-4.7-8bit-gs32"),
storage_size=Memory.from_bytes(396963397248),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
# glm 4.7 flash
"glm-4.7-flash-4bit": ModelCard(
model_id=ModelId("mlx-community/GLM-4.7-Flash-4bit"),
@@ -127,10 +332,28 @@ MODEL_CARDS: dict[str, ModelCard] = {
hidden_size=2048,
supports_tensor=True,
),
# minimax-m2
"minimax-m2.1-8bit": ModelCard(
model_id=ModelId("mlx-community/MiniMax-M2.1-8bit"),
storage_size=Memory.from_bytes(242986745856),
n_layers=61,
hidden_size=3072,
supports_tensor=True,
),
"minimax-m2.1-3bit": ModelCard(
model_id=ModelId("mlx-community/MiniMax-M2.1-3bit"),
storage_size=Memory.from_bytes(100086644736),
n_layers=61,
hidden_size=3072,
supports_tensor=True,
),
}
from exo.worker.download.download_utils import ( # noqa: E402
ModelSafetensorsIndex,
download_file_with_retry,
ensure_models_dir,
)
class ConfigData(BaseModel):
@@ -168,10 +391,10 @@ class ConfigData(BaseModel):
async def get_config_data(model_id: ModelId) -> ConfigData:
"""Downloads and parses config.json for a model."""
target_dir = (await ensure_models_dir()) / model_id.normalize()
target_dir = (await ensure_models_dir()) / str(model_id).replace("/", "--")
await aios.makedirs(target_dir, exist_ok=True)
config_path = await download_file_with_retry(
str(model_id),
model_id,
"main",
"config.json",
target_dir,
@@ -185,10 +408,10 @@ async def get_config_data(model_id: ModelId) -> ConfigData:
async def get_safetensors_size(model_id: ModelId) -> Memory:
"""Gets model size from safetensors index or falls back to HF API."""
target_dir = (await ensure_models_dir()) / model_id.normalize()
target_dir = (await ensure_models_dir()) / str(model_id).replace("/", "--")
await aios.makedirs(target_dir, exist_ok=True)
index_path = await download_file_with_retry(
str(model_id),
model_id,
"main",
"model.safetensors.index.json",
target_dir,
@@ -207,3 +430,34 @@ async def get_safetensors_size(model_id: ModelId) -> Memory:
if info.safetensors is None:
raise ValueError(f"No safetensors info found for {model_id}")
return Memory.from_bytes(info.safetensors.total)
_model_card_cache: dict[str, ModelCard] = {}
async def get_model_card(model_id: ModelId) -> ModelCard:
if model_id in _model_card_cache:
return _model_card_cache[model_id]
model_card = await _get_model_card(model_id)
_model_card_cache[model_id] = model_card
return model_card
async def _get_model_card(model_id: ModelId) -> ModelCard:
"""Fetches storage size and number of layers for a Hugging Face model, returns Pydantic ModelMeta."""
config_data = await get_config_data(model_id)
num_layers = config_data.layer_count
mem_size_bytes = await get_safetensors_size(model_id)
model_card = next(
(card for card in MODEL_CARDS.values() if card.model_id == model_id),
None,
)
return ModelCard(
model_id=model_id,
storage_size=mem_size_bytes,
n_layers=num_layers,
hidden_size=config_data.hidden_size or 0,
# TODO: all custom models currently do not support tensor. We could add a dynamic test for this?
supports_tensor=model_card.supports_tensor if model_card is not None else False,
)
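
The hard-coded sizes in the new MODEL_CARDS table agree with the byte counts from the deleted TOML cards, provided Memory.from_gb / from_mb / from_kb are binary units (GiB / MiB / KiB) - an assumption the numbers bear out exactly:

# Left-hand factors come from the new MODEL_CARDS entries, right-hand byte
# counts from the deleted resource TOMLs.
assert 378 * 2**30 == 405_874_409_472        # DeepSeek-V3.1-4bit
assert 38_769 * 2**20 == 40_652_242_944      # Llama-3.3-70B-Instruct-4bit
assert 68_996_301 * 2**10 == 70_652_212_224  # gpt-oss-120b-MXFP4-Q8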

View File

@@ -1,112 +0,0 @@
from typing import Annotated
import aiofiles
import aiofiles.os as aios
from huggingface_hub import model_info
from loguru import logger
from pydantic import BaseModel, Field
from exo.shared.models.model_cards import ModelCard, ModelId
from exo.shared.types.memory import Memory
from exo.worker.download.download_utils import (
ModelSafetensorsIndex,
download_file_with_retry,
ensure_models_dir,
)
class ConfigData(BaseModel):
model_config = {"extra": "ignore"} # Allow unknown fields
# Common field names for number of layers across different architectures
num_hidden_layers: Annotated[int, Field(ge=0)] | None = None
num_layers: Annotated[int, Field(ge=0)] | None = None
n_layer: Annotated[int, Field(ge=0)] | None = None
n_layers: Annotated[int, Field(ge=0)] | None = None # Sometimes used
num_decoder_layers: Annotated[int, Field(ge=0)] | None = None # Transformer models
decoder_layers: Annotated[int, Field(ge=0)] | None = None # Some architectures
hidden_size: Annotated[int, Field(ge=0)] | None = None
@property
def layer_count(self) -> int:
# Check common field names for layer count
layer_fields = [
self.num_hidden_layers,
self.num_layers,
self.n_layer,
self.n_layers,
self.num_decoder_layers,
self.decoder_layers,
]
for layer_count in layer_fields:
if layer_count is not None:
return layer_count
raise ValueError(
f"No layer count found in config.json: {self.model_dump_json()}"
)
async def get_config_data(model_id: str) -> ConfigData:
"""Downloads and parses config.json for a model."""
target_dir = (await ensure_models_dir()) / str(model_id).replace("/", "--")
await aios.makedirs(target_dir, exist_ok=True)
config_path = await download_file_with_retry(
model_id,
"main",
"config.json",
target_dir,
lambda curr_bytes, total_bytes, is_renamed: logger.info(
f"Downloading config.json for {model_id}: {curr_bytes}/{total_bytes} ({is_renamed=})"
),
)
async with aiofiles.open(config_path, "r") as f:
return ConfigData.model_validate_json(await f.read())
async def get_safetensors_size(model_id: str) -> Memory:
"""Gets model size from safetensors index or falls back to HF API."""
target_dir = (await ensure_models_dir()) / str(model_id).replace("/", "--")
await aios.makedirs(target_dir, exist_ok=True)
index_path = await download_file_with_retry(
model_id,
"main",
"model.safetensors.index.json",
target_dir,
lambda curr_bytes, total_bytes, is_renamed: logger.info(
f"Downloading model.safetensors.index.json for {model_id}: {curr_bytes}/{total_bytes} ({is_renamed=})"
),
)
async with aiofiles.open(index_path, "r") as f:
index_data = ModelSafetensorsIndex.model_validate_json(await f.read())
metadata = index_data.metadata
if metadata is not None:
return Memory.from_bytes(metadata.total_size)
info = model_info(model_id)
if info.safetensors is None:
raise ValueError(f"No safetensors info found for {model_id}")
return Memory.from_bytes(info.safetensors.total)
_model_card_cache: dict[str, ModelCard] = {}
async def get_model_card(model_id: str) -> ModelCard:
"""Fetches storage size and number of layers for a Hugging Face model, returns Pydantic ModelMeta."""
if model_id in _model_card_cache:
return _model_card_cache[model_id]
config_data = await get_config_data(model_id)
num_layers = config_data.layer_count
mem_size_bytes = await get_safetensors_size(model_id)
mc = ModelCard(
model_id=ModelId(model_id),
storage_size=mem_size_bytes,
n_layers=num_layers,
hidden_size=config_data.hidden_size or 0,
# TODO: all custom models currently do not support tensor. We could add a dynamic test for this?
supports_tensor=False,
)
_model_card_cache[model_id] = mc
return mc

View File

@@ -168,7 +168,7 @@ class BenchChatCompletionTaskParams(ChatCompletionTaskParams):
class PlaceInstanceParams(BaseModel):
model_id: str
model_id: ModelId
sharding: Sharding = Sharding.Pipeline
instance_meta: InstanceMeta = InstanceMeta.MlxRing
min_nodes: int = 1

View File

@@ -25,6 +25,14 @@ class NodeId(Id):
pass
class ModelId(Id):
def normalize(self) -> str:
return self.replace("/", "--")
def short(self) -> str:
return self.split("/")[-1]
class SessionId(CamelCaseModel):
master_node_id: NodeId
election_clock: int
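
ModelId's two new helpers are plain string transforms; a self-contained illustration (Id's actual base class isn't shown in this hunk, so a bare str subclass stands in):

class ModelId(str):
    # Stand-in for exo's Id-derived ModelId; the two methods match the diff.
    def normalize(self) -> str:
        return self.replace("/", "--")

    def short(self) -> str:
        return self.split("/")[-1]

mid = ModelId("mlx-community/Qwen3-0.6B-4bit")
assert mid.normalize() == "mlx-community--Qwen3-0.6B-4bit"
assert mid.short() == "Qwen3-0.6B-4bit"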

View File

@@ -1,3 +1,8 @@
from datetime import timedelta
from typing import Literal
from pydantic import BaseModel, ConfigDict, Field, PositiveInt
from exo.shared.types.common import NodeId
from exo.shared.types.memory import Memory
from exo.shared.types.worker.shards import ShardMetadata
@@ -42,3 +47,50 @@ class DownloadOngoing(BaseDownloadProgress):
DownloadProgress = (
DownloadPending | DownloadCompleted | DownloadFailed | DownloadOngoing
)
class ModelSafetensorsIndexMetadata(BaseModel):
total_size: PositiveInt
class ModelSafetensorsIndex(BaseModel):
metadata: ModelSafetensorsIndexMetadata | None
weight_map: dict[str, str]
class FileListEntry(BaseModel):
type: Literal["file", "directory"]
path: str
size: int | None = None
class RepoFileDownloadProgress(BaseModel):
repo_id: str
repo_revision: str
file_path: str
downloaded: Memory
downloaded_this_session: Memory
total: Memory
speed: float
eta: timedelta
status: Literal["not_started", "in_progress", "complete"]
start_time: float
model_config = ConfigDict(frozen=True)
class RepoDownloadProgress(BaseModel):
repo_id: str
repo_revision: str
shard: ShardMetadata
completed_files: int
total_files: int
downloaded_bytes: Memory
downloaded_bytes_this_session: Memory
total_bytes: Memory
overall_speed: float
overall_eta: timedelta
status: Literal["not_started", "in_progress", "complete"]
file_progress: dict[str, RepoFileDownloadProgress] = Field(default_factory=dict)
model_config = ConfigDict(frozen=True)

View File

@@ -1,72 +1,45 @@
import enum
import os
import sys
from pathlib import Path
from typing import cast
from anyio import Path
class RuntimeResources(enum.Enum):
Dashboard = enum.auto
Resources = enum.auto
_dir_cache: dict[RuntimeResources, Path]
async def find_directory(rr: RuntimeResources) -> Path:
dir = (
_dir_cache.get(rr, None)
or await _find_in_env(rr)
or await _find_in_repo(rr)
or await _find_in_bundle(rr)
def find_dashboard() -> Path:
dashboard = (
_find_dashboard_in_env()
or _find_dashboard_in_repo()
or _find_dashboard_in_bundle()
)
if not dir:
if not dashboard:
raise FileNotFoundError(
"Unable to locate directory - make sure the dashboard has been built and the runtime resources (model cards) exist."
"Unable to locate dashboard assets - make sure the dashboard has been built, or export DASHBOARD_DIR if you've built the dashboard elsewhere."
)
_dir_cache[rr] = dir
return dir
return dashboard
async def _find_in_env(rr: RuntimeResources) -> Path | None:
match rr:
case RuntimeResources.Dashboard:
env = os.environ.get("DASHBOARD_DIR")
case RuntimeResources.Resources:
env = os.environ.get("RESOURCES_DIR")
def _find_dashboard_in_env() -> Path | None:
env = os.environ.get("DASHBOARD_DIR")
if not env:
return None
resolved_env = await (await Path(env).expanduser()).resolve()
resolved_env = Path(env).expanduser().resolve()
return resolved_env
async def _find_in_repo(rr: RuntimeResources) -> Path | None:
current_module = await Path(__file__).resolve()
def _find_dashboard_in_repo() -> Path | None:
current_module = Path(__file__).resolve()
for parent in current_module.parents:
match rr:
case RuntimeResources.Dashboard:
build = parent / "dashboard" / "build"
if await build.is_dir() and await (build / "index.html").exists():
return build
case RuntimeResources.Resources:
res = parent / "resources"
if await res.is_dir():
return res
build = parent / "dashboard" / "build"
if build.is_dir() and (build / "index.html").exists():
return build
return None
async def _find_in_bundle(rr: RuntimeResources) -> Path | None:
def _find_dashboard_in_bundle() -> Path | None:
frozen_root = cast(str | None, getattr(sys, "_MEIPASS", None))
if frozen_root is None:
return None
match rr:
case RuntimeResources.Dashboard:
candidate = Path(frozen_root) / "dashboard"
if await candidate.is_dir():
return candidate
case RuntimeResources.Resources:
candidate = Path(frozen_root) / "resources"
if await candidate.is_dir():
return candidate
candidate = Path(frozen_root) / "dashboard"
if candidate.is_dir():
return candidate
return None
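
Since find_dashboard() is now synchronous, it can be called inline while the FastAPI app is assembled, as the api.py hunk earlier does with StaticFiles. A minimal standalone sketch, with a fabricated build directory standing in for the env var / repo checkout / PyInstaller bundle lookup:

import tempfile
from pathlib import Path

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles

def find_dashboard() -> Path:
    # Stand-in for the DASHBOARD_DIR -> repo -> bundle fallbacks in the diff;
    # here we just create an empty directory so the mount succeeds.
    return Path(tempfile.mkdtemp(prefix="dashboard-build-"))

app = FastAPI()
app.mount("/", StaticFiles(directory=find_dashboard(), html=True), name="dashboard")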

View File

@@ -17,17 +17,20 @@ import aiohttp
import certifi
from loguru import logger
from pydantic import (
BaseModel,
ConfigDict,
DirectoryPath,
Field,
PositiveInt,
TypeAdapter,
)
from exo.shared.constants import EXO_MODELS_DIR
from exo.shared.types.common import ModelId
from exo.shared.types.memory import Memory
from exo.shared.types.worker.downloads import DownloadProgressData
from exo.shared.types.worker.downloads import (
DownloadProgressData,
FileListEntry,
ModelSafetensorsIndex,
RepoDownloadProgress,
RepoFileDownloadProgress,
)
from exo.shared.types.worker.shards import ShardMetadata
from exo.worker.download.huggingface_utils import (
filter_repo_objects,
@@ -37,53 +40,6 @@ from exo.worker.download.huggingface_utils import (
)
class ModelSafetensorsIndexMetadata(BaseModel):
total_size: PositiveInt
class ModelSafetensorsIndex(BaseModel):
metadata: ModelSafetensorsIndexMetadata | None
weight_map: dict[str, str]
class FileListEntry(BaseModel):
type: Literal["file", "directory"]
path: str
size: int | None = None
class RepoFileDownloadProgress(BaseModel):
repo_id: str
repo_revision: str
file_path: str
downloaded: Memory
downloaded_this_session: Memory
total: Memory
speed: float
eta: timedelta
status: Literal["not_started", "in_progress", "complete"]
start_time: float
model_config = ConfigDict(frozen=True)
class RepoDownloadProgress(BaseModel):
repo_id: str
repo_revision: str
shard: ShardMetadata
completed_files: int
total_files: int
downloaded_bytes: Memory
downloaded_bytes_this_session: Memory
total_bytes: Memory
overall_speed: float
overall_eta: timedelta
status: Literal["not_started", "in_progress", "complete"]
file_progress: dict[str, RepoFileDownloadProgress] = Field(default_factory=dict)
model_config = ConfigDict(frozen=True)
def trim_etag(etag: str) -> str:
if (etag[0] == '"' and etag[-1] == '"') or (etag[0] == "'" and etag[-1] == "'"):
return etag[1:-1]
@@ -125,12 +81,12 @@ def map_repo_download_progress_to_download_progress_data(
)
def build_model_path(model_id: str) -> DirectoryPath:
return EXO_MODELS_DIR / model_id.replace("/", "--")
def build_model_path(model_id: ModelId) -> DirectoryPath:
return EXO_MODELS_DIR / model_id.normalize()
async def resolve_model_path_for_repo(repo_id: str) -> Path:
return (await ensure_models_dir()) / repo_id.replace("/", "--")
async def resolve_model_path_for_repo(model_id: ModelId) -> Path:
return (await ensure_models_dir()) / model_id.normalize()
async def ensure_models_dir() -> Path:
@@ -138,8 +94,8 @@ async def ensure_models_dir() -> Path:
return EXO_MODELS_DIR
async def delete_model(repo_id: str) -> bool:
model_dir = await ensure_models_dir() / repo_id.replace("/", "--")
async def delete_model(model_id: ModelId) -> bool:
model_dir = await ensure_models_dir() / model_id.normalize()
if not await aios.path.exists(model_dir):
return False
await asyncio.to_thread(shutil.rmtree, model_dir, ignore_errors=False)
@@ -164,19 +120,17 @@ async def seed_models(seed_dir: str | Path):
async def fetch_file_list_with_cache(
repo_id: str, revision: str = "main", recursive: bool = False
model_id: ModelId, revision: str = "main", recursive: bool = False
) -> list[FileListEntry]:
target_dir = (
(await ensure_models_dir()) / "caches" / str(repo_id).replace("/", "--")
)
target_dir = (await ensure_models_dir()) / "caches" / model_id.normalize()
await aios.makedirs(target_dir, exist_ok=True)
cache_file = (
target_dir / f"{repo_id.replace('/', '--')}--{revision}--file_list.json"
)
cache_file = target_dir / f"{model_id.normalize()}--{revision}--file_list.json"
if await aios.path.exists(cache_file):
async with aiofiles.open(cache_file, "r") as f:
return TypeAdapter(list[FileListEntry]).validate_json(await f.read())
file_list = await fetch_file_list_with_retry(repo_id, revision, recursive=recursive)
file_list = await fetch_file_list_with_retry(
model_id, revision, recursive=recursive
)
await aios.makedirs(cache_file.parent, exist_ok=True)
async with aiofiles.open(cache_file, "w") as f:
await f.write(TypeAdapter(list[FileListEntry]).dump_json(file_list).decode())
@@ -184,25 +138,25 @@ async def fetch_file_list_with_cache(
async def fetch_file_list_with_retry(
repo_id: str, revision: str = "main", path: str = "", recursive: bool = False
model_id: ModelId, revision: str = "main", path: str = "", recursive: bool = False
) -> list[FileListEntry]:
n_attempts = 30
for attempt in range(n_attempts):
try:
return await _fetch_file_list(repo_id, revision, path, recursive)
return await _fetch_file_list(model_id, revision, path, recursive)
except Exception as e:
if attempt == n_attempts - 1:
raise e
await asyncio.sleep(min(8, 0.1 * float(2.0 ** int(attempt))))
raise Exception(
f"Failed to fetch file list for {repo_id=} {revision=} {path=} {recursive=}"
f"Failed to fetch file list for {model_id=} {revision=} {path=} {recursive=}"
)
async def _fetch_file_list(
repo_id: str, revision: str = "main", path: str = "", recursive: bool = False
model_id: ModelId, revision: str = "main", path: str = "", recursive: bool = False
) -> list[FileListEntry]:
api_url = f"{get_hf_endpoint()}/api/models/{repo_id}/tree/{revision}"
api_url = f"{get_hf_endpoint()}/api/models/{model_id}/tree/{revision}"
url = f"{api_url}/{path}" if path else api_url
headers = await get_download_headers()
@@ -219,7 +173,7 @@ async def _fetch_file_list(
files.append(FileListEntry.model_validate(item))
elif item.type == "directory" and recursive:
subfiles = await _fetch_file_list(
repo_id, revision, item.path, recursive
model_id, revision, item.path, recursive
)
files.extend(subfiles)
return files
@@ -276,10 +230,10 @@ async def calc_hash(path: Path, hash_type: Literal["sha1", "sha256"] = "sha1") -
async def file_meta(
repo_id: str, revision: str, path: str, redirected_location: str | None = None
model_id: ModelId, revision: str, path: str, redirected_location: str | None = None
) -> tuple[int, str]:
url = (
urljoin(f"{get_hf_endpoint()}/{repo_id}/resolve/{revision}/", path)
urljoin(f"{get_hf_endpoint()}/{model_id}/resolve/{revision}/", path)
if redirected_location is None
else f"{get_hf_endpoint()}{redirected_location}"
)
@@ -298,7 +252,7 @@ async def file_meta(
return content_length, etag
# Otherwise, follow the redirect to get authoritative size/hash
redirected_location = r.headers.get("location")
return await file_meta(repo_id, revision, path, redirected_location)
return await file_meta(model_id, revision, path, redirected_location)
content_length = int(
r.headers.get("x-linked-size") or r.headers.get("content-length") or 0
)
@@ -310,7 +264,7 @@ async def file_meta(
async def download_file_with_retry(
repo_id: str,
model_id: ModelId,
revision: str,
path: str,
target_dir: Path,
@@ -320,23 +274,23 @@ async def download_file_with_retry(
for attempt in range(n_attempts):
try:
return await _download_file(
repo_id, revision, path, target_dir, on_progress
model_id, revision, path, target_dir, on_progress
)
except Exception as e:
if isinstance(e, FileNotFoundError) or attempt == n_attempts - 1:
raise e
logger.error(
f"Download error on attempt {attempt}/{n_attempts} for {repo_id=} {revision=} {path=} {target_dir=}"
f"Download error on attempt {attempt}/{n_attempts} for {model_id=} {revision=} {path=} {target_dir=}"
)
logger.error(traceback.format_exc())
await asyncio.sleep(min(8, 0.1 * (2.0**attempt)))
raise Exception(
f"Failed to download file {repo_id=} {revision=} {path=} {target_dir=}"
f"Failed to download file {model_id=} {revision=} {path=} {target_dir=}"
)
async def _download_file(
repo_id: str,
model_id: ModelId,
revision: str,
path: str,
target_dir: Path,
@@ -345,7 +299,7 @@ async def _download_file(
if await aios.path.exists(target_dir / path):
return target_dir / path
await aios.makedirs((target_dir / path).parent, exist_ok=True)
length, etag = await file_meta(repo_id, revision, path)
length, etag = await file_meta(model_id, revision, path)
remote_hash = etag[:-5] if etag.endswith("-gzip") else etag
partial_path = target_dir / f"{path}.partial"
resume_byte_pos = (
@@ -354,7 +308,7 @@ async def _download_file(
else None
)
if resume_byte_pos != length:
url = urljoin(f"{get_hf_endpoint()}/{repo_id}/resolve/{revision}/", path)
url = urljoin(f"{get_hf_endpoint()}/{model_id}/resolve/{revision}/", path)
headers = await get_download_headers()
if resume_byte_pos:
headers["Range"] = f"bytes={resume_byte_pos}-"
@@ -394,7 +348,7 @@ async def _download_file(
def calculate_repo_progress(
shard: ShardMetadata,
repo_id: str,
model_id: ModelId,
revision: str,
file_progress: dict[str, RepoFileDownloadProgress],
all_start_time: float,
@@ -423,7 +377,7 @@ def calculate_repo_progress(
else "not_started"
)
return RepoDownloadProgress(
repo_id=repo_id,
repo_id=model_id,
repo_revision=revision,
shard=shard,
completed_files=len(
@@ -442,11 +396,11 @@ def calculate_repo_progress(
)
async def get_weight_map(repo_id: str, revision: str = "main") -> dict[str, str]:
target_dir = (await ensure_models_dir()) / str(repo_id).replace("/", "--")
async def get_weight_map(model_id: ModelId, revision: str = "main") -> dict[str, str]:
target_dir = (await ensure_models_dir()) / model_id.normalize()
await aios.makedirs(target_dir, exist_ok=True)
index_file = await download_file_with_retry(
repo_id, revision, "model.safetensors.index.json", target_dir
model_id, revision, "model.safetensors.index.json", target_dir
)
async with aiofiles.open(index_file, "r") as f:
index_data = ModelSafetensorsIndex.model_validate_json(await f.read())
@@ -478,7 +432,7 @@ async def get_downloaded_size(path: Path) -> int:
async def download_progress_for_local_path(
repo_id: str, shard: ShardMetadata, local_path: Path
model_id: ModelId, shard: ShardMetadata, local_path: Path
) -> RepoDownloadProgress:
file_progress: dict[str, RepoFileDownloadProgress] = {}
total_files = 0
@@ -492,7 +446,7 @@ async def download_progress_for_local_path(
size = (await aios.stat(file_path)).st_size
rel_path = str(file_path.relative_to(local_path))
file_progress[rel_path] = RepoFileDownloadProgress(
repo_id=repo_id,
repo_id=model_id,
repo_revision="local",
file_path=rel_path,
downloaded=Memory.from_bytes(size),
@@ -509,7 +463,7 @@ async def download_progress_for_local_path(
raise ValueError(f"Local path {local_path} is not a directory")
return RepoDownloadProgress(
repo_id=repo_id,
repo_id=model_id,
repo_revision="local",
shard=shard,
completed_files=total_files,
@@ -539,7 +493,7 @@ async def download_shard(
logger.info(f"Using local model path {shard.model_card.model_id}")
local_path = Path(str(shard.model_card.model_id))
return local_path, await download_progress_for_local_path(
str(shard.model_card.model_id), shard, local_path
shard.model_card.model_id, shard, local_path
)
revision = "main"
@@ -558,7 +512,7 @@ async def download_shard(
# TODO: currently not recursive. Some models might require subdirectories - thus this will need to be changed.
# Update: <- This does not seem to be the case. Yay?
file_list = await fetch_file_list_with_cache(
str(shard.model_card.model_id), revision, recursive=True
shard.model_card.model_id, revision, recursive=True
)
filtered_file_list = list(
filter_repo_objects(
@@ -592,7 +546,7 @@ async def download_shard(
else timedelta(seconds=0)
)
file_progress[file.path] = RepoFileDownloadProgress(
repo_id=str(shard.model_card.model_id),
repo_id=shard.model_card.model_id,
repo_revision=revision,
file_path=file.path,
downloaded=Memory.from_bytes(curr_bytes),
@@ -609,7 +563,7 @@ async def download_shard(
shard,
calculate_repo_progress(
shard,
str(shard.model_card.model_id),
shard.model_card.model_id,
revision,
file_progress,
all_start_time,
@@ -619,7 +573,7 @@ async def download_shard(
for file in filtered_file_list:
downloaded_bytes = await get_downloaded_size(target_dir / file.path)
file_progress[file.path] = RepoFileDownloadProgress(
repo_id=str(shard.model_card.model_id),
repo_id=shard.model_card.model_id,
repo_revision=revision,
file_path=file.path,
downloaded=Memory.from_bytes(downloaded_bytes),
@@ -643,7 +597,7 @@ async def download_shard(
async def download_with_semaphore(file: FileListEntry) -> None:
async with semaphore:
await download_file_with_retry(
str(shard.model_card.model_id),
shard.model_card.model_id,
revision,
file.path,
target_dir,
@@ -657,7 +611,7 @@ async def download_shard(
*[download_with_semaphore(file) for file in filtered_file_list]
)
final_repo_progress = calculate_repo_progress(
shard, str(shard.model_card.model_id), revision, file_progress, all_start_time
shard, shard.model_card.model_id, revision, file_progress, all_start_time
)
await on_progress(shard, final_repo_progress)
if gguf := next((f for f in filtered_file_list if f.path.endswith(".gguf")), None):
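
download_file_with_retry and fetch_file_list_with_retry in the hunks above share one retry shape: up to 30 attempts with exponential backoff capped at 8 seconds. The loop in isolation, with a generic awaitable callable instead of the HTTP calls:

import asyncio
from collections.abc import Awaitable, Callable
from typing import TypeVar

T = TypeVar("T")

async def with_retry(fn: Callable[[], Awaitable[T]], n_attempts: int = 30) -> T:
    for attempt in range(n_attempts):
        try:
            return await fn()
        except Exception:
            if attempt == n_attempts - 1:
                raise
            # 0.1s, 0.2s, 0.4s, ... capped at 8s, same as the diff.
            await asyncio.sleep(min(8, 0.1 * (2.0**attempt)))
    raise RuntimeError("unreachable")

The real download helper additionally re-raises FileNotFoundError immediately rather than retrying it.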

View File

@@ -3,7 +3,7 @@ from collections.abc import Awaitable
from pathlib import Path
from typing import AsyncIterator, Callable
from exo.shared.models.model_cards import ModelCard, get_model_cards
from exo.shared.models.model_cards import MODEL_CARDS, ModelId, get_model_card
from exo.shared.types.worker.shards import (
PipelineShardMetadata,
ShardMetadata,
@@ -18,8 +18,8 @@ def exo_shard_downloader(max_parallel_downloads: int = 8) -> ShardDownloader:
)
async def build_base_shard(model_id: str) -> ShardMetadata:
model_card = await ModelCard.from_hf(model_id)
async def build_base_shard(model_id: ModelId) -> ShardMetadata:
model_card = await get_model_card(model_id)
return PipelineShardMetadata(
model_card=model_card,
device_rank=0,
@@ -30,7 +30,7 @@ async def build_base_shard(model_id: str) -> ShardMetadata:
)
async def build_full_shard(model_id: str) -> PipelineShardMetadata:
async def build_full_shard(model_id: ModelId) -> PipelineShardMetadata:
base_shard = await build_base_shard(model_id)
return PipelineShardMetadata(
model_card=base_shard.model_card,
@@ -147,7 +147,7 @@ class ResumableShardDownloader(ShardDownloader):
self,
) -> AsyncIterator[tuple[Path, RepoDownloadProgress]]:
async def _status_for_model(
model_id: str,
model_id: ModelId,
) -> tuple[Path, RepoDownloadProgress]:
"""Helper coroutine that builds the shard for a model and gets its download status."""
shard = await build_full_shard(model_id)
@@ -158,7 +158,7 @@ class ResumableShardDownloader(ShardDownloader):
# Kick off download status coroutines concurrently
tasks = [
asyncio.create_task(_status_for_model(model_card.model_id))
for model_card in await get_model_cards()
for model_card in MODEL_CARDS.values()
]
for task in asyncio.as_completed(tasks):

View File

@@ -23,6 +23,7 @@ from mlx_lm.models.deepseek_v3 import DeepseekV3Model
from mlx_lm.models.gpt_oss import Model as GptOssModel
from mlx_lm.tokenizer_utils import TokenizerWrapper
from exo.shared.models.model_cards import ModelId
from exo.worker.engines.mlx.constants import (
CACHE_GROUP_SIZE,
KV_CACHE_BITS,
@@ -296,7 +297,7 @@ def get_tokenizer(model_path: Path, shard_metadata: ShardMetadata) -> TokenizerW
return load_tokenizer_for_model_id(shard_metadata.model_card.model_id, model_path)
def get_eos_token_ids_for_model(model_id: str) -> list[int] | None:
def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
"""
Get the EOS token IDs for a model based on its ID.
@@ -320,7 +321,9 @@ def get_eos_token_ids_for_model(model_id: str) -> list[int] | None:
return None
def load_tokenizer_for_model_id(model_id: str, model_path: Path) -> TokenizerWrapper:
def load_tokenizer_for_model_id(
model_id: ModelId, model_path: Path
) -> TokenizerWrapper:
"""
Load tokenizer for a model given its ID and local path.

View File

@@ -413,11 +413,6 @@ class Worker:
)
for nid in conns:
for ip in conns[nid]:
if "127.0.0.1" in ip or "localhost" in ip:
logger.warning(
f"Loopback connection should not happen: {ip=} for {nid=}"
)
edge = SocketConnection(
# nonsense multiaddr
sink_multiaddr=Multiaddr(address=f"/ip4/{ip}/tcp/52415")
@@ -438,6 +433,9 @@ class Worker:
for conn in self.state.topology.out_edges(self.node_id):
if not isinstance(conn.edge, SocketConnection):
continue
# ignore mDNS discovered connections
if conn.edge.sink_multiaddr.port != 52415:
continue
if (
conn.sink not in conns
or conn.edge.sink_multiaddr.ip_address

View File

@@ -11,8 +11,9 @@ import mlx.core as mx
import mlx.nn as nn
from exo.shared.constants import EXO_MODELS_DIR
from exo.shared.models.model_cards import ModelCard, ModelId
from exo.shared.models.model_cards import ModelCard
from exo.shared.types.api import ChatCompletionMessage
from exo.shared.types.common import ModelId
from exo.shared.types.memory import Memory
from exo.shared.types.tasks import ChatCompletionTaskParams
from exo.shared.types.worker.shards import PipelineShardMetadata, TensorShardMetadata

View File

@@ -18,6 +18,7 @@ def _check_model_exists() -> bool:
pytestmark = [
pytest.mark.slow,
pytest.mark.skipif(
not _check_model_exists(),
reason=f"GPT-OSS model not found at {DEFAULT_GPT_OSS_CONFIG.model_path}",

View File

@@ -11,7 +11,7 @@ from pathlib import Path
import pytest
from exo.shared.models.model_cards import MODEL_CARDS, ModelCard
from exo.shared.models.model_cards import MODEL_CARDS, ModelCard, ModelId
from exo.worker.download.download_utils import (
download_file_with_retry,
ensure_models_dir,
@@ -50,9 +50,9 @@ def is_tokenizer_file(filename: str) -> bool:
return False
async def download_tokenizer_files(model_id: str) -> Path:
async def download_tokenizer_files(model_id: ModelId) -> Path:
"""Download only the tokenizer-related files for a model."""
target_dir = await ensure_models_dir() / model_id.replace("/", "--")
target_dir = await ensure_models_dir() / model_id.normalize()
target_dir.mkdir(parents=True, exist_ok=True)
file_list = await fetch_file_list_with_cache(model_id, "main", recursive=True)
@@ -72,22 +72,24 @@ async def download_tokenizer_files(model_id: str) -> Path:
# Get a sample of models to test (one per family to keep tests fast)
def get_test_models() -> list[tuple[str, ModelCard]]:
def get_test_models() -> list[ModelCard]:
"""Get a representative sample of models to test."""
# Pick one model from each family to test
families: dict[str, tuple[str, ModelCard]] = {}
for _, card in MODEL_CARDS.items():
families: dict[str, ModelCard] = {}
for card in MODEL_CARDS.values():
# Extract family name (e.g., "llama-3.1" from "llama-3.1-8b")
parts = card.model_id.short().split("-")
family = "-".join(parts[:2]) if len(parts) >= 2 else parts[0]
if family not in families:
families[family] = (card.model_id.short(), card)
families[family] = card
return list(families.values())
TEST_MODELS: list[tuple[str, ModelCard]] = get_test_models()
TEST_MODELS: list[ModelCard] = get_test_models()
pytestmark = pytest.mark.slow
@pytest.fixture(scope="module")
@@ -99,14 +101,13 @@ def event_loop():
@pytest.mark.parametrize(
"short_id,model_card",
"model_card",
TEST_MODELS,
ids=[m[0] for m in TEST_MODELS],
)
@pytest.mark.asyncio
async def test_tokenizer_encode_decode(short_id: str, model_card: ModelCard) -> None:
"""Test that tokenizer can encode and decode text correctly."""
model_id = str(model_card.model_id)
model_id = model_card.model_id
# Download tokenizer files
model_path = await download_tokenizer_files(model_id)
@@ -165,16 +166,15 @@ async def test_tokenizer_encode_decode(short_id: str, model_card: ModelCard) ->
@pytest.mark.parametrize(
"short_id,model_card",
"model_card",
TEST_MODELS,
ids=[m[0] for m in TEST_MODELS],
)
@pytest.mark.asyncio
async def test_tokenizer_has_required_attributes(
short_id: str, model_card: ModelCard
) -> None:
"""Test that tokenizer has required attributes for inference."""
model_id = str(model_card.model_id)
model_id = model_card.model_id
model_path = await download_tokenizer_files(model_id)
@@ -207,19 +207,18 @@ async def test_tokenizer_has_required_attributes(
@pytest.mark.parametrize(
"short_id,model_card",
"model_card",
TEST_MODELS,
ids=[m[0] for m in TEST_MODELS],
)
@pytest.mark.asyncio
async def test_tokenizer_special_tokens(short_id: str, model_card: ModelCard) -> None:
async def test_tokenizer_special_tokens(model_card: ModelCard) -> None:
"""Test that tokenizer can encode text containing special tokens.
This is critical because the actual inference path uses prompts with
special tokens from chat templates. If special tokens aren't handled
correctly, encoding will fail.
"""
model_id = str(model_card.model_id)
model_id = model_card.model_id
model_path = await download_tokenizer_files(model_id)
@@ -299,16 +298,14 @@ async def test_tokenizer_special_tokens(short_id: str, model_card: ModelCard) ->
async def test_kimi_tokenizer_specifically():
"""Test Kimi tokenizer with its specific patches and quirks."""
kimi_models = [
(short_id, card)
for short_id, card in MODEL_CARDS.items()
if "kimi" in short_id.lower()
card for card in MODEL_CARDS.values() if "kimi" in card.model_id.lower()
]
if not kimi_models:
pytest.skip("No Kimi models found in MODEL_CARDS")
_, model_card = kimi_models[0]
model_id = str(model_card.model_id)
model_card = kimi_models[0]
model_id = model_card.model_id
model_path = await download_tokenizer_files(model_id)
@@ -347,17 +344,15 @@ async def test_kimi_tokenizer_specifically():
@pytest.mark.asyncio
async def test_glm_tokenizer_specifically():
"""Test GLM tokenizer with its specific EOS tokens."""
glm_models = [
(short_id, card)
for short_id, card in MODEL_CARDS.items()
if "glm" in short_id.lower()
glm_model_cards = [
card for card in MODEL_CARDS.values() if "glm" in card.model_id.lower()
]
if not glm_models:
if not glm_model_cards:
pytest.skip("No GLM models found in MODEL_CARDS")
_, model_card = glm_models[0]
model_id = str(model_card.model_id)
model_card = glm_model_cards[0]
model_id = model_card.model_id
model_path = await download_tokenizer_files(model_id)

View File

@@ -1,6 +1,5 @@
import exo.worker.plan as plan_mod
from exo.shared.models.model_cards import ModelId
from exo.shared.types.common import NodeId
from exo.shared.types.common import ModelId, NodeId
from exo.shared.types.memory import Memory
from exo.shared.types.tasks import LoadModel
from exo.shared.types.worker.downloads import DownloadCompleted, DownloadProgress

View File

@@ -12,7 +12,7 @@ from loguru import logger
from pydantic import BaseModel
from exo.shared.logging import InterceptLogger, logger_setup
from exo.shared.models.model_cards import ModelId
from exo.shared.models.model_cards import MODEL_CARDS, ModelId
from exo.shared.types.api import ChatCompletionMessage, ChatCompletionTaskParams
from exo.shared.types.commands import CommandId
from exo.shared.types.common import Host, NodeId
@@ -89,22 +89,22 @@ async def tb_detection():
async def assert_downloads():
sd = exo_shard_downloader()
# await sd.ensure_shard(ModelId("mlx-community/Qwen3-0.6B-8bit")))
# await sd.ensure_shard(await build_full_shard(MODEL_CARDS["qwen3-0.6b"].model_id))
await sd.ensure_shard(
await build_full_shard(ModelId("mlx-community/Llama-3.1-8b-bf16"))
await build_full_shard(MODEL_CARDS["llama-3.1-8b-bf16"].model_id)
)
await sd.ensure_shard(await build_full_shard(ModelId("mlx-community/Qwen3-30b-A3B")))
await sd.ensure_shard(await build_full_shard(MODEL_CARDS["qwen3-30b"].model_id))
await sd.ensure_shard(
await build_full_shard(ModelId("mlx-commmunity/gpt-oss-120b-MXFP4-Q8"))
await build_full_shard(MODEL_CARDS["gpt-oss-120b-MXFP4-Q8"].model_id)
)
await sd.ensure_shard(
await build_full_shard(ModelId("mlx-community/gpt-oss-20b-4bit"))
await build_full_shard(MODEL_CARDS["gpt-oss-20b-4bit"].model_id)
)
await sd.ensure_shard(
await build_full_shard(ModelId("mlx-community/GLM-4.7-8bit-gs32"))
await build_full_shard(MODEL_CARDS["glm-4.7-8bit-gs32"].model_id)
)
await sd.ensure_shard(
await build_full_shard(ModelId("mlx-community/MiniMax-M2.1-8bit"))
await build_full_shard(MODEL_CARDS["minimax-m2.1-8bit"].model_id)
)

uv.lock generated
View File

@@ -248,6 +248,7 @@ dependencies = [
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rustworkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tomlkit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
@@ -281,6 +282,7 @@ requires-dist = [
{ name = "pydantic", specifier = ">=2.11.7" },
{ name = "rustworkx", specifier = ">=0.17.1" },
{ name = "tiktoken", specifier = ">=0.12.0" },
{ name = "tomlkit", specifier = ">=0.14.0" },
{ name = "types-aiofiles", specifier = ">=24.1.0.20250708" },
]
@@ -315,6 +317,16 @@ dev = [
{ name = "pytest-asyncio", specifier = ">=1.0.0" },
]
[[package]]
name = "tomlkit"
version = "0.14.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310 },
]
[[package]]
name = "fastapi"
version = "0.128.0"