Mirror of https://github.com/exo-explore/exo.git (synced 2026-01-20 11:58:57 -05:00)

Compare commits: new-bridge...model-card (7 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 92b24196c3 | |
| | 3bf7770988 | |
| | 8392463a70 | |
| | 9c1f6224b0 | |
| | f370dbd1e0 | |
| | 6a38f9efba | |
| | 0475de6431 | |
@@ -10,6 +10,7 @@ PROJECT_ROOT = Path.cwd()
 SOURCE_ROOT = PROJECT_ROOT / "src"
 ENTRYPOINT = SOURCE_ROOT / "exo" / "__main__.py"
 DASHBOARD_DIR = PROJECT_ROOT / "dashboard" / "build"
+RESOURCES_DIR = PROJECT_ROOT / "resources"
 EXO_SHARED_MODELS_DIR = SOURCE_ROOT / "exo" / "shared" / "models"

 if not ENTRYPOINT.is_file():
@@ -18,6 +19,9 @@ if not ENTRYPOINT.is_file():
 if not DASHBOARD_DIR.is_dir():
     raise SystemExit(f"Dashboard assets are missing: {DASHBOARD_DIR}")

+if not RESOURCES_DIR.is_dir():
+    raise SystemExit(f"Resources are missing: {RESOURCES_DIR}")
+
 if not EXO_SHARED_MODELS_DIR.is_dir():
     raise SystemExit(f"Shared model assets are missing: {EXO_SHARED_MODELS_DIR}")

@@ -58,6 +62,7 @@ HIDDEN_IMPORTS = sorted(

 DATAS: list[tuple[str, str]] = [
     (str(DASHBOARD_DIR), "dashboard"),
+    (str(RESOURCES_DIR), "resources"),
     (str(MLX_LIB_DIR), "mlx/lib"),
     (str(EXO_SHARED_MODELS_DIR), "exo/shared/models"),
 ]
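A minimal sketch (not part of this commit) of how a PyInstaller `DATAS` entry like `(str(RESOURCES_DIR), "resources")` becomes reachable at runtime, assuming PyInstaller's usual behavior of unpacking data files under `sys._MEIPASS`; the commit's own resolver is the `find_directory` change in `dashboard_path.py` further down:

```python
# Sketch only: locate the bundled "resources" directory in both a frozen
# app and a plain checkout. The dev fallback path is an assumption.
import sys
from pathlib import Path

def bundled_resources_root() -> Path:
    frozen_root = getattr(sys, "_MEIPASS", None)  # set by PyInstaller at runtime
    if frozen_root is not None:
        return Path(frozen_root) / "resources"    # matches the DATAS dest above
    return Path.cwd() / "resources"               # dev fallback (assumed layout)

print(sorted(p.name for p in bundled_resources_root().glob("*.toml")))
```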
resources/mlx-community--DeepSeek-V3.1-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/DeepSeek-V3.1-4bit"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[storage_size]
+in_bytes = 405874409472
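Each card is a small TOML document; a quick sketch of reading one with the standard library (`tomllib`, Python 3.11+), using exactly the field names in the file above:

```python
import tomllib

# Path assumed relative to the repo root.
with open("resources/mlx-community--DeepSeek-V3.1-4bit.toml", "rb") as f:
    card = tomllib.load(f)

assert card["n_layers"] == 61
size_gib = card["storage_size"]["in_bytes"] / 2**30  # 405874409472 B ~ 378 GiB
print(f"{card['model_id']}: {size_gib:.0f} GiB across {card['n_layers']} layers")
```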
resources/mlx-community--DeepSeek-V3.1-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/DeepSeek-V3.1-8bit"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[storage_size]
+in_bytes = 765577920512

resources/mlx-community--GLM-4.5-Air-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/GLM-4.5-Air-8bit"
+n_layers = 46
+hidden_size = 4096
+supports_tensor = false
+
+[storage_size]
+in_bytes = 122406567936

resources/mlx-community--GLM-4.5-Air-bf16.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/GLM-4.5-Air-bf16"
+n_layers = 46
+hidden_size = 4096
+supports_tensor = true
+
+[storage_size]
+in_bytes = 229780750336

resources/mlx-community--GLM-4.7-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/GLM-4.7-4bit"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+
+[storage_size]
+in_bytes = 198556925568

resources/mlx-community--GLM-4.7-6bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/GLM-4.7-6bit"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+
+[storage_size]
+in_bytes = 286737579648

resources/mlx-community--GLM-4.7-8bit-gs32.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/GLM-4.7-8bit-gs32"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+
+[storage_size]
+in_bytes = 396963397248

resources/mlx-community--Kimi-K2-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Kimi-K2-Instruct-4bit"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[storage_size]
+in_bytes = 620622774272

resources/mlx-community--Kimi-K2-Thinking.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Kimi-K2-Thinking"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[storage_size]
+in_bytes = 706522120192

resources/mlx-community--Llama-3.2-1B-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
+n_layers = 16
+hidden_size = 2048
+supports_tensor = true
+
+[storage_size]
+in_bytes = 729808896

resources/mlx-community--Llama-3.2-3B-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
+n_layers = 28
+hidden_size = 3072
+supports_tensor = true
+
+[storage_size]
+in_bytes = 1863319552

resources/mlx-community--Llama-3.2-3B-Instruct-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
+n_layers = 28
+hidden_size = 3072
+supports_tensor = true
+
+[storage_size]
+in_bytes = 3501195264

resources/mlx-community--Llama-3.3-70B-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[storage_size]
+in_bytes = 40652242944

resources/mlx-community--Llama-3.3-70B-Instruct-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[storage_size]
+in_bytes = 76799803392

resources/mlx-community--Meta-Llama-3.1-70B-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[storage_size]
+in_bytes = 40652242944

resources/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+
+[storage_size]
+in_bytes = 4637851648

resources/mlx-community--Meta-Llama-3.1-8B-Instruct-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+
+[storage_size]
+in_bytes = 8954839040

resources/mlx-community--Meta-Llama-3.1-8B-Instruct-bf16.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+
+[storage_size]
+in_bytes = 16882073600

resources/mlx-community--MiniMax-M2.1-3bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/MiniMax-M2.1-3bit"
+n_layers = 61
+hidden_size = 3072
+supports_tensor = true
+
+[storage_size]
+in_bytes = 100086644736

resources/mlx-community--MiniMax-M2.1-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/MiniMax-M2.1-8bit"
+n_layers = 61
+hidden_size = 3072
+supports_tensor = true
+
+[storage_size]
+in_bytes = 242986745856

resources/mlx-community--Qwen3-0.6B-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-0.6B-4bit"
+n_layers = 28
+hidden_size = 1024
+supports_tensor = false
+
+[storage_size]
+in_bytes = 342884352

resources/mlx-community--Qwen3-0.6B-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-0.6B-8bit"
+n_layers = 28
+hidden_size = 1024
+supports_tensor = false
+
+[storage_size]
+in_bytes = 698351616

resources/mlx-community--Qwen3-235B-A22B-Instruct-2507-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
+n_layers = 94
+hidden_size = 4096
+supports_tensor = true
+
+[storage_size]
+in_bytes = 141733920768

resources/mlx-community--Qwen3-235B-A22B-Instruct-2507-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
+n_layers = 94
+hidden_size = 4096
+supports_tensor = true
+
+[storage_size]
+in_bytes = 268435456000

resources/mlx-community--Qwen3-30B-A3B-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-30B-A3B-4bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[storage_size]
+in_bytes = 17612931072

resources/mlx-community--Qwen3-30B-A3B-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-30B-A3B-8bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[storage_size]
+in_bytes = 33279705088

resources/mlx-community--Qwen3-Coder-480B-A35B-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
+n_layers = 62
+hidden_size = 6144
+supports_tensor = true
+
+[storage_size]
+in_bytes = 289910292480

resources/mlx-community--Qwen3-Coder-480B-A35B-Instruct-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
+n_layers = 62
+hidden_size = 6144
+supports_tensor = true
+
+[storage_size]
+in_bytes = 579820584960

resources/mlx-community--Qwen3-Next-80B-A3B-Instruct-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[storage_size]
+in_bytes = 46976204800

resources/mlx-community--Qwen3-Next-80B-A3B-Instruct-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[storage_size]
+in_bytes = 88814387200

resources/mlx-community--Qwen3-Next-80B-A3B-Thinking-4bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[storage_size]
+in_bytes = 88814387200

resources/mlx-community--Qwen3-Next-80B-A3B-Thinking-8bit.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[storage_size]
+in_bytes = 88814387200

resources/mlx-community--gpt-oss-120b-MXFP4-Q8.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
+n_layers = 36
+hidden_size = 2880
+supports_tensor = true
+
+[storage_size]
+in_bytes = 70652212224

resources/mlx-community--gpt-oss-20b-MXFP4-Q8.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/gpt-oss-20b-MXFP4-Q8"
+n_layers = 24
+hidden_size = 2880
+supports_tensor = true
+
+[storage_size]
+in_bytes = 12025908224

resources/mlx-community--llama-3.3-70b-instruct-fp16.toml (new file, 7 lines)
@@ -0,0 +1,7 @@
+model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[storage_size]
+in_bytes = 144383672320
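Taken together, these cards form a directory the runtime can enumerate. A hedged sketch (standard library only, path assumed relative to the repo root) that summarizes every card the way the dashboard would see them:

```python
import tomllib
from pathlib import Path

for path in sorted(Path("resources").glob("*.toml")):
    with open(path, "rb") as f:
        card = tomllib.load(f)
    gib = card["storage_size"]["in_bytes"] / 2**30
    mode = "tensor-parallel ok" if card["supports_tensor"] else "pipeline only"
    print(f"{card['model_id']:55} {gib:8.1f} GiB  {card['n_layers']:3} layers  {mode}")
```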
@@ -1,5 +1,6 @@
 import time
 from collections.abc import AsyncGenerator
+from dataclasses import dataclass, field
 from http import HTTPStatus
 from typing import cast

@@ -19,8 +20,7 @@ from exo.master.placement import place_instance as get_instance_placements
 from exo.shared.apply import apply
 from exo.shared.election import ElectionMessage
 from exo.shared.logging import InterceptLogger
-from exo.shared.models.model_cards import MODEL_CARDS, ModelCard, ModelId
-from exo.shared.models.model_meta import get_model_card
+from exo.shared.models.model_cards import ModelCard, ModelId, get_model_cards
 from exo.shared.types.api import (
     BenchChatCompletionResponse,
     BenchChatCompletionTaskParams,
@@ -65,7 +65,7 @@ from exo.shared.types.worker.instances import Instance, InstanceId, InstanceMeta
 from exo.shared.types.worker.shards import Sharding
 from exo.utils.banner import print_startup_banner
 from exo.utils.channels import Receiver, Sender, channel
-from exo.utils.dashboard_path import find_dashboard
+from exo.utils.dashboard_path import RuntimeResources, find_directory
 from exo.utils.event_buffer import OrderedBuffer

@@ -86,57 +86,52 @@ def chunk_to_response(
 )


-async def resolve_model_card(model_id: str) -> ModelCard:
-    if model_id in MODEL_CARDS:
-        model_card = MODEL_CARDS[model_id]
-        return model_card
-    else:
-        return await get_model_card(model_id)
-
-
+@dataclass(eq=False)
 class API:
-    def __init__(
-        self,
+    node_id: NodeId
+    session_id: SessionId
+    port: int
+    app: FastAPI
+    global_event_receiver: Receiver[ForwarderEvent]
+    command_sender: Sender[ForwarderCommand]
+    election_receiver: Receiver[ElectionMessage]
+    state: State = field(init=False, default_factory=State)
+    _event_log: list[Event] = field(init=False, default_factory=list)
+    event_buffer: OrderedBuffer[Event] = field(init=False, default_factory=OrderedBuffer)
+    _chat_completion_queues: dict[CommandId, Sender[TokenChunk]] = field(init=False, default_factory=dict)
+    _tg: TaskGroup = field(init=False, default_factory=create_task_group)
+    last_completed_election: int = field(init=False, default=0)
+    paused: bool = field(init=False, default=False)
+    paused_ev: anyio.Event = field(init=False, default_factory=anyio.Event)
+
+    @classmethod
+    async def create(
+        cls,
         node_id: NodeId,
         session_id: SessionId,
         *,
         port: int,
         # Ideally this would be a MasterForwarderEvent but type system says no :(
         global_event_receiver: Receiver[ForwarderEvent],
         command_sender: Sender[ForwarderCommand],
         # This lets us pause the API if an election is running
         election_receiver: Receiver[ElectionMessage],
-    ) -> None:
-        self.state = State()
-        self._event_log: list[Event] = []
-        self.command_sender = command_sender
-        self.global_event_receiver = global_event_receiver
-        self.election_receiver = election_receiver
-        self.event_buffer: OrderedBuffer[Event] = OrderedBuffer[Event]()
-        self.node_id: NodeId = node_id
-        self.session_id: SessionId = session_id
-        self.last_completed_election: int = 0
-        self.port = port
-
-        self.paused: bool = False
-        self.paused_ev: anyio.Event = anyio.Event()
-
-        self.app = FastAPI()
-        self._setup_exception_handlers()
-        self._setup_cors()
-        self._setup_routes()
-
-        self.app.mount(
+    ) -> "API":
+        app = FastAPI()
+        app.mount(
             "/",
             StaticFiles(
-                directory=find_dashboard(),
+                directory=await find_directory(RuntimeResources.Dashboard),
                 html=True,
             ),
             name="dashboard",
         )
-
-        self._chat_completion_queues: dict[CommandId, Sender[TokenChunk]] = {}
-        self._tg: TaskGroup | None = None
+        return cls(node_id, session_id, port, app, global_event_receiver, command_sender, election_receiver)
+
+    def __post_init__(self) -> None:
+        self._setup_exception_handlers()
+        self._setup_cors()
+        self._setup_routes()


     def reset(self, new_session_id: SessionId, result_clock: int):
         logger.info("Resetting API State")
@@ -213,7 +208,7 @@ class API:
         self, payload: CreateInstanceParams
     ) -> CreateInstanceResponse:
         instance = payload.instance
-        model_card = await resolve_model_card(instance.shard_assignments.model_id)
+        model_card = await ModelCard.from_hf(instance.shard_assignments.model_id)
         required_memory = model_card.storage_size
         available_memory = self._calculate_total_available_memory()

@@ -279,7 +274,7 @@ class API:
         if len(list(self.state.topology.list_nodes())) == 0:
             return PlacementPreviewResponse(previews=[])

-        cards = [card for card in MODEL_CARDS.values() if card.model_id == model_id]
+        cards = [card for card in await get_model_cards() if card.short_id == model_id]
         if not cards:
             raise HTTPException(status_code=404, detail=f"Model {model_id} not found")

@@ -620,7 +615,7 @@ class API:
                 storage_size_megabytes=int(card.storage_size.in_mb),
                 supports_tensor=card.supports_tensor,
             )
-            for card in MODEL_CARDS.values()
+            for card in await get_model_cards()
         ]
     )
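The `__init__` → `@dataclass` plus async `create(...)` change is the standard workaround for needing `await` during construction (here, `await find_directory(...)` for the dashboard mount). A minimal, self-contained sketch of the pattern with hypothetical names (`Service`, `locate_root` stand in for `API`, `find_directory`):

```python
import asyncio
from dataclasses import dataclass, field

async def locate_root() -> str:
    # Stand-in for awaitable discovery work such as find_directory(...)
    return "/tmp/assets"

@dataclass(eq=False)
class Service:
    root: str                                       # init field, passed by create()
    ready: bool = field(init=False, default=False)  # derived, like API's state/_tg

    @classmethod
    async def create(cls) -> "Service":
        root = await locate_root()  # the await that __init__ could never do
        return cls(root)            # __post_init__ runs synchronously in here

    def __post_init__(self) -> None:
        self.ready = True

print(asyncio.run(Service.create()))
```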
@@ -1,8 +1,24 @@
-from pydantic import PositiveInt
+from typing import Annotated
+
+import aiofiles
+import aiofiles.os as aios
+import tomlkit
+from anyio import Path, open_file
+from huggingface_hub import model_info
+from loguru import logger
+from pydantic import BaseModel, Field, PositiveInt, ValidationError
+from tomlkit.exceptions import TOMLKitError

+from exo.shared.models.model_cards import ModelCard, ModelId
 from exo.shared.types.common import Id
 from exo.shared.types.memory import Memory
+from exo.utils.dashboard_path import RuntimeResources, find_directory
 from exo.utils.pydantic_ext import CamelCaseModel
+from exo.worker.download.download_utils import (
+    ModelSafetensorsIndex,
+    download_file_with_retry,
+    ensure_models_dir,
+)
@@ -12,6 +28,7 @@ class ModelId(Id):
     def short(self) -> str:
         return self.split("/")[-1]

+_card_cache: dict[str, ModelCard] = {}

 class ModelCard(CamelCaseModel):
     model_id: ModelId
@@ -20,249 +37,67 @@ class ModelCard(CamelCaseModel):
     hidden_size: PositiveInt
     supports_tensor: bool

+    async def save(self, path: Path) -> None:
+        async with await open_file(path, "w") as f:
+            py = self.model_dump()
+            data = tomlkit.dumps(py)  # pyright: ignore[reportUnknownMemberType]
+            await f.write(data)
+
+    async def save_to_default_path(self) -> None:
+        dir = await find_directory(RuntimeResources.Resources)
+        await self.save(dir / self.model_id.normalize())
+
+    @staticmethod
+    async def load_from_path(path: Path) -> ModelCard:
+        async with await open_file(path, "r") as f:
+            py = tomlkit.loads(await f.read())
+        return ModelCard.model_validate(py)
+
+    @staticmethod
+    async def load_from_default_path(model_id: ModelId) -> ModelCard:
+        return await ModelCard.load_from_path(await find_directory(RuntimeResources.Resources) / model_id.normalize())
+
+    @staticmethod
+    async def load(model_id: ModelId) -> ModelCard:
+        try:
+            return await ModelCard.load_from_default_path(model_id)
+        except (ValidationError, TOMLKitError, FileNotFoundError):
+            return await ModelCard.from_hf(model_id)
+
+    @staticmethod
+    async def from_hf(model_id: ModelId) -> ModelCard:
+        """Fetches storage size and number of layers for a Hugging Face model, returns Pydantic ModelMeta."""
+        if (mc := _card_cache.get(model_id, None)) is not None:
+            return mc
+        config_data = await get_config_data(model_id)
+        num_layers = config_data.layer_count
+        mem_size_bytes = await get_safetensors_size(model_id)
+
+        mc = ModelCard(
+            model_id=ModelId(model_id),
+            storage_size=mem_size_bytes,
+            n_layers=num_layers,
+            hidden_size=config_data.hidden_size or 0,
+            # TODO: all custom models currently do not support tensor. We could add a dynamic test for this?
+            supports_tensor=False,
+        )
+        _card_cache[model_id] = mc
+        return mc
+
+
+# TODO: should we cache this? how do we check for changes
+async def get_model_cards() -> list[ModelCard]:
+    dir = await find_directory(RuntimeResources.Resources)
+    cards: list[ModelCard] = []
+    async for file in dir.glob("*.toml"):
+        try:
+            cards.append(await ModelCard.load_from_path(file))
+        except (TOMLKitError, ValidationError):
+            continue
+
+    return cards
+

 MODEL_CARDS: dict[str, ModelCard] = {
-    # deepseek v3
-    "deepseek-v3.1-4bit": ModelCard(
-        model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"),
-        storage_size=Memory.from_gb(378),
-        n_layers=61,
-        hidden_size=7168,
-        supports_tensor=True,
-    ),
-    "deepseek-v3.1-8bit": ModelCard(
-        model_id=ModelId("mlx-community/DeepSeek-V3.1-8bit"),
-        storage_size=Memory.from_gb(713),
-        n_layers=61,
-        hidden_size=7168,
-        supports_tensor=True,
-    ),
-    # kimi k2
-    "kimi-k2-instruct-4bit": ModelCard(
-        model_id=ModelId("mlx-community/Kimi-K2-Instruct-4bit"),
-        storage_size=Memory.from_gb(578),
-        n_layers=61,
-        hidden_size=7168,
-        supports_tensor=True,
-    ),
-    "kimi-k2-thinking": ModelCard(
-        model_id=ModelId("mlx-community/Kimi-K2-Thinking"),
-        storage_size=Memory.from_gb(658),
-        n_layers=61,
-        hidden_size=7168,
-        supports_tensor=True,
-    ),
-    # llama-3.1
-    "llama-3.1-8b": ModelCard(
-        model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"),
-        storage_size=Memory.from_mb(4423),
-        n_layers=32,
-        hidden_size=4096,
-        supports_tensor=True,
-    ),
-    "llama-3.1-8b-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"),
-        storage_size=Memory.from_mb(8540),
-        n_layers=32,
-        hidden_size=4096,
-        supports_tensor=True,
-    ),
-    "llama-3.1-8b-bf16": ModelCard(
-        model_id=ModelId("mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"),
-        storage_size=Memory.from_mb(16100),
-        n_layers=32,
-        hidden_size=4096,
-        supports_tensor=True,
-    ),
-    "llama-3.1-70b": ModelCard(
-        model_id=ModelId("mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"),
-        storage_size=Memory.from_mb(38769),
-        n_layers=80,
-        hidden_size=8192,
-        supports_tensor=True,
-    ),
-    # llama-3.2
-    "llama-3.2-1b": ModelCard(
-        model_id=ModelId("mlx-community/Llama-3.2-1B-Instruct-4bit"),
-        storage_size=Memory.from_mb(696),
-        n_layers=16,
-        hidden_size=2048,
-        supports_tensor=True,
-    ),
-    "llama-3.2-3b": ModelCard(
-        model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-4bit"),
-        storage_size=Memory.from_mb(1777),
-        n_layers=28,
-        hidden_size=3072,
-        supports_tensor=True,
-    ),
-    "llama-3.2-3b-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Llama-3.2-3B-Instruct-8bit"),
-        storage_size=Memory.from_mb(3339),
-        n_layers=28,
-        hidden_size=3072,
-        supports_tensor=True,
-    ),
-    # llama-3.3
-    "llama-3.3-70b": ModelCard(
-        model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-4bit"),
-        storage_size=Memory.from_mb(38769),
-        n_layers=80,
-        hidden_size=8192,
-        supports_tensor=True,
-    ),
-    "llama-3.3-70b-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Llama-3.3-70B-Instruct-8bit"),
-        storage_size=Memory.from_mb(73242),
-        n_layers=80,
-        hidden_size=8192,
-        supports_tensor=True,
-    ),
-    "llama-3.3-70b-fp16": ModelCard(
-        model_id=ModelId("mlx-community/llama-3.3-70b-instruct-fp16"),
-        storage_size=Memory.from_mb(137695),
-        n_layers=80,
-        hidden_size=8192,
-        supports_tensor=True,
-    ),
-    # qwen3
-    "qwen3-0.6b": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-0.6B-4bit"),
-        storage_size=Memory.from_mb(327),
-        n_layers=28,
-        hidden_size=1024,
-        supports_tensor=False,
-    ),
-    "qwen3-0.6b-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-0.6B-8bit"),
-        storage_size=Memory.from_mb(666),
-        n_layers=28,
-        hidden_size=1024,
-        supports_tensor=False,
-    ),
-    "qwen3-30b": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-30B-A3B-4bit"),
-        storage_size=Memory.from_mb(16797),
-        n_layers=48,
-        hidden_size=2048,
-        supports_tensor=True,
-    ),
-    "qwen3-30b-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-30B-A3B-8bit"),
-        storage_size=Memory.from_mb(31738),
-        n_layers=48,
-        hidden_size=2048,
-        supports_tensor=True,
-    ),
-    "qwen3-80b-a3B-4bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"),
-        storage_size=Memory.from_mb(44800),
-        n_layers=48,
-        hidden_size=2048,
-        supports_tensor=True,
-    ),
-    "qwen3-80b-a3B-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"),
-        storage_size=Memory.from_mb(84700),
-        n_layers=48,
-        hidden_size=2048,
-        supports_tensor=True,
-    ),
-    "qwen3-80b-a3B-thinking-4bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"),
-        storage_size=Memory.from_mb(84700),
-        n_layers=48,
-        hidden_size=2048,
-        supports_tensor=True,
-    ),
-    "qwen3-80b-a3B-thinking-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"),
-        storage_size=Memory.from_mb(84700),
-        n_layers=48,
-        hidden_size=2048,
-        supports_tensor=True,
-    ),
-    "qwen3-235b-a22b-4bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"),
-        storage_size=Memory.from_gb(132),
-        n_layers=94,
-        hidden_size=4096,
-        supports_tensor=True,
-    ),
-    "qwen3-235b-a22b-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"),
-        storage_size=Memory.from_gb(250),
-        n_layers=94,
-        hidden_size=4096,
-        supports_tensor=True,
-    ),
-    "qwen3-coder-480b-a35b-4bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"),
-        storage_size=Memory.from_gb(270),
-        n_layers=62,
-        hidden_size=6144,
-        supports_tensor=True,
-    ),
-    "qwen3-coder-480b-a35b-8bit": ModelCard(
-        model_id=ModelId("mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"),
-        storage_size=Memory.from_gb(540),
-        n_layers=62,
-        hidden_size=6144,
-        supports_tensor=True,
-    ),
-    # gpt-oss
-    "gpt-oss-120b-MXFP4-Q8": ModelCard(
-        model_id=ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"),
-        storage_size=Memory.from_kb(68_996_301),
-        n_layers=36,
-        hidden_size=2880,
-        supports_tensor=True,
-    ),
-    "gpt-oss-20b-MXFP4-Q8": ModelCard(
-        model_id=ModelId("mlx-community/gpt-oss-20b-MXFP4-Q8"),
-        storage_size=Memory.from_kb(11_744_051),
-        n_layers=24,
-        hidden_size=2880,
-        supports_tensor=True,
-    ),
-    # glm 4.5
-    "glm-4.5-air-8bit": ModelCard(
-        # Needs to be quantized g32 or g16 to work with tensor parallel
-        model_id=ModelId("mlx-community/GLM-4.5-Air-8bit"),
-        storage_size=Memory.from_gb(114),
-        n_layers=46,
-        hidden_size=4096,
-        supports_tensor=False,
-    ),
-    "glm-4.5-air-bf16": ModelCard(
-        model_id=ModelId("mlx-community/GLM-4.5-Air-bf16"),
-        storage_size=Memory.from_gb(214),
-        n_layers=46,
-        hidden_size=4096,
-        supports_tensor=True,
-    ),
-    # glm 4.7
-    "glm-4.7-4bit": ModelCard(
-        model_id=ModelId("mlx-community/GLM-4.7-4bit"),
-        storage_size=Memory.from_bytes(198556925568),
-        n_layers=91,
-        hidden_size=5120,
-        supports_tensor=True,
-    ),
-    "glm-4.7-6bit": ModelCard(
-        model_id=ModelId("mlx-community/GLM-4.7-6bit"),
-        storage_size=Memory.from_bytes(286737579648),
-        n_layers=91,
-        hidden_size=5120,
-        supports_tensor=True,
-    ),
-    "glm-4.7-8bit-gs32": ModelCard(
-        model_id=ModelId("mlx-community/GLM-4.7-8bit-gs32"),
-        storage_size=Memory.from_bytes(396963397248),
-        n_layers=91,
-        hidden_size=5120,
-        supports_tensor=True,
-    ),
     # glm 4.7 flash
     "glm-4.7-flash-4bit": ModelCard(
         model_id=ModelId("mlx-community/GLM-4.7-Flash-4bit"),
@@ -292,19 +127,83 @@ MODEL_CARDS: dict[str, ModelCard] = {
         hidden_size=2048,
         supports_tensor=True,
     ),
     # minimax-m2
     "minimax-m2.1-8bit": ModelCard(
         model_id=ModelId("mlx-community/MiniMax-M2.1-8bit"),
         storage_size=Memory.from_bytes(242986745856),
         n_layers=61,
         hidden_size=3072,
         supports_tensor=True,
     ),
     "minimax-m2.1-3bit": ModelCard(
         model_id=ModelId("mlx-community/MiniMax-M2.1-3bit"),
         storage_size=Memory.from_bytes(100086644736),
         n_layers=61,
         hidden_size=3072,
         supports_tensor=True,
     ),
 }
+
+
+class ConfigData(BaseModel):
+    model_config = {"extra": "ignore"}  # Allow unknown fields
+
+    # Common field names for number of layers across different architectures
+    num_hidden_layers: Annotated[int, Field(ge=0)] | None = None
+    num_layers: Annotated[int, Field(ge=0)] | None = None
+    n_layer: Annotated[int, Field(ge=0)] | None = None
+    n_layers: Annotated[int, Field(ge=0)] | None = None  # Sometimes used
+    num_decoder_layers: Annotated[int, Field(ge=0)] | None = None  # Transformer models
+    decoder_layers: Annotated[int, Field(ge=0)] | None = None  # Some architectures
+    hidden_size: Annotated[int, Field(ge=0)] | None = None
+
+    @property
+    def layer_count(self) -> int:
+        # Check common field names for layer count
+        layer_fields = [
+            self.num_hidden_layers,
+            self.num_layers,
+            self.n_layer,
+            self.n_layers,
+            self.num_decoder_layers,
+            self.decoder_layers,
+        ]
+
+        for layer_count in layer_fields:
+            if layer_count is not None:
+                return layer_count
+
+        raise ValueError(
+            f"No layer count found in config.json: {self.model_dump_json()}"
+        )
+
+
+async def get_config_data(model_id: ModelId) -> ConfigData:
+    """Downloads and parses config.json for a model."""
+    target_dir = (await ensure_models_dir()) / model_id.normalize()
+    await aios.makedirs(target_dir, exist_ok=True)
+    config_path = await download_file_with_retry(
+        str(model_id),
+        "main",
+        "config.json",
+        target_dir,
+        lambda curr_bytes, total_bytes, is_renamed: logger.info(
+            f"Downloading config.json for {model_id}: {curr_bytes}/{total_bytes} ({is_renamed=})"
+        ),
+    )
+    async with aiofiles.open(config_path, "r") as f:
+        return ConfigData.model_validate_json(await f.read())
+
+
+async def get_safetensors_size(model_id: ModelId) -> Memory:
+    """Gets model size from safetensors index or falls back to HF API."""
+    target_dir = (await ensure_models_dir()) / model_id.normalize()
+    await aios.makedirs(target_dir, exist_ok=True)
+    index_path = await download_file_with_retry(
+        str(model_id),
+        "main",
+        "model.safetensors.index.json",
+        target_dir,
+        lambda curr_bytes, total_bytes, is_renamed: logger.info(
+            f"Downloading model.safetensors.index.json for {model_id}: {curr_bytes}/{total_bytes} ({is_renamed=})"
+        ),
+    )
+    async with aiofiles.open(index_path, "r") as f:
+        index_data = ModelSafetensorsIndex.model_validate_json(await f.read())
+
+    metadata = index_data.metadata
+    if metadata is not None:
+        return Memory.from_bytes(metadata.total_size)
+
+    info = model_info(model_id)
+    if info.safetensors is None:
+        raise ValueError(f"No safetensors info found for {model_id}")
+    return Memory.from_bytes(info.safetensors.total)
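A short usage sketch of the new load chain (hedged: it assumes it runs inside the exo package, where `ModelCard`, `ModelId`, and the resources directory are available). A card is served from the bundled TOML when present and valid; on `FileNotFoundError` or a parse/validation error it is rebuilt from Hugging Face metadata via `from_hf`:

```python
from exo.shared.models.model_cards import ModelCard, ModelId

async def describe(model: str) -> None:
    # resources/*.toml first, Hugging Face config.json + safetensors index second
    card = await ModelCard.load(ModelId(model))
    print(card.model_id, card.n_layers, card.storage_size)
```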
@@ -6,7 +6,7 @@ from huggingface_hub import model_info
 from loguru import logger
 from pydantic import BaseModel, Field

-from exo.shared.models.model_cards import MODEL_CARDS, ModelCard, ModelId
+from exo.shared.models.model_cards import ModelCard, ModelId
 from exo.shared.types.memory import Memory
 from exo.worker.download.download_utils import (
     ModelSafetensorsIndex,
@@ -90,33 +90,23 @@ async def get_safetensors_size(model_id: str) -> Memory:
         raise ValueError(f"No safetensors info found for {model_id}")
     return Memory.from_bytes(info.safetensors.total)


 _model_card_cache: dict[str, ModelCard] = {}


 async def get_model_card(model_id: str) -> ModelCard:
     """Fetches storage size and number of layers for a Hugging Face model, returns Pydantic ModelMeta."""
     if model_id in _model_card_cache:
         return _model_card_cache[model_id]
-    model_card = await _get_model_card(model_id)
-    _model_card_cache[model_id] = model_card
-    return model_card
-
-
-async def _get_model_card(model_id: str) -> ModelCard:
-    """Fetches storage size and number of layers for a Hugging Face model, returns Pydantic ModelMeta."""
     config_data = await get_config_data(model_id)
     num_layers = config_data.layer_count
     mem_size_bytes = await get_safetensors_size(model_id)
-    model_card = next(
-        (card for card in MODEL_CARDS.values() if card.model_id == ModelId(model_id)),
-        None,
-    )

-    return ModelCard(
+    mc = ModelCard(
         model_id=ModelId(model_id),
         storage_size=mem_size_bytes,
         n_layers=num_layers,
         hidden_size=config_data.hidden_size or 0,
         # TODO: all custom models currently do not support tensor. We could add a dynamic test for this?
-        supports_tensor=model_card.supports_tensor if model_card is not None else False,
+        supports_tensor=False,
     )
+    _model_card_cache[model_id] = mc
+    return mc
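One behavioral note on the consolidated `get_model_card`: the module-level dict memoizes per process, but two callers that race on the same uncached id will both miss and both fetch. A hedged sketch of a lock-guarded variant (hypothetical helper, not in this commit):

```python
import asyncio
from collections.abc import Awaitable, Callable

_cache: dict[str, int] = {}
_lock = asyncio.Lock()

async def cached(key: str, compute: Callable[[], Awaitable[int]]) -> int:
    if key in _cache:          # fast path, no lock
        return _cache[key]
    async with _lock:          # slow path serialized
        if key not in _cache:  # re-check after acquiring the lock
            _cache[key] = await compute()
        return _cache[key]
```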
@@ -1,45 +1,72 @@
+import enum
 import os
 import sys
-from pathlib import Path
 from typing import cast

+from anyio import Path
+
+
+class RuntimeResources(enum.Enum):
+    Dashboard = enum.auto()
+    Resources = enum.auto()
+
+
+_dir_cache: dict[RuntimeResources, Path] = {}
+

-def find_dashboard() -> Path:
-    dashboard = (
-        _find_dashboard_in_env()
-        or _find_dashboard_in_repo()
-        or _find_dashboard_in_bundle()
+async def find_directory(rr: RuntimeResources) -> Path:
+    dir = (
+        _dir_cache.get(rr, None)
+        or await _find_in_env(rr)
+        or await _find_in_repo(rr)
+        or await _find_in_bundle(rr)
     )
-    if not dashboard:
+    if not dir:
         raise FileNotFoundError(
-            "Unable to locate dashboard assets - make sure the dashboard has been built, or export DASHBOARD_DIR if you've built the dashboard elsewhere."
+            "Unable to locate directory - make sure the dashboard has been built and the runtime resources (model cards) exist."
         )
-    return dashboard
+    _dir_cache[rr] = dir
+    return dir


-def _find_dashboard_in_env() -> Path | None:
-    env = os.environ.get("DASHBOARD_DIR")
+async def _find_in_env(rr: RuntimeResources) -> Path | None:
+    match rr:
+        case RuntimeResources.Dashboard:
+            env = os.environ.get("DASHBOARD_DIR")
+        case RuntimeResources.Resources:
+            env = os.environ.get("RESOURCES_DIR")
     if not env:
         return None
-    resolved_env = Path(env).expanduser().resolve()
+    resolved_env = await (await Path(env).expanduser()).resolve()

     return resolved_env


-def _find_dashboard_in_repo() -> Path | None:
-    current_module = Path(__file__).resolve()
+async def _find_in_repo(rr: RuntimeResources) -> Path | None:
+    current_module = await Path(__file__).resolve()
     for parent in current_module.parents:
-        build = parent / "dashboard" / "build"
-        if build.is_dir() and (build / "index.html").exists():
-            return build
+        match rr:
+            case RuntimeResources.Dashboard:
+                build = parent / "dashboard" / "build"
+                if await build.is_dir() and await (build / "index.html").exists():
+                    return build
+            case RuntimeResources.Resources:
+                res = parent / "resources"
+                if await res.is_dir():
+                    return res
     return None


-def _find_dashboard_in_bundle() -> Path | None:
+async def _find_in_bundle(rr: RuntimeResources) -> Path | None:
     frozen_root = cast(str | None, getattr(sys, "_MEIPASS", None))
     if frozen_root is None:
         return None
-    candidate = Path(frozen_root) / "dashboard"
-    if candidate.is_dir():
-        return candidate
+    match rr:
+        case RuntimeResources.Dashboard:
+            candidate = Path(frozen_root) / "dashboard"
+            if await candidate.is_dir():
+                return candidate
+        case RuntimeResources.Resources:
+            candidate = Path(frozen_root) / "resources"
+            if await candidate.is_dir():
+                return candidate
     return None
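The lookup order (cache, then env var, then repo checkout, then PyInstaller bundle) means a deployment can be redirected without code changes. A hedged usage sketch (the directory value is hypothetical; note the first successful lookup is cached for the rest of the process, so set the variable before the first call):

```python
import asyncio
import os

from exo.utils.dashboard_path import RuntimeResources, find_directory

async def main() -> None:
    # Point the resolver at an out-of-tree resources dir; env wins over
    # the repo and bundle probes because _find_in_env runs first.
    os.environ["RESOURCES_DIR"] = "/opt/exo/resources"
    print(await find_directory(RuntimeResources.Resources))

asyncio.run(main())
```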
@@ -3,8 +3,7 @@ from collections.abc import Awaitable
 from pathlib import Path
 from typing import AsyncIterator, Callable

-from exo.shared.models.model_cards import MODEL_CARDS
-from exo.shared.models.model_meta import get_model_card
+from exo.shared.models.model_cards import ModelCard, get_model_cards
 from exo.shared.types.worker.shards import (
     PipelineShardMetadata,
     ShardMetadata,
@@ -20,7 +19,7 @@ def exo_shard_downloader(max_parallel_downloads: int = 8) -> ShardDownloader:


 async def build_base_shard(model_id: str) -> ShardMetadata:
-    model_card = await get_model_card(model_id)
+    model_card = await ModelCard.from_hf(model_id)
     return PipelineShardMetadata(
         model_card=model_card,
         device_rank=0,
@@ -159,7 +158,7 @@ class ResumableShardDownloader(ShardDownloader):
         # Kick off download status coroutines concurrently
         tasks = [
             asyncio.create_task(_status_for_model(model_card.model_id))
-            for model_card in MODEL_CARDS.values()
+            for model_card in await get_model_cards()
         ]

         for task in asyncio.as_completed(tasks):
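`asyncio.as_completed` here yields download-status results in finish order rather than submission order, so fast models report first. A self-contained sketch of the pattern (toy coroutine, not exo code):

```python
import asyncio
import random

async def status(name: str) -> str:
    await asyncio.sleep(random.random())  # stand-in for a status probe
    return name

async def main() -> None:
    tasks = [asyncio.create_task(status(n)) for n in ("a", "b", "c")]
    for task in asyncio.as_completed(tasks):  # finish order, not submit order
        print(await task)

asyncio.run(main())
```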
@@ -12,7 +12,7 @@ from loguru import logger
 from pydantic import BaseModel

 from exo.shared.logging import InterceptLogger, logger_setup
-from exo.shared.models.model_cards import MODEL_CARDS, ModelId
+from exo.shared.models.model_cards import ModelId
 from exo.shared.types.api import ChatCompletionMessage, ChatCompletionTaskParams
 from exo.shared.types.commands import CommandId
 from exo.shared.types.common import Host, NodeId
@@ -89,22 +89,22 @@ async def tb_detection():

 async def assert_downloads():
     sd = exo_shard_downloader()
-    # await sd.ensure_shard(await build_full_shard(MODEL_CARDS["qwen3-0.6b"].model_id))
+    # await sd.ensure_shard(await build_full_shard(ModelId("mlx-community/Qwen3-0.6B-8bit")))
     await sd.ensure_shard(
-        await build_full_shard(MODEL_CARDS["llama-3.1-8b-bf16"].model_id)
+        await build_full_shard(ModelId("mlx-community/Llama-3.1-8b-bf16"))
     )
-    await sd.ensure_shard(await build_full_shard(MODEL_CARDS["qwen3-30b"].model_id))
+    await sd.ensure_shard(await build_full_shard(ModelId("mlx-community/Qwen3-30b-A3B")))
     await sd.ensure_shard(
-        await build_full_shard(MODEL_CARDS["gpt-oss-120b-MXFP4-Q8"].model_id)
+        await build_full_shard(ModelId("mlx-community/gpt-oss-120b-MXFP4-Q8"))
     )
     await sd.ensure_shard(
-        await build_full_shard(MODEL_CARDS["gpt-oss-20b-4bit"].model_id)
+        await build_full_shard(ModelId("mlx-community/gpt-oss-20b-4bit"))
     )
     await sd.ensure_shard(
-        await build_full_shard(MODEL_CARDS["glm-4.7-8bit-gs32"].model_id)
+        await build_full_shard(ModelId("mlx-community/GLM-4.7-8bit-gs32"))
     )
     await sd.ensure_shard(
-        await build_full_shard(MODEL_CARDS["minimax-m2.1-8bit"].model_id)
+        await build_full_shard(ModelId("mlx-community/MiniMax-M2.1-8bit"))
     )
(deleted file)
@@ -1,84 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-PREFS="${PREFS:-/Library/Preferences/SystemConfiguration/preferences.plist}"
-
-tmpdir="$(mktemp -d)"
-trap 'rm -rf "$tmpdir"' EXIT
-injson="$tmpdir/in.json"
-outjson="$tmpdir/out.json"
-plutil -convert json -o "$injson" "$PREFS"
-
-perl -Mstrict -Mwarnings -MJSON::PP -e '
-my ($in, $out) = @ARGV;
-
-open my $fh, "<", $in or die "open $in: $!";
-local $/;
-my $txt = <$fh>;
-close $fh;
-
-my $json = JSON::PP->new->utf8->relaxed(1);
-my $d = $json->decode($txt);
-
-if (ref($d->{VirtualNetworkInterfaces}) eq "HASH"
-    && ref($d->{VirtualNetworkInterfaces}{Bridge}) eq "HASH") {
-  delete $d->{VirtualNetworkInterfaces}{Bridge}{bridge0};
-}
-
-my @bridge_svcs;
-if (ref($d->{NetworkServices}) eq "HASH") {
-  for my $k (keys %{ $d->{NetworkServices} }) {
-    my $svc = $d->{NetworkServices}{$k};
-    next unless ref($svc) eq "HASH";
-    my $iface = $svc->{Interface};
-    next unless ref($iface) eq "HASH";
-    my $dev = $iface->{DeviceName};
-    if (defined $dev && $dev eq "bridge0") {
-      push @bridge_svcs, $k;
-    }
-  }
-  delete @{ $d->{NetworkServices} }{ @bridge_svcs } if @bridge_svcs;
-}
-
-my %is_bridge = map { $_ => 1 } @bridge_svcs;
-
-if (ref($d->{Sets}) eq "HASH") {
-  for my $setk (keys %{ $d->{Sets} }) {
-    my $set = $d->{Sets}{$setk};
-    next unless ref($set) eq "HASH";
-    my $net = $set->{Network};
-    next unless ref($net) eq "HASH";
-
-    if (ref($net->{Interface}) eq "HASH") {
-      delete $net->{Interface}{bridge0};
-    }
-
-    if (ref($net->{Service}) eq "HASH" && @bridge_svcs) {
-      for my $svc (@bridge_svcs) {
-        delete $net->{Service}{$svc};
-      }
-    }
-
-    my $g = $net->{Global};
-    if (ref($g) eq "HASH"
-        && ref($g->{IPv4}) eq "HASH"
-        && ref($g->{IPv4}{ServiceOrder}) eq "ARRAY"
-        && @bridge_svcs) {
-
-      my @so = @{ $g->{IPv4}{ServiceOrder} };
-      @so = grep { !defined($_) || !$is_bridge{$_} } @so;
-      $g->{IPv4}{ServiceOrder} = \@so;
-    }
-  }
-}
-
-open my $oh, ">", $out or die "open $out: $!";
-print $oh JSON::PP->new->utf8->canonical(1)->pretty(1)->encode($d);
-close $oh;
-' "$injson" "$outjson"
-
-# Convert JSON -> plist (write back as binary1; change to xml1 if you prefer)
-plutil -convert xml1 -o "$PREFS" "$outjson"
-
-# Ask configd to reload SystemConfiguration state
-killall -HUP configd 2>/dev/null || true
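The deleted script rewrote macOS's SystemConfiguration preferences to drop the `bridge0` virtual interface and every network service bound to it. For anyone auditing the same plist after this change, a hedged read-only check in Python (standard `plistlib`; run with enough privileges to read the file):

```python
import plistlib

PREFS = "/Library/Preferences/SystemConfiguration/preferences.plist"

with open(PREFS, "rb") as f:
    prefs = plistlib.load(f)

# Same keys the script edited: the virtual interface and services bound to it.
bridges = prefs.get("VirtualNetworkInterfaces", {}).get("Bridge", {})
services = prefs.get("NetworkServices", {})
bridge_svcs = [
    k for k, svc in services.items()
    if isinstance(svc, dict)
    and svc.get("Interface", {}).get("DeviceName") == "bridge0"
]
print("bridge0 interface present:", "bridge0" in bridges)
print("services bound to bridge0:", bridge_svcs)
```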