Mirror of https://github.com/exo-explore/exo.git (synced 2026-02-20 07:46:42 -05:00)
Compare commits: 2 commits, feat/bug-r ...

| Author | SHA1 | Date |
|---|---|---|
| alexcheema | 8a18486565 | |
| alexcheema | dd34ad1a51 | |
@@ -50,6 +50,9 @@ EXO_TEST_LOG = EXO_CACHE_HOME / "exo_test.log"
EXO_NODE_ID_KEYPAIR = EXO_CONFIG_HOME / "node_id.keypair"
EXO_CONFIG_FILE = EXO_CONFIG_HOME / "config.toml"

# User-added custom models (config)
EXO_USER_MODELS_DIR = EXO_CONFIG_HOME / "models"

# libp2p topics for event forwarding
LIBP2P_LOCAL_EVENTS_TOPIC = "worker_events"
LIBP2P_GLOBAL_EVENTS_TOPIC = "global_events"
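A hedged sketch of how the new EXO_USER_MODELS_DIR constant is consumed; the "--" filename convention matches _model_id_to_filename() in model_registry.py further down this diff, and the concrete path shown is illustrative only:

from exo.shared.constants import EXO_USER_MODELS_DIR

# User-added model cards are stored one JSON file per model, with "/" in the
# HuggingFace repo path replaced by "--" so the id is filesystem-safe.
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
card_path = EXO_USER_MODELS_DIR / (model_id.replace("/", "--") + ".json")
# e.g. <config home>/models/mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.json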
81  src/exo/shared/models/architecture_support.py  Normal file
@@ -0,0 +1,81 @@
"""Architecture support mapping for tensor parallelism and capabilities.

This module provides a single source of truth for which model architectures
support tensor parallelism and other capabilities. The mapping is derived
from the MLX model classes in exo.worker.engines.mlx.auto_parallel.
"""

from typing import Final

# Model architectures (HuggingFace model_type values) that support tensor parallelism.
# This mapping corresponds to the model classes in auto_parallel.py:
#
# | model_type      | MLX Class           |
# |-----------------|---------------------|
# | llama           | LlamaModel          |
# | mistral         | LlamaModel          |
# | qwen2           | LlamaModel          |
# | ministral3      | Ministral3Model     |
# | deepseek_v3     | DeepseekV3Model     |
# | deepseek_v32    | DeepseekV32Model    |
# | minimax         | MiniMaxModel        |
# | qwen3_moe       | Qwen3MoeModel       |
# | glm4_moe        | Glm4MoeModel        |
# | qwen3_next      | Qwen3NextModel      |
# | gpt_oss         | GptOssModel         |
# | gpt_oss_moe     | GptOssMoeModel      |
#
TENSOR_PARALLEL_ARCHITECTURES: Final[frozenset[str]] = frozenset(
    {
        "llama",
        "mistral",
        "qwen2",
        "ministral3",
        "deepseek_v3",
        "deepseek_v32",
        "minimax",
        "qwen3_moe",
        "glm4_moe",
        "qwen3_next",
        "gpt_oss",
        "gpt_oss_moe",
    }
)

# Model architectures (HuggingFace model_type values) that support vision input.
# These architectures have native image understanding capabilities.
VISION_ARCHITECTURES: Final[frozenset[str]] = frozenset(
    {
        "llava",  # LLaVA vision-language models
        "qwen2_5_vl",  # Qwen 2.5 Vision-Language
        "qwen2_vl",  # Qwen 2 Vision-Language
        "phi4mm",  # Phi-4 multimodal
        "mllama",  # Llama 3.2 Vision (MLlama)
        "paligemma",  # PaLI-GEMMA
        "idefics2",  # IDEFICS2
    }
)


def supports_tensor_parallel(architecture: str) -> bool:
    """Check if an architecture supports tensor parallelism.

    Args:
        architecture: The HuggingFace model_type value (e.g., "llama", "qwen2").

    Returns:
        True if the architecture supports tensor parallelism, False otherwise.
    """
    return architecture.lower() in TENSOR_PARALLEL_ARCHITECTURES


def supports_vision(architecture: str) -> bool:
    """Check if an architecture supports vision/image input.

    Args:
        architecture: The HuggingFace model_type value (e.g., "llava", "qwen2_vl").

    Returns:
        True if the architecture supports vision input, False otherwise.
    """
    return architecture.lower() in VISION_ARCHITECTURES
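A brief usage sketch of the two helpers defined above; lookups are case-insensitive because both functions lower-case their argument before the set membership test:

from exo.shared.models.architecture_support import (
    supports_tensor_parallel,
    supports_vision,
)

assert supports_tensor_parallel("llama")       # listed in TENSOR_PARALLEL_ARCHITECTURES
assert supports_tensor_parallel("Qwen3_MoE")   # input is lower-cased first
assert not supports_tensor_parallel("llava")   # vision architecture, no TP support
assert supports_vision("qwen2_vl") and not supports_vision("llama")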
11  src/exo/shared/models/cards/deepseek-v3.1-4bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/DeepSeek-V3.1-4bit",
  "name": "DeepSeek V3.1 (4-bit)",
  "description": "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 405874409472,
  "n_layers": 61,
  "hidden_size": 7168,
  "is_user_added": false
}

11  src/exo/shared/models/cards/deepseek-v3.1-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/DeepSeek-V3.1-8bit",
  "name": "DeepSeek V3.1 (8-bit)",
  "description": "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 765577920512,
  "n_layers": 61,
  "hidden_size": 7168,
  "is_user_added": false
}

11  src/exo/shared/models/cards/glm-4.5-air-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/GLM-4.5-Air-8bit",
  "name": "GLM 4.5 Air 8bit",
  "description": "GLM 4.5 Air 8bit",
  "tags": [],
  "supports_tensor": false,
  "storage_size_bytes": 122406567936,
  "n_layers": 46,
  "hidden_size": 4096,
  "is_user_added": false
}

11  src/exo/shared/models/cards/glm-4.5-air-bf16.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/GLM-4.5-Air-bf16",
  "name": "GLM 4.5 Air bf16",
  "description": "GLM 4.5 Air bf16",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 229780750336,
  "n_layers": 46,
  "hidden_size": 4096,
  "is_user_added": false
}

11  src/exo/shared/models/cards/glm-4.7-4bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/GLM-4.7-4bit",
  "name": "GLM 4.7 4bit",
  "description": "GLM 4.7 4bit",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 198556925568,
  "n_layers": 91,
  "hidden_size": 5120,
  "is_user_added": false
}

11  src/exo/shared/models/cards/glm-4.7-6bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/GLM-4.7-6bit",
  "name": "GLM 4.7 6bit",
  "description": "GLM 4.7 6bit",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 286737579648,
  "n_layers": 91,
  "hidden_size": 5120,
  "is_user_added": false
}

11  src/exo/shared/models/cards/glm-4.7-8bit-gs32.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/GLM-4.7-8bit-gs32",
  "name": "GLM 4.7 8bit (gs32)",
  "description": "GLM 4.7 8bit (gs32)",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 396963397248,
  "n_layers": 91,
  "hidden_size": 5120,
  "is_user_added": false
}

11  src/exo/shared/models/cards/gpt-oss-120b-MXFP4-Q8.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/gpt-oss-120b-MXFP4-Q8",
  "name": "GPT-OSS 120B (MXFP4-Q8, MLX)",
  "description": "OpenAI's GPT-OSS 120B is a 117B-parameter Mixture-of-Experts model designed for high-reasoning and general-purpose use; this variant is a 4-bit MLX conversion for Apple Silicon.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 70652212224,
  "n_layers": 36,
  "hidden_size": 2880,
  "is_user_added": false
}

11  src/exo/shared/models/cards/gpt-oss-20b-MXFP4-Q8.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/gpt-oss-20b-MXFP4-Q8",
  "name": "GPT-OSS 20B (MXFP4-Q8, MLX)",
  "description": "OpenAI's GPT-OSS 20B is a medium-sized MoE model for lower-latency and local or specialized use cases; this variant is a 4-bit MLX conversion for Apple Silicon.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 12025908224,
  "n_layers": 24,
  "hidden_size": 2880,
  "is_user_added": false
}

11  src/exo/shared/models/cards/kimi-k2-instruct-4bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Kimi-K2-Instruct-4bit",
  "name": "Kimi K2 Instruct (4-bit)",
  "description": "Kimi K2 is a large language model trained on the Kimi K2 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 620622774272,
  "n_layers": 61,
  "hidden_size": 7168,
  "is_user_added": false
}

11  src/exo/shared/models/cards/kimi-k2-thinking.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Kimi-K2-Thinking",
  "name": "Kimi K2 Thinking (4-bit)",
  "description": "Kimi K2 Thinking is the latest, most capable version of open-source thinking model.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 706522120192,
  "n_layers": 61,
  "hidden_size": 7168,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.1-70b.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit",
  "name": "Llama 3.1 70B (4-bit)",
  "description": "Llama 3.1 is a large language model trained on the Llama 3.1 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 40652242944,
  "n_layers": 80,
  "hidden_size": 8192,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.1-8b-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit",
  "name": "Llama 3.1 8B (8-bit)",
  "description": "Llama 3.1 is a large language model trained on the Llama 3.1 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 8954839040,
  "n_layers": 32,
  "hidden_size": 4096,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.1-8b-bf16.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16",
  "name": "Llama 3.1 8B (BF16)",
  "description": "Llama 3.1 is a large language model trained on the Llama 3.1 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 16882073600,
  "n_layers": 32,
  "hidden_size": 4096,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.1-8b.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit",
  "name": "Llama 3.1 8B (4-bit)",
  "description": "Llama 3.1 is a large language model trained on the Llama 3.1 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 4637851648,
  "n_layers": 32,
  "hidden_size": 4096,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.2-1b.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Llama-3.2-1B-Instruct-4bit",
  "name": "Llama 3.2 1B (4-bit)",
  "description": "Llama 3.2 is a large language model trained on the Llama 3.2 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 729808896,
  "n_layers": 16,
  "hidden_size": 2048,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.2-3b-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Llama-3.2-3B-Instruct-8bit",
  "name": "Llama 3.2 3B (8-bit)",
  "description": "Llama 3.2 is a large language model trained on the Llama 3.2 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 3501195264,
  "n_layers": 28,
  "hidden_size": 3072,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.2-3b.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Llama-3.2-3B-Instruct-4bit",
  "name": "Llama 3.2 3B (4-bit)",
  "description": "Llama 3.2 is a large language model trained on the Llama 3.2 dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 1863319552,
  "n_layers": 28,
  "hidden_size": 3072,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.3-70b-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Llama-3.3-70B-Instruct-8bit",
  "name": "Llama 3.3 70B (8-bit)",
  "description": "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 76799803392,
  "n_layers": 80,
  "hidden_size": 8192,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.3-70b-fp16.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/llama-3.3-70b-instruct-fp16",
  "name": "Llama 3.3 70B (FP16)",
  "description": "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 144383672320,
  "n_layers": 80,
  "hidden_size": 8192,
  "is_user_added": false
}

11  src/exo/shared/models/cards/llama-3.3-70b.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Llama-3.3-70B-Instruct-4bit",
  "name": "Llama 3.3 70B (4-bit)",
  "description": "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 40652242944,
  "n_layers": 80,
  "hidden_size": 8192,
  "is_user_added": false
}

11  src/exo/shared/models/cards/minimax-m2.1-3bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/MiniMax-M2.1-3bit",
  "name": "MiniMax M2.1 3bit",
  "description": "MiniMax M2.1 3bit",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 100086644736,
  "n_layers": 61,
  "hidden_size": 3072,
  "is_user_added": false
}

11  src/exo/shared/models/cards/minimax-m2.1-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/MiniMax-M2.1-8bit",
  "name": "MiniMax M2.1 8bit",
  "description": "MiniMax M2.1 8bit",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 242986745856,
  "n_layers": 61,
  "hidden_size": 3072,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-0.6b-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-0.6B-8bit",
  "name": "Qwen3 0.6B (8-bit)",
  "description": "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset.",
  "tags": [],
  "supports_tensor": false,
  "storage_size_bytes": 698351616,
  "n_layers": 28,
  "hidden_size": 1024,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-0.6b.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-0.6B-4bit",
  "name": "Qwen3 0.6B (4-bit)",
  "description": "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset.",
  "tags": [],
  "supports_tensor": false,
  "storage_size_bytes": 342884352,
  "n_layers": 28,
  "hidden_size": 1024,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-235b-a22b-4bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit",
  "name": "Qwen3 235B A22B (4-bit)",
  "description": "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 141733920768,
  "n_layers": 94,
  "hidden_size": 4096,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-235b-a22b-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit",
  "name": "Qwen3 235B A22B (8-bit)",
  "description": "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 268435456000,
  "n_layers": 94,
  "hidden_size": 4096,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-30b-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-30B-A3B-8bit",
  "name": "Qwen3 30B A3B (8-bit)",
  "description": "Qwen3 30B is a large language model trained on the Qwen3 30B dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 33279705088,
  "n_layers": 48,
  "hidden_size": 2048,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-30b.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-30B-A3B-4bit",
  "name": "Qwen3 30B A3B (4-bit)",
  "description": "Qwen3 30B is a large language model trained on the Qwen3 30B dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 17612931072,
  "n_layers": 48,
  "hidden_size": 2048,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-80b-a3B-4bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit",
  "name": "Qwen3 80B A3B (4-bit)",
  "description": "Qwen3 80B",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 46976204800,
  "n_layers": 48,
  "hidden_size": 2048,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-80b-a3B-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit",
  "name": "Qwen3 80B A3B (8-bit)",
  "description": "Qwen3 80B",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 88814387200,
  "n_layers": 48,
  "hidden_size": 2048,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-80b-a3B-thinking-4bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit",
  "name": "Qwen3 80B A3B Thinking (4-bit)",
  "description": "Qwen3 80B Reasoning model",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 88814387200,
  "n_layers": 48,
  "hidden_size": 2048,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-80b-a3B-thinking-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit",
  "name": "Qwen3 80B A3B Thinking (8-bit)",
  "description": "Qwen3 80B Reasoning model",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 88814387200,
  "n_layers": 48,
  "hidden_size": 2048,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-coder-480b-a35b-4bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit",
  "name": "Qwen3 Coder 480B A35B (4-bit)",
  "description": "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 289910292480,
  "n_layers": 62,
  "hidden_size": 6144,
  "is_user_added": false
}

11  src/exo/shared/models/cards/qwen3-coder-480b-a35b-8bit.json  Normal file
@@ -0,0 +1,11 @@
{
  "model_id": "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit",
  "name": "Qwen3 Coder 480B A35B (8-bit)",
  "description": "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset.",
  "tags": [],
  "supports_tensor": true,
  "storage_size_bytes": 579820584960,
  "n_layers": 62,
  "hidden_size": 6144,
  "is_user_added": false
}
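Each card above deserializes straight into a ModelConfig; a minimal sketch (run from the repo root) of what _load_config_from_file() in model_registry.py below does per file:

import json
from pathlib import Path

from exo.shared.types.models import ModelConfig

# Validate one built-in card the same way the registry does.
card = Path("src/exo/shared/models/cards/llama-3.1-8b.json")
cfg = ModelConfig.model_validate(json.loads(card.read_text(encoding="utf-8")))
assert cfg.n_layers == 32 and not cfg.is_user_added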
399  src/exo/shared/models/model_registry.py  Normal file
@@ -0,0 +1,399 @@
"""Model registry for loading and managing model configurations.

This module provides a registry that loads model configurations from:
1. New registry structure: base_models.json + variants.json (grouped models)
2. Legacy JSON files in the cards/ directory (shipped with exo)
3. User-added JSON files in ~/.exo/models/ (created via dashboard)

The registry automatically combines base model metadata with variant data
to produce complete ModelConfig objects with grouping information.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Final, cast

from loguru import logger

from exo.shared.constants import EXO_USER_MODELS_DIR
from exo.shared.models.architecture_support import (
    supports_tensor_parallel,
    supports_vision,
)
from exo.shared.models.registry import (
    BASE_MODELS_BY_ID,
    VARIANTS,
)
from exo.shared.types.models import ModelConfig

# Directory containing built-in model config JSON files (legacy format)
BUILTIN_CARDS_DIR: Final[Path] = Path(__file__).parent / "cards"


def derive_capabilities(
    architecture: str,
    manual_capabilities: list[str] | None = None,
) -> list[str]:
    """Derive model capabilities from architecture and manual overrides.

    Priority:
    1. Manual capabilities from base_models.json (if provided)
    2. Architecture-based detection (for vision)
    3. Default: ["text"]

    Args:
        architecture: The HuggingFace model_type value.
        manual_capabilities: Explicit capabilities from base_models.json.

    Returns:
        List of capability strings (e.g., ["text", "vision", "code"]).
    """
    # If manual capabilities are provided, use them as the primary source
    if manual_capabilities:
        return manual_capabilities

    capabilities = ["text"]  # All models support text by default

    # Architecture-based detection for vision
    if supports_vision(architecture):
        capabilities.append("vision")

    return capabilities


def _model_id_to_filename(model_id: str) -> str:
    """Convert model_id to a valid filename by replacing / with --.

    Examples:
        "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit" -> "mlx-community--Meta-Llama-3.1-8B-Instruct-4bit"
    """
    return model_id.replace("/", "--")


def _quantization_display_name(quantization: str) -> str:
    """Format quantization for display in the model name."""
    quant_lower = quantization.lower()
    if quant_lower in ("4bit", "4-bit"):
        return "4-bit"
    if quant_lower in ("8bit", "8-bit"):
        return "8-bit"
    if quant_lower in ("bf16", "bfloat16"):
        return "BF16"
    if quant_lower in ("fp16", "float16"):
        return "FP16"
    if quant_lower in ("3bit", "3-bit"):
        return "3-bit"
    if quant_lower in ("6bit", "6-bit"):
        return "6-bit"
    # Return as-is (upper-cased) for other quantizations
    return quantization.upper()


def _load_config_from_file(path: Path) -> ModelConfig | None:
    """Load a single model config from a JSON file."""
    try:
        with path.open("r", encoding="utf-8") as f:
            data = cast(dict[str, Any], json.load(f))
        return ModelConfig.model_validate(data)
    except Exception as e:
        logger.warning(f"Failed to load model config from {path}: {e}")
        return None


def _variant_to_config(variant: dict[str, object]) -> ModelConfig | None:
    """Convert a variant + base model data to a ModelConfig."""
    base_model_id = str(variant.get("base_model", ""))
    base_model = BASE_MODELS_BY_ID.get(base_model_id)

    if base_model is None:
        logger.warning(f"Base model {base_model_id} not found for variant {variant}")
        return None

    model_id = str(variant["model_id"])
    quantization = str(variant.get("quantization", ""))
    storage_size_raw = variant.get("storage_size_bytes", 0)
    storage_size_bytes = int(str(storage_size_raw)) if storage_size_raw else 0

    base_name = str(base_model["name"])
    architecture = str(base_model.get("architecture", ""))
    n_layers_raw = base_model.get("n_layers", 0)
    n_layers = int(str(n_layers_raw)) if n_layers_raw else 1
    hidden_size_raw = base_model.get("hidden_size", 0)
    hidden_size = int(str(hidden_size_raw)) if hidden_size_raw else 1
    description = str(base_model.get("description", ""))

    # Extract new UI display fields from the base model
    tagline = str(base_model.get("tagline", ""))
    family = str(base_model.get("family", ""))
    manual_capabilities_raw = base_model.get("capabilities")
    manual_capabilities = (
        cast(list[str], manual_capabilities_raw)
        if isinstance(manual_capabilities_raw, list)
        else None
    )
    capabilities = derive_capabilities(architecture, manual_capabilities)

    # Generate display name: "Base Name (Quantization)"
    quant_display = _quantization_display_name(quantization)
    display_name = f"{base_name} ({quant_display})"

    # Derive supports_tensor from architecture
    tensor_support = supports_tensor_parallel(architecture)

    return ModelConfig(
        model_id=model_id,
        name=display_name,
        description=description,
        tags=[],
        supports_tensor=tensor_support,
        storage_size_bytes=storage_size_bytes,
        n_layers=n_layers,
        hidden_size=hidden_size,
        is_user_added=False,
        architecture=architecture,
        base_model_id=base_model_id,
        base_model_name=base_name,
        quantization=quantization,
        tagline=tagline,
        capabilities=capabilities,
        family=family,
    )


class ModelRegistry:
    """Registry for model configurations.

    Loads configurations from:
    1. Registry structure (base_models.json + variants.json)
    2. Built-in cards/ directory (legacy format)
    3. User ~/.exo/models/
    """

    def __init__(self) -> None:
        self._configs: dict[str, ModelConfig] = {}
        self._reload()

    def _migrate_user_model_files(self) -> None:
        """Migrate user model files from the old naming format to the new one.

        Old format: {short_id}.json (e.g., "meta-llama-3.1-8b-instruct-4bit.json")
        New format: {model_id.replace("/", "--")}.json (e.g., "mlx-community--Meta-Llama-3.1-8B-Instruct-4bit.json")
        """
        if not EXO_USER_MODELS_DIR.exists():
            return

        for path in EXO_USER_MODELS_DIR.glob("*.json"):
            # New format files contain "--" (org--repo)
            if "--" in path.stem:
                continue

            # This is an old format file; load and migrate it
            config = _load_config_from_file(path)
            if config is None:
                continue

            # Create the new filename from model_id
            new_filename = _model_id_to_filename(config.model_id) + ".json"
            new_path = EXO_USER_MODELS_DIR / new_filename

            if new_path.exists():
                # New file already exists, just remove the old one
                logger.info(f"Removing old user model file (already migrated): {path}")
                path.unlink()
            else:
                # Rename to the new format
                logger.info(f"Migrating user model file: {path} -> {new_path}")
                path.rename(new_path)

    def _reload(self) -> None:
        """Reload all model configurations from disk."""
        self._configs.clear()
        registry_count = 0
        legacy_count = 0

        # First, load from the new registry structure (base_models + variants)
        for variant in VARIANTS:
            config = _variant_to_config(variant)
            if config is not None:
                # Use model_id as the key
                self._configs[config.model_id] = config
                registry_count += 1

        # Then load legacy built-in models (skipped when the registry already has the same model_id)
        if BUILTIN_CARDS_DIR.exists():
            for path in BUILTIN_CARDS_DIR.glob("*.json"):
                config = _load_config_from_file(path)
                if config is None:
                    continue
                # Skip if already loaded from the registry
                if config.model_id in self._configs:
                    continue
                self._configs[config.model_id] = config
                legacy_count += 1

        # Migrate old user model files before loading
        self._migrate_user_model_files()

        # Load user models (these can override built-in configs with the same model_id)
        user_count = 0
        if EXO_USER_MODELS_DIR.exists():
            for path in EXO_USER_MODELS_DIR.glob("*.json"):
                config = _load_config_from_file(path)
                if config is not None:
                    self._configs[config.model_id] = config
                    user_count += 1

        logger.info(
            f"Loaded {len(self._configs)} model configs "
            f"(registry: {registry_count}, legacy: {legacy_count}, user: {user_count})"
        )

    def get(self, model_id: str) -> ModelConfig | None:
        """Get a model config by model_id (HuggingFace repo path)."""
        return self._configs.get(model_id)

    def list_all(self) -> dict[str, ModelConfig]:
        """Return all model configurations."""
        return dict(self._configs)

    def list_builtin(self) -> dict[str, ModelConfig]:
        """Return only built-in model configurations."""
        return {k: v for k, v in self._configs.items() if not v.is_user_added}

    def list_user_added(self) -> dict[str, ModelConfig]:
        """Return only user-added model configurations."""
        return {k: v for k, v in self._configs.items() if v.is_user_added}

    def list_grouped(self) -> dict[str, list[ModelConfig]]:
        """Return models grouped by base_model_id.

        Returns:
            Dict mapping base_model_id to list of variant configs.
            Models without a base_model_id are grouped under their model_id.
        """
        grouped: dict[str, list[ModelConfig]] = {}
        for model_id, config in self._configs.items():
            # Use base_model_id if available, otherwise use model_id
            group_key = config.base_model_id if config.base_model_id else model_id
            if group_key not in grouped:
                grouped[group_key] = []
            grouped[group_key].append(config)

        # Sort variants within each group by storage size (smallest first)
        for variants in grouped.values():
            variants.sort(key=lambda c: c.storage_size_bytes)

        return grouped

    def get_base_model_variants(self, base_model_id: str) -> list[ModelConfig]:
        """Get all variants for a given base model.

        Args:
            base_model_id: The base model identifier (e.g., "llama-3.1-8b")

        Returns:
            List of ModelConfig objects for all variants, sorted by storage size.
        """
        variants = [
            config
            for config in self._configs.values()
            if config.base_model_id == base_model_id
        ]
        variants.sort(key=lambda c: c.storage_size_bytes)
        return variants

    def add_user_model(self, config: ModelConfig) -> str:
        """Add a user model configuration and persist it to disk.

        Args:
            config: The model configuration to add.

        Returns:
            The model_id of the added model.
        """
        model_id = config.model_id

        # Ensure is_user_added is True
        config = config.model_copy(update={"is_user_added": True})

        # Create the user models directory if needed
        EXO_USER_MODELS_DIR.mkdir(parents=True, exist_ok=True)

        # Save to a JSON file using model_id as the filename
        filename = _model_id_to_filename(model_id) + ".json"
        path = EXO_USER_MODELS_DIR / filename
        with path.open("w", encoding="utf-8") as f:
            json.dump(config.model_dump(), f, indent=2)

        # Add to the registry
        self._configs[model_id] = config
        logger.info(f"Added user model: {model_id}")

        return model_id

    def remove_user_model(self, model_id: str) -> bool:
        """Remove a user-added model.

        Args:
            model_id: The model_id of the model to remove.

        Returns:
            True if the model was removed, False if not found or not user-added.
        """
        config = self._configs.get(model_id)
        if config is None or not config.is_user_added:
            return False

        # Remove the JSON file
        filename = _model_id_to_filename(model_id) + ".json"
        path = EXO_USER_MODELS_DIR / filename
        if path.exists():
            path.unlink()

        # Remove from the registry
        del self._configs[model_id]
        logger.info(f"Removed user model: {model_id}")

        return True

    def update_user_model(self, model_id: str, **updates: object) -> ModelConfig | None:
        """Update a user-added model configuration.

        Args:
            model_id: The model_id of the model to update.
            **updates: Field updates to apply.

        Returns:
            The updated config, or None if not found or not user-added.
        """
        config = self._configs.get(model_id)
        if config is None or not config.is_user_added:
            return None

        # Apply updates
        updated_config = config.model_copy(update=updates)

        # Persist
        filename = _model_id_to_filename(model_id) + ".json"
        path = EXO_USER_MODELS_DIR / filename
        with path.open("w", encoding="utf-8") as f:
            json.dump(updated_config.model_dump(), f, indent=2)

        self._configs[model_id] = updated_config
        logger.info(f"Updated user model: {model_id}")

        return updated_config


# Global registry instance (lazy-loaded)
_registry: ModelRegistry | None = None


def get_registry() -> ModelRegistry:
    """Get the global model registry instance."""
    global _registry
    if _registry is None:
        _registry = ModelRegistry()
    return _registry
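A short usage sketch of the registry API defined above; the fields read off each config are the ones _variant_to_config sets:

from exo.shared.models.model_registry import get_registry

registry = get_registry()  # lazily builds the singleton and loads all three sources

# Lookup by full HuggingFace repo path
cfg = registry.get("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit")

# Variants grouped under their base model, sorted smallest-first by storage size
for base_id, variants in registry.list_grouped().items():
    print(base_id, [v.quantization for v in variants])

# All quantizations of a single base model
for v in registry.get_base_model_variants("llama-3.1-8b"):
    print(v.name, v.storage_size_bytes)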
50  src/exo/shared/models/registry/__init__.py  Normal file
@@ -0,0 +1,50 @@
"""Model registry with base models and quantization variants.

This module provides access to the model registry data stored in JSON files.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Final, cast

_REGISTRY_DIR: Final[Path] = Path(__file__).parent

# Load base models and variants data at import time
with (_REGISTRY_DIR / "base_models.json").open("r", encoding="utf-8") as f:
    _base_models_data: dict[str, Any] = cast(dict[str, Any], json.load(f))
BASE_MODELS: Final[list[dict[str, object]]] = cast(
    list[dict[str, object]], _base_models_data["base_models"]
)

with (_REGISTRY_DIR / "variants.json").open("r", encoding="utf-8") as f:
    _variants_data: dict[str, Any] = cast(dict[str, Any], json.load(f))
VARIANTS: Final[list[dict[str, object]]] = cast(
    list[dict[str, object]], _variants_data["variants"]
)

# Create lookup dicts for fast access
BASE_MODELS_BY_ID: Final[dict[str, dict[str, object]]] = {
    str(m["id"]): m for m in BASE_MODELS
}

# Variants are keyed by model_id (full HuggingFace repo path)
VARIANTS_BY_MODEL_ID: Final[dict[str, dict[str, object]]] = {
    str(v["model_id"]): v for v in VARIANTS
}

VARIANTS_BY_BASE_MODEL: Final[dict[str, list[dict[str, object]]]] = {}
for _variant in VARIANTS:
    _base_model_id = str(_variant["base_model"])
    if _base_model_id not in VARIANTS_BY_BASE_MODEL:
        VARIANTS_BY_BASE_MODEL[_base_model_id] = []
    VARIANTS_BY_BASE_MODEL[_base_model_id].append(_variant)

__all__ = [
    "BASE_MODELS",
    "VARIANTS",
    "BASE_MODELS_BY_ID",
    "VARIANTS_BY_MODEL_ID",
    "VARIANTS_BY_BASE_MODEL",
]
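For orientation, how the three lookup tables exported above relate to each other; the keys shown come from base_models.json and variants.json below:

from exo.shared.models.registry import (
    BASE_MODELS_BY_ID,
    VARIANTS_BY_BASE_MODEL,
    VARIANTS_BY_MODEL_ID,
)

base = BASE_MODELS_BY_ID["llama-3.1-8b"]           # one base_models.json entry
variants = VARIANTS_BY_BASE_MODEL["llama-3.1-8b"]  # its 4bit/8bit/bf16 variants
one = VARIANTS_BY_MODEL_ID["mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"]
assert one in variants and one["base_model"] == str(base["id"])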
202  src/exo/shared/models/registry/base_models.json  Normal file
@@ -0,0 +1,202 @@
{
  "base_models": [
    {
      "id": "llama-3.1-8b",
      "family": "llama",
      "name": "Llama 3.1 8B",
      "description": "Meta's Llama 3.1 8B instruction-tuned model with 128K context window",
      "tagline": "Fast and capable instruction-following model",
      "capabilities": ["text"],
      "architecture": "llama",
      "n_layers": 32,
      "hidden_size": 4096
    },
    {
      "id": "llama-3.1-70b",
      "family": "llama",
      "name": "Llama 3.1 70B",
      "description": "Meta's Llama 3.1 70B instruction-tuned model with 128K context window",
      "tagline": "Powerful general-purpose model with 128K context",
      "capabilities": ["text", "code"],
      "architecture": "llama",
      "n_layers": 80,
      "hidden_size": 8192
    },
    {
      "id": "llama-3.2-1b",
      "family": "llama",
      "name": "Llama 3.2 1B",
      "description": "Meta's Llama 3.2 1B lightweight instruction-tuned model",
      "tagline": "Ultra-lightweight for edge and mobile",
      "capabilities": ["text"],
      "architecture": "llama",
      "n_layers": 16,
      "hidden_size": 2048
    },
    {
      "id": "llama-3.2-3b",
      "family": "llama",
      "name": "Llama 3.2 3B",
      "description": "Meta's Llama 3.2 3B instruction-tuned model",
      "tagline": "Compact model balancing size and capability",
      "capabilities": ["text"],
      "architecture": "llama",
      "n_layers": 28,
      "hidden_size": 3072
    },
    {
      "id": "llama-3.3-70b",
      "family": "llama",
      "name": "Llama 3.3 70B",
      "description": "Meta's Llama 3.3 70B instruction-tuned model with improved performance",
      "tagline": "Meta's flagship open-weight model",
      "capabilities": ["text", "code"],
      "architecture": "llama",
      "n_layers": 80,
      "hidden_size": 8192
    },
    {
      "id": "deepseek-v3.1",
      "family": "deepseek",
      "name": "DeepSeek V3.1",
      "description": "DeepSeek's V3.1 model with 685B parameters using MoE architecture",
      "tagline": "State-of-the-art MoE with 685B parameters",
      "capabilities": ["text", "thinking", "code"],
      "architecture": "deepseek_v32",
      "n_layers": 61,
      "hidden_size": 7168
    },
    {
      "id": "kimi-k2",
      "family": "kimi",
      "name": "Kimi K2",
      "description": "Moonshot AI's Kimi K2 large language model",
      "tagline": "Powerful MoE from Moonshot AI",
      "capabilities": ["text", "code"],
      "architecture": "deepseek_v3",
      "n_layers": 61,
      "hidden_size": 7168
    },
    {
      "id": "qwen3-0.6b",
      "family": "qwen",
      "name": "Qwen3 0.6B",
      "description": "Alibaba's Qwen3 0.6B lightweight model",
      "tagline": "Tiny but capable for simple tasks",
      "capabilities": ["text"],
      "architecture": "qwen2",
      "n_layers": 28,
      "hidden_size": 1024
    },
    {
      "id": "qwen3-30b-a3b",
      "family": "qwen",
      "name": "Qwen3 30B A3B",
      "description": "Alibaba's Qwen3 30B with A3B MoE architecture",
      "tagline": "Efficient MoE for balanced performance",
      "capabilities": ["text", "code"],
      "architecture": "qwen3_moe",
      "n_layers": 48,
      "hidden_size": 2048
    },
    {
      "id": "qwen3-80b-a3b",
      "family": "qwen",
      "name": "Qwen3 80B A3B",
      "description": "Alibaba's Qwen3 80B with A3B MoE architecture",
      "tagline": "Large-scale MoE with strong capabilities",
      "capabilities": ["text", "code"],
      "architecture": "qwen3_moe",
      "n_layers": 128,
      "hidden_size": 2048
    },
    {
      "id": "qwen3-80b-a3b-thinking",
      "family": "qwen",
      "name": "Qwen3 80B A3B Thinking",
      "description": "Alibaba's Qwen3 80B A3B with enhanced reasoning capabilities",
      "tagline": "Reasoning-enhanced large MoE model",
      "capabilities": ["text", "thinking", "code"],
      "architecture": "qwen3_moe",
      "n_layers": 128,
      "hidden_size": 2048
    },
    {
      "id": "qwen3-235b-a22b",
      "family": "qwen",
      "name": "Qwen3 235B A22B",
      "description": "Alibaba's Qwen3 235B with A22B MoE architecture",
      "tagline": "Massive open model for general intelligence",
      "capabilities": ["text", "code"],
      "architecture": "qwen3_moe",
      "n_layers": 94,
      "hidden_size": 4096
    },
    {
      "id": "qwen3-coder-480b-a35b",
      "family": "qwen",
      "name": "Qwen3 Coder 480B A35B",
      "description": "Alibaba's Qwen3 Coder 480B optimized for code generation",
      "tagline": "Massive code-specialized MoE model",
      "capabilities": ["text", "code"],
      "architecture": "qwen3_moe",
      "n_layers": 128,
      "hidden_size": 5120
    },
    {
      "id": "gpt-oss-120b",
      "family": "gpt-oss",
      "name": "GPT-OSS 120B",
      "description": "Open source GPT model with 120B parameters",
      "tagline": "Large open-source GPT with MoE",
      "capabilities": ["text", "code"],
      "architecture": "gpt_oss_moe",
      "n_layers": 128,
      "hidden_size": 4096
    },
    {
      "id": "gpt-oss-20b",
      "family": "gpt-oss",
      "name": "GPT-OSS 20B",
      "description": "Open source GPT model with 20B parameters",
      "tagline": "Compact open-source GPT",
      "capabilities": ["text"],
      "architecture": "gpt_oss",
      "n_layers": 48,
      "hidden_size": 2560
    },
    {
      "id": "glm-4.5-air",
      "family": "glm",
      "name": "GLM 4.5 Air",
      "description": "Zhipu AI's GLM 4.5 Air model",
      "tagline": "Lightweight Chinese-English bilingual model",
      "capabilities": ["text"],
      "architecture": "glm4_moe",
      "n_layers": 40,
      "hidden_size": 4096
    },
    {
      "id": "glm-4.7",
      "family": "glm",
      "name": "GLM 4.7",
      "description": "Zhipu AI's GLM 4.7 model with MoE architecture",
      "tagline": "Large-scale Chinese-English MoE",
      "capabilities": ["text", "code"],
      "architecture": "glm4_moe",
      "n_layers": 91,
      "hidden_size": 5120
    },
    {
      "id": "minimax-m2.1",
      "family": "minimax",
      "name": "MiniMax M2.1",
      "description": "MiniMax's M2.1 large language model",
      "tagline": "High-performance general-purpose model",
      "capabilities": ["text", "code"],
      "architecture": "minimax",
      "n_layers": 80,
      "hidden_size": 6144
    }
  ]
}
214  src/exo/shared/models/registry/variants.json  Normal file
@@ -0,0 +1,214 @@
{
  "variants": [
    {
      "base_model": "llama-3.1-8b",
      "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 4637851648
    },
    {
      "base_model": "llama-3.1-8b",
      "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 8954839040
    },
    {
      "base_model": "llama-3.1-8b",
      "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16",
      "quantization": "bf16",
      "storage_size_bytes": 16065732608
    },
    {
      "base_model": "llama-3.1-70b",
      "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 40324489216
    },
    {
      "base_model": "llama-3.2-1b",
      "model_id": "mlx-community/Llama-3.2-1B-Instruct-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 874968576
    },
    {
      "base_model": "llama-3.2-3b",
      "model_id": "mlx-community/Llama-3.2-3B-Instruct-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 2019430400
    },
    {
      "base_model": "llama-3.2-3b",
      "model_id": "mlx-community/Llama-3.2-3B-Instruct-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 3620454400
    },
    {
      "base_model": "llama-3.3-70b",
      "model_id": "mlx-community/Llama-3.3-70B-Instruct-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 40324489216
    },
    {
      "base_model": "llama-3.3-70b",
      "model_id": "mlx-community/Llama-3.3-70B-Instruct-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 74823761920
    },
    {
      "base_model": "llama-3.3-70b",
      "model_id": "mlx-community/Llama-3.3-70B-Instruct",
      "quantization": "fp16",
      "storage_size_bytes": 140734881792
    },
    {
      "base_model": "deepseek-v3.1",
      "model_id": "mlx-community/DeepSeek-V3.1-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 405874409472
    },
    {
      "base_model": "deepseek-v3.1",
      "model_id": "mlx-community/DeepSeek-V3.1-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 737851965440
    },
    {
      "base_model": "kimi-k2",
      "model_id": "mlx-community/Kimi-K2-Instruct-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 607696764928
    },
    {
      "base_model": "kimi-k2",
      "model_id": "mlx-community/Kimi-K2-Thinking-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 607696764928
    },
    {
      "base_model": "qwen3-0.6b",
      "model_id": "mlx-community/Qwen3-0.6B-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 460873728
    },
    {
      "base_model": "qwen3-0.6b",
      "model_id": "mlx-community/Qwen3-0.6B-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 752879616
    },
    {
      "base_model": "qwen3-30b-a3b",
      "model_id": "mlx-community/Qwen3-30B-A3B-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 17612931072
    },
    {
      "base_model": "qwen3-30b-a3b",
      "model_id": "mlx-community/Qwen3-30B-A3B-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 32161292288
    },
    {
      "base_model": "qwen3-80b-a3b",
      "model_id": "mlx-community/Qwen3-80B-A3B-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 46170324992
    },
    {
      "base_model": "qwen3-80b-a3b",
      "model_id": "mlx-community/Qwen3-80B-A3B-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 84458057728
    },
    {
      "base_model": "qwen3-80b-a3b-thinking",
      "model_id": "mlx-community/Qwen3-80B-A3B-Thinking-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 46170324992
    },
    {
      "base_model": "qwen3-80b-a3b-thinking",
      "model_id": "mlx-community/Qwen3-80B-A3B-Thinking-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 84458057728
    },
    {
      "base_model": "qwen3-235b-a22b",
      "model_id": "mlx-community/Qwen3-235B-A22B-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 131953045504
    },
    {
      "base_model": "qwen3-235b-a22b",
      "model_id": "mlx-community/Qwen3-235B-A22B-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 247587201024
    },
    {
      "base_model": "qwen3-coder-480b-a35b",
      "model_id": "mlx-community/Qwen3-Coder-480B-A35B-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 274063867904
    },
    {
      "base_model": "qwen3-coder-480b-a35b",
      "model_id": "mlx-community/Qwen3-Coder-480B-A35B-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 506312851456
    },
    {
      "base_model": "gpt-oss-120b",
      "model_id": "mlx-community/GPT-OSS-120B-MXFP4-Q8",
      "quantization": "mxfp4-q8",
      "storage_size_bytes": 67108864000
    },
    {
      "base_model": "gpt-oss-20b",
      "model_id": "mlx-community/GPT-OSS-20B-MXFP4-Q8",
      "quantization": "mxfp4-q8",
      "storage_size_bytes": 11811160064
    },
    {
      "base_model": "glm-4.5-air",
      "model_id": "mlx-community/GLM-4.5-Air-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 9663676416
    },
    {
      "base_model": "glm-4.5-air",
      "model_id": "mlx-community/GLM-4.5-Air-bf16",
      "quantization": "bf16",
      "storage_size_bytes": 18253611008
    },
    {
      "base_model": "glm-4.7",
      "model_id": "mlx-community/GLM-4.7-4bit",
      "quantization": "4bit",
      "storage_size_bytes": 198556925568
    },
    {
      "base_model": "glm-4.7",
      "model_id": "mlx-community/GLM-4.7-6bit",
      "quantization": "6bit",
      "storage_size_bytes": 282281095168
    },
    {
      "base_model": "glm-4.7",
      "model_id": "mlx-community/GLM-4.7-8bit-gs32",
      "quantization": "8bit",
      "storage_size_bytes": 374997565440
    },
    {
      "base_model": "minimax-m2.1",
      "model_id": "mlx-community/MiniMax-M2.1-8bit",
      "quantization": "8bit",
      "storage_size_bytes": 483183656960
    },
    {
      "base_model": "minimax-m2.1",
      "model_id": "mlx-community/MiniMax-M2.1-3bit",
      "quantization": "3bit",
      "storage_size_bytes": 213674106880
    }
  ]
}
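Joining a variants.json row to its base_models.json entry is what _variant_to_config() in model_registry.py does; a sketch using that private helper purely to illustrate the derivation:

from exo.shared.models.model_registry import _variant_to_config
from exo.shared.models.registry import VARIANTS_BY_MODEL_ID

variant = VARIANTS_BY_MODEL_ID["mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"]
cfg = _variant_to_config(variant)
assert cfg is not None
assert cfg.name == "Llama 3.1 8B (4-bit)"  # base name + _quantization_display_name
assert cfg.supports_tensor                 # "llama" supports tensor parallelism
assert cfg.capabilities == ["text"]        # manual capabilities from base_models.json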