Files
exo/shared/models/model_cards.py
Matt Beton 1fe4ed3442 Worker Exception & Timeout Refactor
Co-authored-by: Gelu Vrabie <gelu@exolabs.net>
Co-authored-by: Alex Cheema <alexcheema123@gmail.com>
Co-authored-by: Seth Howes <sethshowes@gmail.com>
2025-08-02 08:28:37 -07:00

266 lines
8.8 KiB
Python

from typing import List
from pydantic import BaseModel
from shared.types.models import ModelMetadata
class ModelCard(BaseModel):
    """Catalogue entry describing a single model known to this module.

    Instances are stored in ``MODEL_CARDS`` keyed by ``short_id``. Pydantic
    validates the declared field types when a card is constructed.
    """

    # Short lookup key; in MODEL_CARDS it matches the dictionary key.
    short_id: str

    # Full model identifier, e.g. "mlx-community/Llama-3.2-1B-Instruct-4bit"
    # (presumably a Hugging Face repository id; confirm against the loader).
    model_id: str

    # Human-readable display name.
    name: str

    # Free-text description of the model.
    description: str

    # Arbitrary string labels; every card defined in this file passes [].
    tags: List[str]

    # Structured metadata (model id, pretty name, storage size, layer count).
    metadata: ModelMetadata
def _build_card(
    *,
    short_id: str,
    model_id: str,
    display_name: str,
    description: str,
    size_kilobytes: int,
    layer_count: int,
) -> ModelCard:
    """Assemble a ModelCard together with its nested ModelMetadata.

    The card and its metadata share the same ``model_id``, and the card's
    ``name`` doubles as the metadata's ``pretty_name``, so each value is
    stated only once per entry. ``tags`` is always an empty list.
    """
    return ModelCard(
        short_id=short_id,
        model_id=model_id,
        name=display_name,
        description=description,
        tags=[],
        metadata=ModelMetadata(
            model_id=model_id,
            pretty_name=display_name,
            storage_size_kilobytes=size_kilobytes,
            n_layers=layer_count,
        ),
    )


# Every known card, in the order it appears in MODEL_CARDS. Several short ids
# are aliases that point at the same model_id (e.g. "llama-3.3" and
# "llama-3.3:70b"); each alias still gets its own ModelCard instance.
_ALL_CARDS: tuple[ModelCard, ...] = (
    # --- DeepSeek V3 (8-bit repo) ---
    _build_card(
        short_id="deepseek-v3-0324",
        model_id="mlx-community/DeepSeek-v3-0324-8bit",
        display_name="DeepSeek V3 fp8",
        description="DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.",
        # Looks like a byte count converted to kilobytes via floor division;
        # TODO confirm the source of the figure.
        size_kilobytes=754998771712 // 1024,
        layer_count=61,
    ),
    # --- Llama 3.3 ---
    _build_card(
        short_id="llama-3.3",
        model_id="mlx-community/Llama-3.3-70B-Instruct-4bit",
        display_name="Llama 3.3 70B",
        description="The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)",
        size_kilobytes=38758160,
        layer_count=80,
    ),
    _build_card(
        short_id="llama-3.3:70b",
        model_id="mlx-community/Llama-3.3-70B-Instruct-4bit",
        display_name="Llama 3.3 70B",
        description="The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)",
        size_kilobytes=38758160,
        layer_count=80,
    ),
    # --- Llama 3.2 ---
    _build_card(
        short_id="llama-3.2",
        model_id="mlx-community/Llama-3.2-1B-Instruct-4bit",
        display_name="Llama 3.2 1B",
        description="Llama 3.2 is a large language model trained on the Llama 3.2 dataset.",
        size_kilobytes=678948,
        layer_count=16,
    ),
    _build_card(
        short_id="llama-3.2:1b",
        model_id="mlx-community/Llama-3.2-1B-Instruct-4bit",
        display_name="Llama 3.2 1B",
        description="Llama 3.2 is a large language model trained on the Llama 3.2 dataset.",
        size_kilobytes=678948,
        layer_count=16,
    ),
    _build_card(
        short_id="llama-3.2:3b",
        model_id="mlx-community/Llama-3.2-3B-Instruct-4bit",
        display_name="Llama 3.2 3B",
        description="Llama 3.2 is a large language model trained on the Llama 3.2 dataset.",
        size_kilobytes=1765062,
        layer_count=28,
    ),
    # --- Llama 3.1 ---
    _build_card(
        short_id="llama-3.1:8b",
        model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit",
        display_name="Llama 3.1 8B",
        description="Llama 3.1 is a large language model trained on the Llama 3.1 dataset.",
        size_kilobytes=4411528,
        layer_count=32,
    ),
    # NOTE(review): this key uses a hyphen ("-70b") where the other sized
    # variants use a colon (":8b", ":70b"); kept as-is because callers look
    # cards up by this exact string.
    _build_card(
        short_id="llama-3.1-70b",
        model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit",
        display_name="Llama 3.1 70B",
        description="Llama 3.1 is a large language model trained on the Llama 3.1 dataset.",
        size_kilobytes=38758160,
        layer_count=80,
    ),
    # --- DeepSeek R1 ---
    _build_card(
        short_id="deepseek-r1",
        model_id="mlx-community/DeepSeek-R1-4bit",
        display_name="DeepSeek R1 671B (4-bit)",
        description="DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.",
        size_kilobytes=409706307,
        layer_count=61,
    ),
    _build_card(
        short_id="deepseek-r1:671b",
        model_id="mlx-community/DeepSeek-R1-4bit",
        display_name="DeepSeek R1 671B",
        description="DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.",
        size_kilobytes=409706307,
        layer_count=61,
    ),
    # --- DeepSeek V3 (4-bit repo) ---
    # NOTE(review): "4B" in the display name presumably means 4-bit (the
    # repo id ends in "-4bit"), not 4 billion parameters; confirm the
    # intended label before changing it.
    _build_card(
        short_id="deepseek-v3",
        model_id="mlx-community/DeepSeek-V3-0324-4bit",
        display_name="DeepSeek V3 4B",
        description="DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.",
        size_kilobytes=368756663,
        layer_count=61,
    ),
    _build_card(
        short_id="deepseek-v3:671b",
        model_id="mlx-community/DeepSeek-V3-0324-4bit",
        display_name="DeepSeek V3 671B",
        description="DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.",
        size_kilobytes=368756663,
        layer_count=61,
    ),
    # --- Phi 3 Mini ---
    _build_card(
        short_id="phi-3-mini",
        model_id="mlx-community/Phi-3-mini-128k-instruct-4bit",
        display_name="Phi 3 Mini 128k",
        description="Phi 3 Mini is a large language model trained on the Phi 3 Mini dataset.",
        size_kilobytes=2099262,
        layer_count=32,
    ),
    _build_card(
        short_id="phi-3-mini:128k",
        model_id="mlx-community/Phi-3-mini-128k-instruct-4bit",
        display_name="Phi 3 Mini 128k",
        description="Phi 3 Mini is a large language model trained on the Phi 3 Mini dataset.",
        size_kilobytes=2099262,
        layer_count=32,
    ),
    # --- Qwen3 ---
    _build_card(
        short_id="qwen3-0.6b",
        model_id="mlx-community/Qwen3-0.6B-4bit",
        display_name="Qwen3 0.6B",
        description="Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset.",
        size_kilobytes=327512,
        layer_count=28,
    ),
    _build_card(
        short_id="qwen3-30b",
        model_id="mlx-community/Qwen3-30B-A3B-4bit",
        display_name="Qwen3 30B (Active 3B)",
        description="Qwen3 30B is a large language model trained on the Qwen3 30B dataset.",
        size_kilobytes=16772092,
        layer_count=48,
    ),
    # --- Granite 3.3 ---
    _build_card(
        short_id="granite-3.3-2b",
        model_id="mlx-community/granite-3.3-2b-instruct-fp16",
        display_name="Granite 3.3 2B",
        description="Granite-3.3-2B-Instruct is a 2-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.",
        size_kilobytes=4948320,
        layer_count=40,
    ),
    _build_card(
        short_id="granite-3.3-8b",
        model_id="mlx-community/granite-3.3-8b-instruct-fp16",
        display_name="Granite 3.3 8B",
        description="Granite-3.3-8B-Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities.",
        size_kilobytes=15958720,
        layer_count=40,
    ),
    # --- SmolLM ---
    _build_card(
        short_id="smol-lm-135m",
        model_id="mlx-community/SmolLM-135M-4bit",
        display_name="Smol LM 135M",
        # The trailing space is part of the original description text.
        description="SmolLM is a series of state-of-the-art small language models available in three sizes: 135M, 360M, and 1.7B parameters. ",
        size_kilobytes=73940,
        layer_count=30,
    ),
)

# Lookup table from short id to card; insertion order follows _ALL_CARDS.
MODEL_CARDS: dict[str, ModelCard] = {card.short_id: card for card in _ALL_CARDS}