Compare commits

..

1 Commit

Author SHA1 Message Date
Evan
36dadd0082 add glm-47 and deepseek-v32 2026-01-13 13:24:25 +00:00
2 changed files with 64 additions and 28 deletions

View File

@@ -70,34 +70,36 @@ MODEL_CARDS: dict[str, ModelCard] = {
supports_tensor=True,
),
),
# "deepseek-v3.2": ModelCard(
# short_id="deepseek-v3.2",
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
# name="DeepSeek V3.2 (8-bit)",
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
# pretty_name="DeepSeek V3.2 (8-bit)",
# storage_size=Memory.from_kb(754706307),
# n_layers=61,
# hidden_size=7168,
# ),
# ),
# "deepseek-v3.2-4bit": ModelCard(
# short_id="deepseek-v3.2-4bit",
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
# name="DeepSeek V3.2 (4-bit)",
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
# tags=[],
# metadata=ModelMetadata(
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
# pretty_name="DeepSeek V3.2 (4-bit)",
# storage_size=Memory.from_kb(754706307 // 2), # TODO !!!!!
# n_layers=61,
# hidden_size=7168,
# ),
# ),
"deepseek-v3.2": ModelCard(
short_id="deepseek-v3.2",
model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
name="DeepSeek V3.2 (8-bit)",
description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
pretty_name="DeepSeek V3.2 (8-bit)",
storage_size=Memory.from_kb(754706307),
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
),
"deepseek-v3.2-4bit": ModelCard(
short_id="deepseek-v3.2-4bit",
model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
name="DeepSeek V3.2 (4-bit)",
description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
pretty_name="DeepSeek V3.2 (4-bit)",
storage_size=Memory.from_kb(754706307 // 2), # TODO: size is estimated as half the 8-bit entry's value; replace with the measured 4-bit size
n_layers=61,
hidden_size=7168,
supports_tensor=True,
),
),
# deepseek r1
# "deepseek-r1-0528-4bit": ModelCard(
# short_id="deepseek-r1-0528-4bit",
@@ -554,6 +556,36 @@ MODEL_CARDS: dict[str, ModelCard] = {
supports_tensor=True,
),
),
"glm-4.7-4bit": ModelCard(
short_id="glm-4.7-4bit",
model_id=ModelId("mlx-community/GLM-4.7-4bit"),
name="GLM 4.7 4bit",
description="GLM 4.7 4bit",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/GLM-4.7-4bit"),
pretty_name="GLM 4.7 4bit",
storage_size=Memory.from_bytes(198556925568),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
),
"glm-4.7-8bit-gs32": ModelCard(
short_id="glm-4.7-8bit-gs32",
model_id=ModelId("mlx-community/GLM-4.7-8bit-gs32"),
name="GLM 4.7 8bit (gs32)",
description="GLM 4.7 8bit (gs32)",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/GLM-4.7-8bit-gs32"),
pretty_name="GLM 4.7 8bit (gs32)",
storage_size=Memory.from_bytes(396963397248),
n_layers=91,
hidden_size=5120,
supports_tensor=True,
),
),
# "devstral-2-123b-instruct-2512-8bit": ModelCard(
# short_id="devstral-2-123b-instruct-2512-8bit",
# model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),

View File

@@ -89,6 +89,10 @@ async def assert_downloads():
await sd.ensure_shard(
await build_full_shard(MODEL_CARDS["gpt-oss-20b-4bit"].model_id)
)
await sd.ensure_shard(await build_full_shard(MODEL_CARDS["deepseek-v3.2"].model_id))
await sd.ensure_shard(
await build_full_shard(MODEL_CARDS["glm-4.7-8bit-gs32"].model_id)
)
async def ring_backend(test: Tests):