Compare commits

...

1 Commit

Author: Alex Cheema
SHA1: 21b594b176
Date: 2026-02-17 10:20:34 -08:00

add GLM-5 model support
- Add model cards for GLM-5 variants: MXFP4-Q8, 4bit, 8bit-MXFP8, bf16
- Add GlmMoeDsaForCausalLM to the supports_tensor whitelist
- Fix EOS tokens: GLM-5 and GLM-4.7 share the same EOS tokens
  (154820, 154827, 154829); consolidate the match condition

Note: full tensor parallel support for GLM-5 requires additional
auto_parallel changes (CacheList compatibility, NullIndexer for DSA)
and upstream MLX fixes for MXFP quantized matmul.

Closes #1468

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

6 changed files with 50 additions and 1 deletion


@@ -0,0 +1,12 @@
model_id = "mlx-community/GLM-5-4bit"
n_layers = 78
hidden_size = 6144
supports_tensor = true
tasks = ["TextGeneration"]
family = "glm"
quantization = "4bit"
base_model = "GLM 5"
capabilities = ["text", "thinking"]
[storage_size]
in_bytes = 418621403136
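
For reference, a minimal sketch of reading one of these cards with Python's standard tomllib parser; the local file path below is hypothetical, and this is not necessarily how exo itself loads model cards:

import tomllib  # standard-library TOML parser (Python 3.11+)

# Hypothetical path; where these cards live in the repo is not shown in this diff.
with open("GLM-5-4bit.toml", "rb") as f:
    card = tomllib.load(f)

print(card["model_id"])                        # mlx-community/GLM-5-4bit
print(card["supports_tensor"])                 # True
print(card["storage_size"]["in_bytes"] / 1e9)  # ~418.6, i.e. roughly 419 GB on disk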


@@ -0,0 +1,12 @@
model_id = "mlx-community/GLM-5-8bit-MXFP8"
n_layers = 78
hidden_size = 6144
supports_tensor = true
tasks = ["TextGeneration"]
family = "glm"
quantization = "8bit"
base_model = "GLM 5"
capabilities = ["text", "thinking"]
[storage_size]
in_bytes = 767273926656


@@ -0,0 +1,12 @@
model_id = "mlx-community/GLM-5-MXFP4-Q8"
n_layers = 78
hidden_size = 6144
supports_tensor = true
tasks = ["TextGeneration"]
family = "glm"
quantization = "MXFP4-Q8"
base_model = "GLM 5"
capabilities = ["text", "thinking"]
[storage_size]
in_bytes = 405480321024


@@ -0,0 +1,12 @@
model_id = "mlx-community/GLM-5"
n_layers = 78
hidden_size = 6144
supports_tensor = true
tasks = ["TextGeneration"]
family = "glm"
quantization = "bf16"
base_model = "GLM 5"
capabilities = ["text", "thinking"]
[storage_size]
in_bytes = 1487822475264


@@ -182,6 +182,7 @@ class ConfigData(BaseModel):
     def supports_tensor(self) -> bool:
         return self.architectures in [
             ["Glm4MoeLiteForCausalLM"],
+            ["GlmMoeDsaForCausalLM"],
             ["DeepseekV32ForCausalLM"],
             ["DeepseekV3ForCausalLM"],
             ["Qwen3NextForCausalLM"],


@@ -285,7 +285,7 @@ def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
     model_id_lower = model_id.lower()
     if "kimi-k2" in model_id_lower:
         return [163586]
-    elif "glm-4.7-flash" in model_id_lower:
+    elif "glm-5" in model_id_lower or "glm-4.7" in model_id_lower:
         # 154820: <|endoftext|>, 154827: <|user|>, 154829: <|observation|>
         return [154820, 154827, 154829]
     elif "glm" in model_id_lower: