fix partial download progress showing 0% on restart

On restart, _emit_existing_download_progress() checked downloaded_bytes_this_session to decide whether a download was pending. Since this field is always 0 in a new session, partially downloaded models were reported as DownloadPending (0%) instead of DownloadOngoing with their actual progress. Check downloaded_bytes (the actual data on disk) instead.

Closes #1042
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 14:55:13 -05:00 · 2026-02-17 10:13:24 -08:00
7 changed files with 2 additions and 51 deletions
--- a/resources/inference_model_cards/mlx-community--GLM-5-4bit.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-4bit.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-5-4bit"
-n_layers = 78
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "4bit"
-base_model = "GLM 5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 418621403136
--- a/resources/inference_model_cards/mlx-community--GLM-5-8bit-MXFP8.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-8bit-MXFP8.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-5-8bit-MXFP8"
-n_layers = 78
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "8bit"
-base_model = "GLM 5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 767273926656
--- a/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5-MXFP4-Q8.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-5-MXFP4-Q8"
-n_layers = 78
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "MXFP4-Q8"
-base_model = "GLM 5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 405480321024
--- a/resources/inference_model_cards/mlx-community--GLM-5.toml
+++ b/resources/inference_model_cards/mlx-community--GLM-5.toml
@@ -1,12 +0,0 @@
-model_id = "mlx-community/GLM-5"
-n_layers = 78
-hidden_size = 6144
-supports_tensor = true
-tasks = ["TextGeneration"]
-family = "glm"
-quantization = "bf16"
-base_model = "GLM 5"
-capabilities = ["text", "thinking"]
-
-[storage_size]
-in_bytes = 1487822475264
--- a/src/exo/download/coordinator.py
+++ b/src/exo/download/coordinator.py
@@ -324,7 +324,7 @@ class DownloadCoordinator:
                                shard_metadata=progress.shard,
                                total_bytes=progress.total_bytes,
                            )
-                        elif progress.downloaded_bytes_this_session.in_bytes == 0:
+                        elif progress.downloaded_bytes.in_bytes == 0:
                            status = DownloadPending(
                                node_id=self.node_id,
                                shard_metadata=progress.shard,
--- a/src/exo/shared/models/model_cards.py
+++ b/src/exo/shared/models/model_cards.py
@@ -182,7 +182,6 @@ class ConfigData(BaseModel):
    def supports_tensor(self) -> bool:
        return self.architectures in [
            ["Glm4MoeLiteForCausalLM"],
-            ["GlmMoeDsaForCausalLM"],
            ["DeepseekV32ForCausalLM"],
            ["DeepseekV3ForCausalLM"],
            ["Qwen3NextForCausalLM"],
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -285,7 +285,7 @@ def get_eos_token_ids_for_model(model_id: ModelId) -> list[int] | None:
    model_id_lower = model_id.lower()
    if "kimi-k2" in model_id_lower:
        return [163586]
-    elif "glm-5" in model_id_lower or "glm-4.7" in model_id_lower:
+    elif "glm-4.7-flash" in model_id_lower:
        # 154820: <|endoftext|>, 154827: <|user|>, 154829: <|observation|>
        return [154820, 154827, 154829]
    elif "glm" in model_id_lower: