Compare commits

...

4 Commits

Author SHA1 Message Date
Alex Cheema
3cf844e7f1 Merge branch 'main' into meta-instance-split/download-completion-detection 2026-02-22 06:57:27 -08:00
Alex Cheema
18717023ad chore: remove deprecated MlxIbv dashboard references (#1584)
## Summary
- Remove legacy MlxIbvInstance references from ChatSidebar and ModelCard
components
- MlxIbv was replaced by MlxJaccl; these are leftover type checks
- Split from #1519 for independent review

## Test plan
- [x] Visual inspection of dashboard components

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 06:56:12 -08:00
Alex Cheema
4768f50d56 fix: detect completed downloads by checking final file exists
The previous byte-comparison fallback in the coordinator could falsely
report .partial files as complete (e.g. when a process was killed after
download but before hash verification and rename). Instead, fix the
source: only mark a file as "complete" during status scanning when the
final (non-.partial) file exists on disk, which implies hash verification
and rename succeeded. Remove the coordinator-level byte comparison
workaround since the source now reports correctly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 13:14:07 -08:00
Alex Cheema
3b54e7dfa7 fix: detect completed downloads via byte comparison
When scanning existing download status, a download could report
status "in_progress" or "not_started" even though all bytes have
been downloaded. This adds a fallback check: if downloaded >= total
bytes (and total > 0), treat it as completed regardless of the
reported status string.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 07:21:44 -08:00
3 changed files with 8 additions and 9 deletions

View File

@@ -185,11 +185,7 @@
let instanceType: string | null = null;
if (instanceTag === "MlxRingInstance") instanceType = "MLX Ring";
- else if (
- instanceTag === "MlxIbvInstance" ||
- instanceTag === "MlxJacclInstance"
- )
- instanceType = "MLX RDMA";
+ else if (instanceTag === "MlxJacclInstance") instanceType = "MLX RDMA";
let sharding: string | null = null;
const inst = instance as {

View File

@@ -21,7 +21,7 @@
} | null;
nodes?: Record<string, NodeInfo>;
sharding?: "Pipeline" | "Tensor";
- runtime?: "MlxRing" | "MlxIbv" | "MlxJaccl";
+ runtime?: "MlxRing" | "MlxJaccl";
onLaunch?: () => void;
tags?: string[];
apiPreview?: PlacementPreview | null;
@@ -348,7 +348,7 @@
// Debug mode state
const isDebugMode = $derived(debugMode());
const topology = $derived(topologyData());
- const isRdma = $derived(runtime === "MlxIbv" || runtime === "MlxJaccl");
+ const isRdma = $derived(runtime === "MlxJaccl");
// Get interface name for an IP from node data
function getInterfaceForIp(nodeId: string, ip?: string): string | null {
@@ -575,7 +575,7 @@
>
{runtime === "MlxRing"
? "MLX Ring"
- : runtime === "MlxIbv" || runtime === "MlxJaccl"
+ : runtime === "MlxJaccl"
? "MLX RDMA"
: runtime}
</span>

View File

@@ -823,6 +823,7 @@ async def download_shard(
for file in filtered_file_list:
downloaded_bytes = await get_downloaded_size(target_dir / file.path)
+ final_file_exists = await aios.path.exists(target_dir / file.path)
file_progress[file.path] = RepoFileDownloadProgress(
repo_id=shard.model_card.model_id,
repo_revision=revision,
@@ -832,7 +833,9 @@ async def download_shard(
total=Memory.from_bytes(file.size or 0),
speed=0,
eta=timedelta(0),
- status="complete" if downloaded_bytes == file.size else "not_started",
+ status="complete"
+ if final_file_exists and downloaded_bytes == file.size
+ else "not_started",
start_time=time.time(),
)