Compare commits

...

3 Commits

Author SHA1 Message Date
rltakashige
27b4993e64 Merge branch 'main' into fix-partial-download-progress 2026-02-20 12:44:19 +00:00
Alex Cheema
bddad7e79c feat: show ETA on prefill progress bar (#1557)
## Summary
- Show estimated time remaining during prefill (prompt processing phase)
- Track prefill start time via performance.now() and extrapolate from
observed token throughput
- Display ~Xs remaining or ~Xm Ys remaining next to the percentage on
the progress bar
- Wait 200ms before showing ETA to ensure a stable sample window

## Changes
**PrefillProgressBar.svelte**: Add etaText derived computation that
calculates remaining time from (remainingTokens / tokensPerMs). Renders
in a new flex row below the progress bar alongside the percentage.

**app.svelte.ts**: Add startedAt: number field to PrefillProgress
interface. Set on first prefill_progress SSE event, preserved across
subsequent updates.

## Test plan
- [ ] Start inference with a long prompt (10k+ tokens) on a multi-node
cluster
- [ ] Verify the progress bar shows ~Xs remaining after ~200ms of
prefill
- [ ] Verify the ETA decreases as prefill progresses
- [ ] Verify short prefills (<200ms) don't flash a briefly-visible ETA
- [ ] Verify ETA disappears when prefill completes and token generation
begins

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: rltakashige <rl.takashige@gmail.com>
2026-02-20 12:37:56 +00:00
Alex Cheema
526cd9f333 fix partial download progress showing 0% on restart
On restart, _emit_existing_download_progress() checked
downloaded_bytes_this_session to decide if a download was pending.
Since this field is always 0 in a new session, partially downloaded
models were reported as DownloadPending (0%) instead of DownloadOngoing
with their actual progress. Check downloaded_bytes (actual data on
disk) instead.

Closes #1042

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:13:24 -08:00
3 changed files with 24 additions and 3 deletions

View File

@@ -14,6 +14,21 @@
: 0,
);
// Human-readable estimate of the remaining prefill time, or null when no
// estimate should be shown. Extrapolates from the average throughput
// observed since progress.startedAt.
const etaText = $derived.by(() => {
// No tokens processed yet, or no known total: nothing to extrapolate from.
if (progress.processed <= 0 || progress.total <= 0) return null;
const elapsedMs = performance.now() - progress.startedAt;
if (elapsedMs < 200) return null; // need a minimum sample window
// Average throughput so far, in tokens per millisecond.
const tokensPerMs = progress.processed / elapsedMs;
const remainingTokens = progress.total - progress.processed;
const remainingMs = remainingTokens / tokensPerMs;
// Round up so a fractional remaining second is still reported as a full second.
const remainingSec = Math.ceil(remainingMs / 1000);
// Hide the ETA once nothing remains (also covers processed >= total).
if (remainingSec <= 0) return null;
if (remainingSec < 60) return `~${remainingSec}s remaining`;
// One minute or more: split into whole minutes and leftover seconds.
const mins = Math.floor(remainingSec / 60);
const secs = remainingSec % 60;
return `~${mins}m ${secs}s remaining`;
});
function formatTokenCount(count: number | undefined): string {
if (count == null) return "0";
if (count >= 1000) {
@@ -40,8 +55,11 @@
style="width: {percentage}%"
></div>
</div>
<div class="text-right text-xs text-exo-light-gray/70 mt-0.5 font-mono">
{percentage}%
<div
class="flex items-center justify-between text-xs text-exo-light-gray/70 mt-0.5 font-mono"
>
<span>{etaText ?? ""}</span>
<span>{percentage}%</span>
</div>
</div>

View File

@@ -281,6 +281,8 @@ export interface TokenData {
/** Snapshot of prefill progress, in tokens, plus when the prefill began. */
export interface PrefillProgress {
/** Number of tokens processed so far. */
processed: number;
/** Total number of tokens to process. */
total: number;
/** Timestamp (performance.now()) when prefill started. */
startedAt: number;
}
export interface Message {
@@ -2464,6 +2466,7 @@ class AppStore {
this.prefillProgress = {
processed: inner.processed_tokens,
total: inner.total_tokens,
startedAt: this.prefillProgress?.startedAt ?? performance.now(),
};
},
},

View File

@@ -338,7 +338,7 @@ class DownloadCoordinator:
),
)
elif progress.status in ["in_progress", "not_started"]:
if progress.downloaded_this_session.in_bytes == 0:
if progress.downloaded_bytes.in_bytes == 0:
status = DownloadPending(
node_id=self.node_id,
shard_metadata=progress.shard,