mirror of
https://github.com/exo-explore/exo.git
synced 2026-02-19 15:27:02 -05:00
Compare commits
3 Commits
session-id
...
feat/mac-s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
23f295e684 | ||
|
|
7637fb554f | ||
|
|
1a9c5fa6fb |
@@ -12,6 +12,7 @@
|
||||
} from "$lib/stores/app.svelte";
|
||||
import type { MessageAttachment } from "$lib/stores/app.svelte";
|
||||
import MarkdownContent from "./MarkdownContent.svelte";
|
||||
import PrefillProgressBar from "./PrefillProgressBar.svelte";
|
||||
import TokenHeatmap from "./TokenHeatmap.svelte";
|
||||
import PrefillProgressBar from "./PrefillProgressBar.svelte";
|
||||
import ImageLightbox from "./ImageLightbox.svelte";
|
||||
@@ -625,7 +626,9 @@
|
||||
<MarkdownContent
|
||||
content={message.content || (loading ? response : "")}
|
||||
/>
|
||||
{#if loading && !message.content}
|
||||
{#if loading && !message.content && prefill}
|
||||
<PrefillProgressBar progress={prefill} class="mt-2" />
|
||||
{:else if loading && !message.content}
|
||||
<span
|
||||
class="inline-block w-2 h-4 bg-exo-yellow/70 ml-1 cursor-blink"
|
||||
></span>
|
||||
|
||||
@@ -14,6 +14,21 @@
|
||||
: 0,
|
||||
);
|
||||
|
||||
const etaText = $derived.by(() => {
|
||||
if (progress.processed <= 0 || progress.total <= 0) return null;
|
||||
const elapsedMs = performance.now() - progress.startedAt;
|
||||
if (elapsedMs < 200) return null; // need a minimum sample window
|
||||
const tokensPerMs = progress.processed / elapsedMs;
|
||||
const remainingTokens = progress.total - progress.processed;
|
||||
const remainingMs = remainingTokens / tokensPerMs;
|
||||
const remainingSec = Math.ceil(remainingMs / 1000);
|
||||
if (remainingSec <= 0) return null;
|
||||
if (remainingSec < 60) return `~${remainingSec}s remaining`;
|
||||
const mins = Math.floor(remainingSec / 60);
|
||||
const secs = remainingSec % 60;
|
||||
return `~${mins}m ${secs}s remaining`;
|
||||
});
|
||||
|
||||
function formatTokenCount(count: number | undefined): string {
|
||||
if (count == null) return "0";
|
||||
if (count >= 1000) {
|
||||
@@ -40,8 +55,11 @@
|
||||
style="width: {percentage}%"
|
||||
></div>
|
||||
</div>
|
||||
<div class="text-right text-xs text-exo-light-gray/70 mt-0.5 font-mono">
|
||||
{percentage}%
|
||||
<div
|
||||
class="flex items-center justify-between text-xs text-exo-light-gray/70 mt-0.5 font-mono"
|
||||
>
|
||||
<span>{etaText ?? ""}</span>
|
||||
<span>{percentage}%</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -276,6 +276,8 @@ export interface TokenData {
|
||||
export interface PrefillProgress {
|
||||
processed: number;
|
||||
total: number;
|
||||
/** Timestamp (performance.now()) when prefill started. */
|
||||
startedAt: number;
|
||||
}
|
||||
|
||||
export interface Message {
|
||||
@@ -520,12 +522,12 @@ class AppStore {
|
||||
messages = $state<Message[]>([]);
|
||||
currentResponse = $state("");
|
||||
isLoading = $state(false);
|
||||
prefillProgress = $state<PrefillProgress | null>(null);
|
||||
|
||||
// Performance metrics
|
||||
ttftMs = $state<number | null>(null); // Time to first token in ms
|
||||
tps = $state<number | null>(null); // Tokens per second
|
||||
totalTokens = $state<number>(0); // Total tokens in current response
|
||||
prefillProgress = $state<PrefillProgress | null>(null);
|
||||
|
||||
// Abort controller for stopping generation
|
||||
private currentAbortController: AbortController | null = null;
|
||||
@@ -2018,6 +2020,7 @@ class AppStore {
|
||||
): Promise<void> {
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = "";
|
||||
let currentEventType = "";
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
@@ -2033,7 +2036,15 @@ class AppStore {
|
||||
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) continue;
|
||||
if (!trimmed) {
|
||||
currentEventType = "";
|
||||
continue;
|
||||
}
|
||||
|
||||
if (trimmed.startsWith("event: ")) {
|
||||
currentEventType = trimmed.slice(7);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle SSE comments (": key json") for prefill progress etc.
|
||||
if (trimmed.startsWith(": ") && onEvent) {
|
||||
@@ -2055,14 +2066,22 @@ class AppStore {
|
||||
|
||||
if (trimmed.startsWith("data: ")) {
|
||||
const data = trimmed.slice(6);
|
||||
if (data === "[DONE]") continue;
|
||||
if (data === "[DONE]") {
|
||||
currentEventType = "";
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data) as T;
|
||||
onChunk(parsed);
|
||||
const parsed = JSON.parse(data);
|
||||
if (currentEventType && onEvent?.[currentEventType]) {
|
||||
onEvent[currentEventType](parsed);
|
||||
} else {
|
||||
onChunk(parsed as T);
|
||||
}
|
||||
} catch {
|
||||
// Skip malformed JSON
|
||||
}
|
||||
currentEventType = "";
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2163,6 +2182,7 @@ class AppStore {
|
||||
|
||||
this.isLoading = true;
|
||||
this.currentResponse = "";
|
||||
this.prefillProgress = null;
|
||||
this.ttftMs = null;
|
||||
this.tps = null;
|
||||
this.totalTokens = 0;
|
||||
@@ -2367,6 +2387,11 @@ class AppStore {
|
||||
}
|
||||
|
||||
if (tokenContent) {
|
||||
// Clear prefill progress once tokens start arriving
|
||||
if (this.prefillProgress !== null) {
|
||||
this.prefillProgress = null;
|
||||
}
|
||||
|
||||
// Track first token for TTFT
|
||||
if (firstTokenTime === null) {
|
||||
firstTokenTime = performance.now();
|
||||
@@ -2420,6 +2445,7 @@ class AppStore {
|
||||
this.prefillProgress = {
|
||||
processed: inner.processed_tokens,
|
||||
total: inner.total_tokens,
|
||||
startedAt: this.prefillProgress?.startedAt ?? performance.now(),
|
||||
};
|
||||
},
|
||||
},
|
||||
@@ -2474,6 +2500,7 @@ class AppStore {
|
||||
this.prefillProgress = null;
|
||||
this.isLoading = false;
|
||||
this.currentResponse = "";
|
||||
this.prefillProgress = null;
|
||||
this.saveConversationsToStorage();
|
||||
}
|
||||
}
|
||||
@@ -3106,10 +3133,10 @@ export const hasStartedChat = () => appStore.hasStartedChat;
|
||||
export const messages = () => appStore.messages;
|
||||
export const currentResponse = () => appStore.currentResponse;
|
||||
export const isLoading = () => appStore.isLoading;
|
||||
export const prefillProgress = () => appStore.prefillProgress;
|
||||
export const ttftMs = () => appStore.ttftMs;
|
||||
export const tps = () => appStore.tps;
|
||||
export const totalTokens = () => appStore.totalTokens;
|
||||
export const prefillProgress = () => appStore.prefillProgress;
|
||||
export const topologyData = () => appStore.topologyData;
|
||||
export const instances = () => appStore.instances;
|
||||
export const runners = () => appStore.runners;
|
||||
|
||||
@@ -562,6 +562,8 @@ class API:
|
||||
if command_id in self._text_generation_queues:
|
||||
del self._text_generation_queues[command_id]
|
||||
|
||||
|
||||
|
||||
async def _collect_text_generation_with_stats(
|
||||
self, command_id: CommandId
|
||||
) -> BenchChatCompletionResponse:
|
||||
|
||||
@@ -90,6 +90,8 @@ def prefill(
|
||||
)
|
||||
if has_ssm:
|
||||
snapshots.append(snapshot_ssm_states(cache))
|
||||
if on_prefill_progress is not None:
|
||||
on_prefill_progress(processed, total)
|
||||
|
||||
if on_prefill_progress is not None:
|
||||
on_prefill_progress(processed, total)
|
||||
|
||||
Reference in New Issue
Block a user