mirror of
https://github.com/exo-explore/exo.git
synced 2026-01-21 20:39:59 -05:00
Compare commits
3 Commits
leo/use-ml
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
023108a19d | ||
|
|
c9818c30b4 | ||
|
|
8f6726d6be |
@@ -364,7 +364,7 @@ The `exo-bench` tool measures model prefill and token generation speed across di
|
||||
|
||||
```bash
|
||||
uv run bench/exo_bench.py \
|
||||
--model llama-3.2-1b \
|
||||
--model Llama-3.2-1B-Instruct-4bit \
|
||||
--pp 128,256,512 \
|
||||
--tg 128,256
|
||||
```
|
||||
@@ -385,7 +385,7 @@ uv run bench/exo_bench.py \
|
||||
|
||||
```bash
|
||||
uv run bench/exo_bench.py \
|
||||
--model llama-3.2-1b \
|
||||
--model Llama-3.2-1B-Instruct-4bit \
|
||||
--pp 128,512 \
|
||||
--tg 128 \
|
||||
--max-nodes 2 \
|
||||
|
||||
@@ -195,14 +195,14 @@ def resolve_model_short_id(client: ExoClient, model_arg: str) -> tuple[str, str]
|
||||
data = models.get("data") or []
|
||||
|
||||
for m in data:
|
||||
if m.get("id") == model_arg:
|
||||
short_id = str(m["id"])
|
||||
full_id = str(m.get("hugging_face_id") or m["id"])
|
||||
if m.get("name").lower() == model_arg.lower():
|
||||
short_id = str(m["name"])
|
||||
full_id = str(m.get("hugging_face_id") or m["name"])
|
||||
return short_id, full_id
|
||||
|
||||
for m in data:
|
||||
if m.get("hugging_face_id") == model_arg:
|
||||
short_id = str(m["id"])
|
||||
short_id = str(m["name"])
|
||||
full_id = str(m["hugging_face_id"])
|
||||
return short_id, full_id
|
||||
|
||||
@@ -373,7 +373,7 @@ def main() -> int:
|
||||
short_id, full_model_id = resolve_model_short_id(client, args.model)
|
||||
|
||||
previews_resp = client.request_json(
|
||||
"GET", "/instance/previews", params={"model_id": short_id}
|
||||
"GET", "/instance/previews", params={"model_id": full_model_id}
|
||||
)
|
||||
previews = previews_resp.get("previews") or []
|
||||
|
||||
|
||||
@@ -172,6 +172,33 @@
|
||||
}
|
||||
|
||||
let downloadOverview = $state<NodeEntry[]>([]);
|
||||
let models = $state<Array<{ id: string; storage_size_megabytes?: number }>>(
|
||||
[],
|
||||
);
|
||||
|
||||
async function fetchModels() {
|
||||
try {
|
||||
const response = await fetch("/models");
|
||||
if (response.ok) {
|
||||
const data = await response.json();
|
||||
models = data.data || [];
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Failed to fetch models:", error);
|
||||
}
|
||||
}
|
||||
|
||||
function getModelTotalBytes(
|
||||
modelId: string,
|
||||
downloadTotalBytes: number,
|
||||
): number {
|
||||
if (downloadTotalBytes > 0) return downloadTotalBytes;
|
||||
const model = models.find((m) => m.id === modelId);
|
||||
if (model?.storage_size_megabytes) {
|
||||
return model.storage_size_megabytes * 1024 * 1024;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
$effect(() => {
|
||||
try {
|
||||
@@ -346,6 +373,7 @@
|
||||
onMount(() => {
|
||||
// Ensure we fetch at least once when visiting downloads directly
|
||||
refreshState();
|
||||
fetchModels();
|
||||
});
|
||||
</script>
|
||||
|
||||
@@ -454,7 +482,7 @@
|
||||
{#if model.status !== "completed"}
|
||||
<div class="text-[11px] text-exo-light-gray font-mono">
|
||||
{formatBytes(model.downloadedBytes)} / {formatBytes(
|
||||
model.totalBytes,
|
||||
getModelTotalBytes(model.modelId, model.totalBytes),
|
||||
)}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
@@ -59,8 +59,9 @@ class ModelCard(CamelCaseModel):
|
||||
|
||||
@staticmethod
|
||||
async def load(model_id: ModelId) -> "ModelCard":
|
||||
if model_id in MODEL_CARDS:
|
||||
return MODEL_CARDS[model_id]
|
||||
for card in MODEL_CARDS.values():
|
||||
if card.model_id == model_id:
|
||||
return card
|
||||
return await ModelCard.from_hf(model_id)
|
||||
|
||||
@staticmethod
|
||||
@@ -409,158 +410,159 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
||||
supports_tensor=True,
|
||||
tasks=[ModelTask.TextGeneration],
|
||||
),
|
||||
"flux1-schnell": ModelCard(
|
||||
model_id=ModelId("black-forest-labs/FLUX.1-schnell"),
|
||||
storage_size=Memory.from_bytes(23782357120 + 9524621312),
|
||||
n_layers=57,
|
||||
hidden_size=1,
|
||||
supports_tensor=False,
|
||||
tasks=[ModelTask.TextToImage],
|
||||
components=[
|
||||
ComponentInfo(
|
||||
component_name="text_encoder",
|
||||
component_path="text_encoder/",
|
||||
storage_size=Memory.from_kb(0),
|
||||
n_layers=12,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None, # Single file
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="text_encoder_2",
|
||||
component_path="text_encoder_2/",
|
||||
storage_size=Memory.from_bytes(9524621312),
|
||||
n_layers=24,
|
||||
can_shard=False,
|
||||
safetensors_index_filename="model.safetensors.index.json",
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="transformer",
|
||||
component_path="transformer/",
|
||||
storage_size=Memory.from_bytes(23782357120),
|
||||
n_layers=57, # 19 transformer_blocks + 38 single_transformer_blocks
|
||||
can_shard=True,
|
||||
safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="vae",
|
||||
component_path="vae/",
|
||||
storage_size=Memory.from_kb(0),
|
||||
n_layers=None,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None,
|
||||
),
|
||||
],
|
||||
),
|
||||
"flux1-dev": ModelCard(
|
||||
model_id=ModelId("black-forest-labs/FLUX.1-dev"),
|
||||
storage_size=Memory.from_bytes(23782357120 + 9524621312),
|
||||
n_layers=57,
|
||||
hidden_size=1,
|
||||
supports_tensor=False,
|
||||
tasks=[ModelTask.TextToImage, ModelTask.ImageToImage],
|
||||
components=[
|
||||
ComponentInfo(
|
||||
component_name="text_encoder",
|
||||
component_path="text_encoder/",
|
||||
storage_size=Memory.from_kb(0),
|
||||
n_layers=12,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None, # Single file
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="text_encoder_2",
|
||||
component_path="text_encoder_2/",
|
||||
storage_size=Memory.from_bytes(9524621312),
|
||||
n_layers=24,
|
||||
can_shard=False,
|
||||
safetensors_index_filename="model.safetensors.index.json",
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="transformer",
|
||||
component_path="transformer/",
|
||||
storage_size=Memory.from_bytes(23802816640),
|
||||
n_layers=57, # 19 transformer_blocks + 38 single_transformer_blocks
|
||||
can_shard=True,
|
||||
safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="vae",
|
||||
component_path="vae/",
|
||||
storage_size=Memory.from_kb(0),
|
||||
n_layers=None,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None,
|
||||
),
|
||||
],
|
||||
),
|
||||
"qwen-image": ModelCard(
|
||||
model_id=ModelId("Qwen/Qwen-Image"),
|
||||
storage_size=Memory.from_bytes(16584333312 + 40860802176),
|
||||
n_layers=60, # Qwen has 60 transformer blocks (all joint-style)
|
||||
hidden_size=1,
|
||||
supports_tensor=False,
|
||||
tasks=[ModelTask.TextToImage, ModelTask.ImageToImage],
|
||||
components=[
|
||||
ComponentInfo(
|
||||
component_name="text_encoder",
|
||||
component_path="text_encoder/",
|
||||
storage_size=Memory.from_kb(16584333312),
|
||||
n_layers=12,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None, # Single file
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="transformer",
|
||||
component_path="transformer/",
|
||||
storage_size=Memory.from_bytes(40860802176),
|
||||
n_layers=60,
|
||||
can_shard=True,
|
||||
safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="vae",
|
||||
component_path="vae/",
|
||||
storage_size=Memory.from_kb(0),
|
||||
n_layers=None,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None,
|
||||
),
|
||||
],
|
||||
),
|
||||
"qwen-image-edit-2509": ModelCard(
|
||||
model_id=ModelId("Qwen/Qwen-Image-Edit-2509"),
|
||||
storage_size=Memory.from_bytes(16584333312 + 40860802176),
|
||||
n_layers=60, # Qwen has 60 transformer blocks (all joint-style)
|
||||
hidden_size=1,
|
||||
supports_tensor=False,
|
||||
tasks=[ModelTask.ImageToImage],
|
||||
components=[
|
||||
ComponentInfo(
|
||||
component_name="text_encoder",
|
||||
component_path="text_encoder/",
|
||||
storage_size=Memory.from_kb(16584333312),
|
||||
n_layers=12,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None, # Single file
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="transformer",
|
||||
component_path="transformer/",
|
||||
storage_size=Memory.from_bytes(40860802176),
|
||||
n_layers=60,
|
||||
can_shard=True,
|
||||
safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
),
|
||||
ComponentInfo(
|
||||
component_name="vae",
|
||||
component_path="vae/",
|
||||
storage_size=Memory.from_kb(0),
|
||||
n_layers=None,
|
||||
can_shard=False,
|
||||
safetensors_index_filename=None,
|
||||
),
|
||||
],
|
||||
),
|
||||
# Image models commented out - feature not stable (see https://github.com/exo-explore/exo/issues/1242)
|
||||
# "flux1-schnell": ModelCard(
|
||||
# model_id=ModelId("black-forest-labs/FLUX.1-schnell"),
|
||||
# storage_size=Memory.from_bytes(23782357120 + 9524621312),
|
||||
# n_layers=57,
|
||||
# hidden_size=1,
|
||||
# supports_tensor=False,
|
||||
# tasks=[ModelTask.TextToImage],
|
||||
# components=[
|
||||
# ComponentInfo(
|
||||
# component_name="text_encoder",
|
||||
# component_path="text_encoder/",
|
||||
# storage_size=Memory.from_kb(0),
|
||||
# n_layers=12,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None, # Single file
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="text_encoder_2",
|
||||
# component_path="text_encoder_2/",
|
||||
# storage_size=Memory.from_bytes(9524621312),
|
||||
# n_layers=24,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename="model.safetensors.index.json",
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="transformer",
|
||||
# component_path="transformer/",
|
||||
# storage_size=Memory.from_bytes(23782357120),
|
||||
# n_layers=57, # 19 transformer_blocks + 38 single_transformer_blocks
|
||||
# can_shard=True,
|
||||
# safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="vae",
|
||||
# component_path="vae/",
|
||||
# storage_size=Memory.from_kb(0),
|
||||
# n_layers=None,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None,
|
||||
# ),
|
||||
# ],
|
||||
# ),
|
||||
# "flux1-dev": ModelCard(
|
||||
# model_id=ModelId("black-forest-labs/FLUX.1-dev"),
|
||||
# storage_size=Memory.from_bytes(23782357120 + 9524621312),
|
||||
# n_layers=57,
|
||||
# hidden_size=1,
|
||||
# supports_tensor=False,
|
||||
# tasks=[ModelTask.TextToImage, ModelTask.ImageToImage],
|
||||
# components=[
|
||||
# ComponentInfo(
|
||||
# component_name="text_encoder",
|
||||
# component_path="text_encoder/",
|
||||
# storage_size=Memory.from_kb(0),
|
||||
# n_layers=12,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None, # Single file
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="text_encoder_2",
|
||||
# component_path="text_encoder_2/",
|
||||
# storage_size=Memory.from_bytes(9524621312),
|
||||
# n_layers=24,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename="model.safetensors.index.json",
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="transformer",
|
||||
# component_path="transformer/",
|
||||
# storage_size=Memory.from_bytes(23802816640),
|
||||
# n_layers=57, # 19 transformer_blocks + 38 single_transformer_blocks
|
||||
# can_shard=True,
|
||||
# safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="vae",
|
||||
# component_path="vae/",
|
||||
# storage_size=Memory.from_kb(0),
|
||||
# n_layers=None,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None,
|
||||
# ),
|
||||
# ],
|
||||
# ),
|
||||
# "qwen-image": ModelCard(
|
||||
# model_id=ModelId("Qwen/Qwen-Image"),
|
||||
# storage_size=Memory.from_bytes(16584333312 + 40860802176),
|
||||
# n_layers=60, # Qwen has 60 transformer blocks (all joint-style)
|
||||
# hidden_size=1,
|
||||
# supports_tensor=False,
|
||||
# tasks=[ModelTask.TextToImage, ModelTask.ImageToImage],
|
||||
# components=[
|
||||
# ComponentInfo(
|
||||
# component_name="text_encoder",
|
||||
# component_path="text_encoder/",
|
||||
# storage_size=Memory.from_kb(16584333312),
|
||||
# n_layers=12,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None, # Single file
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="transformer",
|
||||
# component_path="transformer/",
|
||||
# storage_size=Memory.from_bytes(40860802176),
|
||||
# n_layers=60,
|
||||
# can_shard=True,
|
||||
# safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="vae",
|
||||
# component_path="vae/",
|
||||
# storage_size=Memory.from_kb(0),
|
||||
# n_layers=None,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None,
|
||||
# ),
|
||||
# ],
|
||||
# ),
|
||||
# "qwen-image-edit-2509": ModelCard(
|
||||
# model_id=ModelId("Qwen/Qwen-Image-Edit-2509"),
|
||||
# storage_size=Memory.from_bytes(16584333312 + 40860802176),
|
||||
# n_layers=60, # Qwen has 60 transformer blocks (all joint-style)
|
||||
# hidden_size=1,
|
||||
# supports_tensor=False,
|
||||
# tasks=[ModelTask.ImageToImage],
|
||||
# components=[
|
||||
# ComponentInfo(
|
||||
# component_name="text_encoder",
|
||||
# component_path="text_encoder/",
|
||||
# storage_size=Memory.from_kb(16584333312),
|
||||
# n_layers=12,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None, # Single file
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="transformer",
|
||||
# component_path="transformer/",
|
||||
# storage_size=Memory.from_bytes(40860802176),
|
||||
# n_layers=60,
|
||||
# can_shard=True,
|
||||
# safetensors_index_filename="diffusion_pytorch_model.safetensors.index.json",
|
||||
# ),
|
||||
# ComponentInfo(
|
||||
# component_name="vae",
|
||||
# component_path="vae/",
|
||||
# storage_size=Memory.from_kb(0),
|
||||
# n_layers=None,
|
||||
# can_shard=False,
|
||||
# safetensors_index_filename=None,
|
||||
# ),
|
||||
# ],
|
||||
# ),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ def exo_shard_downloader(max_parallel_downloads: int = 8) -> ShardDownloader:
|
||||
|
||||
|
||||
async def build_base_shard(model_id: ModelId) -> ShardMetadata:
|
||||
model_card = await ModelCard.from_hf(model_id)
|
||||
model_card = await ModelCard.load(model_id)
|
||||
return PipelineShardMetadata(
|
||||
model_card=model_card,
|
||||
device_rank=0,
|
||||
|
||||
Reference in New Issue
Block a user