Mirror of https://github.com/exo-explore/exo.git, synced 2026-02-10 14:11:11 -05:00

Compare commits: leo/tmp-te ... leo/send-a (19 commits)

- aeff7b9d19
- 036fda70a5
- eb9391810a
- 305a3c8b70
- ead19bea74
- 5a83e59182
- 5b5577bead
- 8314a2aa78
- 163cf18384
- 2204f651c8
- 4abdaaf74b
- 2fbdb27bb1
- 3f57416dbf
- 8f3681cf7e
- 9dc4f786bd
- dcb4cabc15
- d79b3a0e75
- a2f1d48712
- 3b2f553a25
.github/workflows/build-app.yml (vendored, 8 changed lines)

@@ -396,7 +396,7 @@ jobs:
           path: output/EXO-${{ env.RELEASE_VERSION }}.dmg

       - name: Upload to S3
-        if: env.SPARKLE_S3_BUCKET != '' && github.ref_type == 'tag'
+        if: env.SPARKLE_S3_BUCKET != ''
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -412,6 +412,12 @@ jobs:
             PREFIX="${PREFIX}/"
           fi
           DMG_NAME="EXO-${RELEASE_VERSION}.dmg"
+
+          if [[ "${{ github.ref_type }}" != "tag" ]]; then
+            aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-${GITHUB_SHA}.dmg"
+            exit 0
+          fi
+
           aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}${DMG_NAME}"
           if [[ "$IS_ALPHA" != "true" ]]; then
             aws s3 cp "$DMG_NAME" "s3://${SPARKLE_S3_BUCKET}/${PREFIX}EXO-latest.dmg"
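In short, the workflow now uploads every build: non-tag builds land under a commit-SHA name, while tag builds keep the versioned name and, for non-alpha releases, also refresh EXO-latest.dmg. A small Python restatement of that destination logic, for clarity only; the function name and signature are illustrative and not part of the repo:

def s3_keys(prefix: str, version: str, sha: str, is_tag: bool, is_alpha: bool) -> list[str]:
    # Hypothetical restatement of the shell logic in the workflow above.
    if not is_tag:
        return [f"{prefix}EXO-{sha}.dmg"]  # non-tag builds keyed by commit SHA
    keys = [f"{prefix}EXO-{version}.dmg"]
    if not is_alpha:
        keys.append(f"{prefix}EXO-latest.dmg")  # alphas never become "latest"
    return keys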
@@ -2366,7 +2366,7 @@ class custom_function:
 def default_device() -> Device:
     """Get the default device."""

-def default_stream(device: Device) -> Stream:
+def default_stream(device: Device | DeviceType) -> Stream:
     """Get the device's default stream."""

 def degrees(a: array, /, *, stream: Stream | Device | None = ...) -> array:
TODO.md (14 changed lines)

@@ -1,28 +1,14 @@
 2. Currently a lot of requests from the API are timing out, but we still process those requests internally. If an API request times out, we should cancel all of its corresponding tasks (there is no point processing a request with nobody listening).
 3. Task cancellation. When an API HTTP request gets cancelled, it should cancel the corresponding task.
 4. I'd like to see profiled network latency / bandwidth.
 5. I'd like to see how much bandwidth each link is using.
 6. We should handle the case where one machine doesn't have the model downloaded and the other machines are waiting on it. In this case we get loads of timeout errors because the others are waiting for the one that needs to download the model.
 7. In continuous batching, a new incoming prompt blocks decode of the current batch until its prefill is complete. Solve this.
 8. We want people to be able to copy models over to a new device without ever connecting EXO to the internet. Right now EXO requires an internet connection once, to cache some files used to check whether a download is complete. Instead, we should simply check whether there is a non-empty model folder locally with no .partial files; that indicates a fully downloaded model that can be loaded (see the sketch after this list).
 10. More granular control over how to deploy instances.
 12. Nix is great, but installing it is a pain and we have ended up with a lot of PATH and installation issues. For example, after rebooting, mike seemed to no longer have a Nix installation and needed reinstalling. It had a bunch of broken symlinks left over from Nix that caused ssh to fail, making it even harder to debug. We need consistent environments (perhaps MDM) so we can guarantee Nix is installed properly on each machine.
 13. Memory pressure instead of memory used.
 14. Show the type of each connection (TB5, Ethernet, etc.) in the UI. Refer to old exo: https://github.com/exo-explore/exo/blob/56f783b38dc6b08ce606b07a5386dc40dae00330/exo/helpers.py#L251
 15. Prioritise certain connection types (or prioritise by latency): TB5 > Ethernet > WiFi. Refer to old exo: https://github.com/exo-explore/exo/blob/56f783b38dc6b08ce606b07a5386dc40dae00330/exo/helpers.py#L251
 16. Dynamically switch to a higher-priority connection when it becomes available. Probably bring back InstanceReplacedAtomically.
 17. Faster model loads by streaming the model from other devices in the cluster.
 18. Add support for specifying the type of network connection to use in a test. Depends on 15/16.
 20. Add chat completion cancellation (e.g. OpenWebUI has something for cancelling an ongoing request).
 23. Do we need cache_limit? We went back and forth on that a lot because we thought it might be causing issues. One problem is that it is set relative to model size, so if you have multiple models loaded it will take the most recent model's size for the cache_limit. This is problematic if you launch DeepSeek -> Llama, for example.
 24. Further OpenAI / LM Studio API compatibility.
 25. Rethink retry logic.
 26. Task cancellation. When an API HTTP request gets cancelled, it should cancel the corresponding task.
 27. Log cleanup - per-module log filters and default to DEBUG log levels.
 28. Validate RDMA connections with ibv_devinfo in the info gatherer.

 Potential refactors:

 2. Topology can be simplified

 Random errors we've run into:
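A minimal sketch of the offline completeness check described in item 8, assuming only the repo's convention that interrupted downloads leave .partial files behind; the function name is illustrative and not part of the codebase:

from pathlib import Path


def is_model_fully_downloaded(model_dir: Path) -> bool:
    """Offline check: a non-empty model folder with no .partial files.

    Hypothetical helper for TODO item 8; not part of the diff above.
    """
    if not model_dir.is_dir():
        return False
    files = [f for f in model_dir.rglob("*") if f.is_file()]
    if not files:
        return False  # empty folder: nothing was downloaded
    return not any(f.suffix == ".partial" for f in files)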
@@ -44,7 +44,7 @@ struct BugReportService {
        let dayPrefix = Self.dayPrefixString(now)
        let prefix = "reports/\(dayPrefix)/\(timestamp)/"

-       let logData = readLog()
+       let logFiles = readAllLogs()
        let ifconfigText = try await captureIfconfig()
        let hostName = Host.current().localizedName ?? "unknown"
        let debugInfo = readDebugInfo()
@@ -67,12 +67,14 @@ struct BugReportService {
            clusterTbBridgeStatus: clusterTbBridgeStatus
        )

-       let uploads: [(path: String, data: Data?)] = [
-           ("\(prefix)exo.log", logData),
+       var uploads: [(path: String, data: Data?)] = logFiles.map { (path, data) in
+           ("\(prefix)\(path)", data)
+       }
+       uploads.append(contentsOf: [
            ("\(prefix)state.json", stateData),
            ("\(prefix)events.json", eventsData),
            ("\(prefix)report.json", reportJSON),
-       ]
+       ])

        let uploadItems: [(key: String, body: Data)] = uploads.compactMap { item in
            guard let body = item.data else { return nil }
@@ -149,11 +151,26 @@ struct BugReportService {
        return decoded.urls
    }

-   private func readLog() -> Data? {
-       let logURL = URL(fileURLWithPath: NSHomeDirectory())
-           .appendingPathComponent(".exo")
-           .appendingPathComponent("exo.log")
-       return try? Data(contentsOf: logURL)
+   private func readAllLogs() -> [(path: String, data: Data)] {
+       let exoDir = URL(fileURLWithPath: NSHomeDirectory()).appendingPathComponent(".exo")
+       var results: [(path: String, data: Data)] = []
+
+       // Current log
+       let currentLog = exoDir.appendingPathComponent("exo.log")
+       if let data = try? Data(contentsOf: currentLog) {
+           results.append(("exo.log", data))
+       }
+
+       // Archived logs (.zst)
+       let contents = (try? FileManager.default.contentsOfDirectory(atPath: exoDir.path)) ?? []
+       for name in contents
+       where name.hasPrefix("exo.") && name.hasSuffix(".log.zst") {
+           if let data = try? Data(contentsOf: exoDir.appendingPathComponent(name)) {
+               results.append((name, data))
+           }
+       }
+
+       return results
    }

    private func captureIfconfig() async throws -> String {
bench/pyproject.toml (new file, 16 lines)

[project]
name = "exo-bench"
version = "0.1.0"
description = "Benchmarking tool for exo distributed inference"
requires-python = ">=3.13"
dependencies = [
    "loguru>=0.7.3",
    "transformers>=5.0.0",
    "huggingface-hub>=0.33.4",
    "tiktoken>=0.12.0",
    "jinja2>=3.1.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
bench/src/exo_bench/__init__.py (new file, empty)
@@ -599,9 +599,8 @@
        : isImageModel()
          ? "Describe the image you want to generate..."
          : placeholder}
-     disabled={loading}
      rows={1}
-     class="flex-1 resize-none bg-transparent text-foreground placeholder:text-exo-light-gray/60 placeholder:text-sm placeholder:tracking-[0.15em] placeholder:leading-7 focus:outline-none focus:ring-0 focus:border-none disabled:opacity-50 text-sm leading-7 font-mono"
+     class="flex-1 resize-none bg-transparent text-foreground placeholder:text-exo-light-gray/60 placeholder:text-sm placeholder:tracking-[0.15em] placeholder:leading-7 focus:outline-none focus:ring-0 focus:border-none text-sm leading-7 font-mono"
      style="min-height: 28px; max-height: 150px;"
    ></textarea>
@@ -13,6 +13,7 @@
 import type { MessageAttachment } from "$lib/stores/app.svelte";
 import MarkdownContent from "./MarkdownContent.svelte";
 import TokenHeatmap from "./TokenHeatmap.svelte";
+import ImageLightbox from "./ImageLightbox.svelte";

 interface Props {
     class?: string;
@@ -101,6 +102,9 @@
 let copiedMessageId = $state<string | null>(null);
 let expandedThinkingMessageIds = $state<Set<string>>(new Set());

+// Lightbox state
+let expandedImageSrc = $state<string | null>(null);
+
 // Uncertainty heatmap toggle
 let heatmapMessageIds = $state<Set<string>>(new Set());

@@ -389,10 +393,15 @@
     class="flex items-center gap-2 bg-exo-dark-gray/60 border border-exo-yellow/20 rounded px-2 py-1 text-xs font-mono"
   >
     {#if attachment.type === "image" && attachment.preview}
+      <!-- svelte-ignore a11y_no_noninteractive_element_interactions, a11y_click_events_have_key_events -->
       <img
         src={attachment.preview}
         alt={attachment.name}
-        class="w-12 h-12 object-cover rounded border border-exo-yellow/20"
+        class="w-12 h-12 object-cover rounded border border-exo-yellow/20 cursor-pointer hover:border-exo-yellow/50 transition-colors"
+        onclick={() => {
+          if (attachment.preview)
+            expandedImageSrc = attachment.preview;
+        }}
       />
     {:else}
       <span>{getAttachmentIcon(attachment)}</span>
@@ -466,15 +475,44 @@
 <div class="mb-3">
   {#each message.attachments.filter((a) => a.type === "generated-image") as attachment}
     <div class="relative group/img inline-block">
+      <!-- svelte-ignore a11y_no_noninteractive_element_interactions, a11y_click_events_have_key_events -->
       <img
         src={attachment.preview}
         alt=""
-        class="max-w-full max-h-[512px] rounded-lg border border-exo-yellow/20 shadow-lg shadow-black/20"
+        class="max-w-full max-h-[512px] rounded-lg border border-exo-yellow/20 shadow-lg shadow-black/20 cursor-pointer"
+        onclick={() => {
+          if (attachment.preview)
+            expandedImageSrc = attachment.preview;
+        }}
       />
       <!-- Button overlay -->
       <div
         class="absolute top-2 right-2 flex gap-1 opacity-0 group-hover/img:opacity-100 transition-opacity"
       >
+        <!-- Expand button -->
+        <button
+          type="button"
+          class="p-2 rounded-lg bg-exo-dark-gray/80 border border-exo-yellow/30 text-exo-yellow hover:bg-exo-dark-gray hover:border-exo-yellow/50 cursor-pointer"
+          onclick={() => {
+            if (attachment.preview)
+              expandedImageSrc = attachment.preview;
+          }}
+          title="Expand image"
+        >
+          <svg
+            class="w-4 h-4"
+            fill="none"
+            viewBox="0 0 24 24"
+            stroke="currentColor"
+            stroke-width="2"
+          >
+            <path
+              stroke-linecap="round"
+              stroke-linejoin="round"
+              d="M4 8V4m0 0h4M4 4l5 5m11-1V4m0 0h-4m4 0l-5 5M4 16v4m0 0h4m-4 0l5-5m11 5l-5-5m5 5v-4m0 4h-4"
+            />
+          </svg>
+        </button>
        <!-- Edit button -->
        <button
          type="button"
@@ -789,3 +827,8 @@
   </button>
 {/if}
 </div>
+
+<ImageLightbox
+  src={expandedImageSrc}
+  onclose={() => (expandedImageSrc = null)}
+/>
@@ -58,6 +58,18 @@
       d="M17.962 1.844h-4.326l-3.425 7.81H5.369V1.878H1.5V22h3.87v-8.477h6.824a3.025 3.025 0 002.743-1.75V22h3.87v-8.477a3.87 3.87 0 00-3.588-3.86v-.01h-2.125a3.94 3.94 0 002.323-2.12l2.545-5.689z"
     />
   </svg>
+{:else if family === "flux"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M12 2L2 19h7.5l2.5-4.5L14.5 19H22L12 2zm0 4.5L16.5 17h-3l-1.5-2.7L10.5 17h-3L12 6.5z"
+    />
+  </svg>
+{:else if family === "qwen-image"}
+  <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
+    <path
+      d="M12.604 1.34c.393.69.784 1.382 1.174 2.075a.18.18 0 00.157.091h5.552c.174 0 .322.11.446.327l1.454 2.57c.19.337.24.478.024.837-.26.43-.513.864-.76 1.3l-.367.658c-.106.196-.223.28-.04.512l2.652 4.637c.172.301.111.494-.043.77-.437.785-.882 1.564-1.335 2.34-.159.272-.352.375-.68.37-.777-.016-1.552-.01-2.327.016a.099.099 0 00-.081.05 575.097 575.097 0 01-2.705 4.74c-.169.293-.38.363-.725.364-.997.003-2.002.004-3.017.002a.537.537 0 01-.465-.271l-1.335-2.323a.09.09 0 00-.083-.049H4.982c-.285.03-.553-.001-.805-.092l-1.603-2.77a.543.543 0 01-.002-.54l1.207-2.12a.198.198 0 000-.197 550.951 550.951 0 01-1.875-3.272l-.79-1.395c-.16-.31-.173-.496.095-.965.465-.813.927-1.625 1.387-2.436.132-.234.304-.334.584-.335a338.3 338.3 0 012.589-.001.124.124 0 00.107-.063l2.806-4.895a.488.488 0 01.422-.246c.524-.001 1.053 0 1.583-.006L11.704 1c.341-.003.724.032.9.34zm-3.432.403a.06.06 0 00-.052.03L6.254 6.788a.157.157 0 01-.135.078H3.253c-.056 0-.07.025-.041.074l5.81 10.156c.025.042.013.062-.034.063l-2.795.015a.218.218 0 00-.2.116l-1.32 2.31c-.044.078-.021.118.068.118l5.716.008c.046 0 .08.02.104.061l1.403 2.454c.046.081.092.082.139 0l5.006-8.76.783-1.382a.055.055 0 01.096 0l1.424 2.53a.122.122 0 00.107.062l2.763-.02a.04.04 0 00.035-.02.041.041 0 000-.04l-2.9-5.086a.108.108 0 010-.113l.293-.507 1.12-1.977c.024-.041.012-.062-.035-.062H9.2c-.059 0-.073-.026-.043-.077l1.434-2.505a.107.107 0 000-.114L9.225 1.774a.06.06 0 00-.053-.031zm6.29 8.02c.046 0 .058.02.034.06l-.832 1.465-2.613 4.585a.056.056 0 01-.05.029.058.058 0 01-.05-.029L8.498 9.841c-.02-.034-.01-.052.028-.054l.216-.012 6.722-.012z"
+    />
+  </svg>
 {:else if family === "huggingface"}
   <svg class="w-6 h-6 {className}" viewBox="0 0 24 24" fill="currentColor">
     <path
@@ -22,6 +22,8 @@
   glm: "GLM",
   minimax: "MiniMax",
   kimi: "Kimi",
+  flux: "FLUX",
+  "qwen-image": "Qwen Img",
 };

 function getFamilyName(family: string): string {
@@ -32,7 +34,7 @@
 </script>

 <div
-  class="flex flex-col gap-1 py-2 px-1 border-r border-exo-yellow/10 bg-exo-medium-gray/30 min-w-[64px]"
+  class="flex flex-col gap-1 py-2 px-1 border-r border-exo-yellow/10 bg-exo-medium-gray/30 min-w-[64px] overflow-y-auto scrollbar-hide"
 >
   <!-- All models (no filter) -->
   <button
dashboard/src/lib/components/ImageLightbox.svelte (new file, 96 lines)

<script lang="ts">
  import { fade, fly } from "svelte/transition";
  import { cubicOut } from "svelte/easing";

  interface Props {
    src: string | null;
    onclose: () => void;
  }

  let { src, onclose }: Props = $props();

  function handleKeydown(e: KeyboardEvent) {
    if (e.key === "Escape") {
      onclose();
    }
  }

  function extensionFromSrc(dataSrc: string): string {
    const match = dataSrc.match(/^data:image\/(\w+)/);
    if (match) return match[1] === "jpeg" ? "jpg" : match[1];
    const urlMatch = dataSrc.match(/\.(\w+)(?:\?|$)/);
    if (urlMatch) return urlMatch[1];
    return "png";
  }

  function handleDownload(e: MouseEvent) {
    e.stopPropagation();
    if (!src) return;
    const link = document.createElement("a");
    link.href = src;
    link.download = `image-${Date.now()}.${extensionFromSrc(src)}`;
    link.click();
  }

  function handleClose(e: MouseEvent) {
    e.stopPropagation();
    onclose();
  }
</script>

<svelte:window onkeydown={src ? handleKeydown : undefined} />

{#if src}
  <div
    class="fixed inset-0 z-50 bg-black/90 backdrop-blur-sm flex items-center justify-center"
    transition:fade={{ duration: 200 }}
    onclick={onclose}
    role="presentation"
    onintrostart={() => (document.body.style.overflow = "hidden")}
    onoutroend={() => (document.body.style.overflow = "")}
  >
    <div class="absolute top-4 right-4 flex gap-2 z-10">
      <button
        type="button"
        class="p-2 rounded-lg bg-exo-dark-gray/80 border border-exo-yellow/30 text-exo-yellow hover:bg-exo-dark-gray hover:border-exo-yellow/50 cursor-pointer transition-colors"
        onclick={handleDownload}
        title="Download image"
      >
        <svg
          class="w-5 h-5"
          fill="none"
          viewBox="0 0 24 24"
          stroke="currentColor"
          stroke-width="2"
        >
          <path
            stroke-linecap="round"
            stroke-linejoin="round"
            d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"
          />
        </svg>
      </button>
      <button
        type="button"
        class="p-2 rounded-lg bg-exo-dark-gray/80 border border-exo-yellow/30 text-exo-yellow hover:bg-exo-dark-gray hover:border-exo-yellow/50 cursor-pointer transition-colors"
        onclick={handleClose}
        title="Close"
      >
        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
          <path
            d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"
          />
        </svg>
      </button>
    </div>

    <!-- svelte-ignore a11y_no_noninteractive_element_interactions, a11y_click_events_have_key_events -->
    <img
      {src}
      alt=""
      class="max-w-[90vw] max-h-[90vh] object-contain rounded-lg shadow-2xl"
      transition:fly={{ y: 20, duration: 300, easing: cubicOut }}
      onclick={(e) => e.stopPropagation()}
    />
  </div>
{/if}
@@ -24,6 +24,8 @@
   { id: "thinking", label: "Thinking" },
   { id: "code", label: "Code" },
   { id: "vision", label: "Vision" },
+  { id: "image_gen", label: "Image Gen" },
+  { id: "image_edit", label: "Image Edit" },
 ];

 // Size ranges
@@ -142,6 +144,45 @@
           stroke-linejoin="round"
         /><circle cx="12" cy="12" r="3" /></svg
       >
+    {:else if cap.id === "image_gen"}
+      <svg
+        class="w-3.5 h-3.5 inline-block"
+        viewBox="0 0 24 24"
+        fill="none"
+        stroke="currentColor"
+        stroke-width="1.5"
+        ><rect
+          x="3"
+          y="3"
+          width="18"
+          height="18"
+          rx="2"
+          ry="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        /><circle cx="8.5" cy="8.5" r="1.5" /><path
+          d="M21 15l-5-5L5 21"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        /></svg
+      >
+    {:else if cap.id === "image_edit"}
+      <svg
+        class="w-3.5 h-3.5 inline-block"
+        viewBox="0 0 24 24"
+        fill="none"
+        stroke="currentColor"
+        stroke-width="1.5"
+        ><path
+          d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        /><path
+          d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        /></svg
+      >
     {/if}
     <span class="ml-1">{cap.label}</span>
   </button>
@@ -200,9 +200,42 @@
         stroke-width="1.5"
         title="Supports image generation"
       >
-        <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
+        <rect
+          x="3"
+          y="3"
+          width="18"
+          height="18"
+          rx="2"
+          ry="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        />
         <circle cx="8.5" cy="8.5" r="1.5" />
-        <path d="M21 15l-5-5L5 21" />
+        <path
+          d="M21 15l-5-5L5 21"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        />
       </svg>
+    {:else if cap === "image_edit"}
+      <svg
+        class="w-3.5 h-3.5 text-white/40 flex-shrink-0"
+        viewBox="0 0 24 24"
+        fill="none"
+        stroke="currentColor"
+        stroke-width="1.5"
+        title="Supports image editing"
+      >
+        <path
+          d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        />
+        <path
+          d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+        />
+      </svg>
     {/if}
   {/each}
@@ -363,6 +363,8 @@
     "deepseek",
     "gpt-oss",
     "llama",
+    "flux",
+    "qwen-image",
   ];
   return Array.from(families).sort((a, b) => {
     const aIdx = familyOrder.indexOf(a);
@@ -606,7 +608,7 @@
   />

   <!-- Model list -->
-  <div class="flex-1 overflow-y-auto flex flex-col">
+  <div class="flex-1 overflow-y-auto scrollbar-hide flex flex-col">
     {#if selectedFamily === "huggingface"}
       <!-- HuggingFace Hub view -->
       <div class="flex-1 flex flex-col min-h-0">
@@ -624,7 +626,7 @@
       </div>

       <!-- Results list -->
-      <div class="flex-1 overflow-y-auto">
+      <div class="flex-1 overflow-y-auto scrollbar-hide">
         {#if hfIsLoadingTrending && hfTrendingModels.length === 0}
           <div
             class="flex items-center justify-center py-12 text-white/40"
@@ -50,7 +50,7 @@ let
   owner = "ml-explore";
   repo = "mlx";
   tag = "v${version}";
-  hash = "sha256-OJk6jPlbaSlsUdk3ADz3tWcRzTWXRof3/q8Soe1AO6w=";
+  hash = "sha256-SV/3MXt+SuJ69XfLfXycold6KgtXSM7OE0KwMSNw+eE=";
 };

 patches = [
@@ -28,6 +28,8 @@ dependencies = [
   "pillow>=11.0,<12.0", # compatibility with mflux
   "mflux==0.15.5",
   "python-multipart>=0.0.21",
+  "msgspec>=0.19.0",
+  "zstandard>=0.23.0",
 ]

 [project.scripts]
@@ -57,6 +59,7 @@ dev = [
 [tool.uv.workspace]
 members = [
   "rust/exo_pyo3_bindings",
+  "bench",
 ]

 [tool.uv.sources]
@@ -69,6 +69,16 @@
     text = ''exec python ${path} "$@"'';
   };

+  benchVenv = pythonSet.mkVirtualEnv "exo-bench-env" {
+    exo-bench = [ ];
+  };
+
+  mkBenchScript = name: path: pkgs.writeShellApplication {
+    inherit name;
+    runtimeInputs = [ benchVenv ];
+    text = ''exec python ${path} "$@"'';
+  };
+
   mkSimplePythonScript = name: path: pkgs.writeShellApplication {
     inherit name;
     runtimeInputs = [ pkgs.python313 ];
@@ -96,8 +106,8 @@
   exo = exoPackage;
   # Test environment for running pytest outside of Nix sandbox (needs GPU access)
   exo-test-env = testVenv;
-  exo-bench = mkPythonScript "exo-bench" (inputs.self + /bench/exo_bench.py);
+  exo-bench = mkBenchScript "exo-bench" (inputs.self + /bench/exo_bench.py);
 } // {
   exo-get-all-models-on-cluster = mkSimplePythonScript "exo-get-all-models-on-cluster" (inputs.self + /tests/get_all_models_on_cluster.py);
 };
@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["ImageToImage"]
+family = "flux"
+quantization = "4bit"
+base_model = "FLUX.1 Kontext"
+capabilities = ["image_edit"]

 [storage_size]
 in_bytes = 15475325472

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["ImageToImage"]
+family = "flux"
+quantization = "8bit"
+base_model = "FLUX.1 Kontext"
+capabilities = ["image_edit"]

 [storage_size]
 in_bytes = 21426029632

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["ImageToImage"]
+family = "flux"
+quantization = ""
+base_model = "FLUX.1 Kontext"
+capabilities = ["image_edit"]

 [storage_size]
 in_bytes = 33327437952

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = "4bit"
+base_model = "FLUX.1 Krea"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 15475325472

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = "8bit"
+base_model = "FLUX.1 Krea"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 21426029632

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = ""
+base_model = "FLUX.1 Krea"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 33327437952

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = "4bit"
+base_model = "FLUX.1 Dev"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 15475325472

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = "8bit"
+base_model = "FLUX.1 Dev"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 21426029632

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = ""
+base_model = "FLUX.1 Dev"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 33327437952

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = "4bit"
+base_model = "FLUX.1 Schnell"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 15470210592

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = "8bit"
+base_model = "FLUX.1 Schnell"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 21415799872

@@ -3,6 +3,10 @@ n_layers = 57
 hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
+family = "flux"
+quantization = ""
+base_model = "FLUX.1 Schnell"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 33306978432

@@ -4,6 +4,10 @@ hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
 uses_cfg = true
+family = "qwen-image"
+quantization = "4bit"
+base_model = "Qwen Image"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 26799533856

@@ -4,6 +4,10 @@ hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
 uses_cfg = true
+family = "qwen-image"
+quantization = "8bit"
+base_model = "Qwen Image"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 37014734400

@@ -4,6 +4,10 @@ hidden_size = 1
 supports_tensor = false
 tasks = ["ImageToImage"]
 uses_cfg = true
+family = "qwen-image"
+quantization = "4bit"
+base_model = "Qwen Image Edit"
+capabilities = ["image_edit"]

 [storage_size]
 in_bytes = 26799533856

@@ -4,6 +4,10 @@ hidden_size = 1
 supports_tensor = false
 tasks = ["ImageToImage"]
 uses_cfg = true
+family = "qwen-image"
+quantization = "8bit"
+base_model = "Qwen Image Edit"
+capabilities = ["image_edit"]

 [storage_size]
 in_bytes = 37014734400

@@ -4,6 +4,10 @@ hidden_size = 1
 supports_tensor = false
 tasks = ["ImageToImage"]
 uses_cfg = true
+family = "qwen-image"
+quantization = ""
+base_model = "Qwen Image Edit"
+capabilities = ["image_edit"]

 [storage_size]
 in_bytes = 57445135488

@@ -4,6 +4,10 @@ hidden_size = 1
 supports_tensor = false
 tasks = ["TextToImage"]
 uses_cfg = true
+family = "qwen-image"
+quantization = ""
+base_model = "Qwen Image"
+capabilities = ["image_gen"]

 [storage_size]
 in_bytes = 57445135488
@@ -457,6 +457,8 @@ async def download_file_with_retry(
             )
         except HuggingFaceAuthenticationError:
             raise
+        except FileNotFoundError:
+            raise
         except HuggingFaceRateLimitError as e:
             if attempt == n_attempts - 1:
                 raise e
@@ -3,7 +3,6 @@
 import time
 from collections.abc import AsyncGenerator
 from typing import Any
-from uuid import uuid4

 from exo.shared.types.api import (
     ChatCompletionChoice,
@@ -141,7 +140,7 @@ async def generate_chat_stream(
     if isinstance(chunk, ToolCallChunk):
         tool_call_deltas = [
             ToolCall(
-                id=str(uuid4()),
+                id=tool.id,
                 index=i,
                 function=tool,
             )
@@ -207,7 +206,7 @@ async def collect_chat_response(
     if isinstance(chunk, ToolCallChunk):
         tool_calls.extend(
             ToolCall(
-                id=str(uuid4()),
+                id=tool.id,
                 index=i,
                 function=tool,
             )
@@ -3,7 +3,6 @@
 import json
 from collections.abc import AsyncGenerator
 from typing import Any
-from uuid import uuid4

 from exo.shared.types.api import FinishReason
 from exo.shared.types.chunks import ErrorChunk, TokenChunk, ToolCallChunk
@@ -179,7 +178,7 @@ async def collect_claude_response(
     for tool in chunk.tool_calls:
         tool_use_blocks.append(
             ClaudeToolUseBlock(
-                id=f"toolu_{uuid4().hex[:24]}",
+                id=f"toolu_{tool.id}",
                 name=tool.name,
                 input=json.loads(tool.arguments),  # pyright: ignore[reportAny]
             )
@@ -264,7 +263,7 @@ async def generate_claude_stream(

     # Emit tool_use content blocks
     for tool in chunk.tool_calls:
-        tool_id = f"toolu_{uuid4().hex[:24]}"
+        tool_id = f"toolu_{tool.id}"
         tool_input_json = tool.arguments

         # content_block_start for tool_use
@@ -3,7 +3,6 @@
 from collections.abc import AsyncGenerator
 from itertools import count
 from typing import Any
-from uuid import uuid4

 from exo.shared.types.chunks import ErrorChunk, TokenChunk, ToolCallChunk
 from exo.shared.types.common import CommandId
@@ -140,8 +139,8 @@ async def collect_responses_response(
     for tool in chunk.tool_calls:
         function_call_items.append(
             ResponseFunctionCallItem(
-                id=f"fc_{uuid4().hex[:24]}",
-                call_id=f"call_{uuid4().hex[:24]}",
+                id=f"fc_{tool.id}",
+                call_id=f"call_{tool.id}",
                 name=tool.name,
                 arguments=tool.arguments,
             )
@@ -246,8 +245,8 @@ async def generate_responses_stream(
     if isinstance(chunk, ToolCallChunk):
         last_stats = chunk.stats or last_stats
         for tool in chunk.tool_calls:
-            fc_id = f"fc_{uuid4().hex[:24]}"
-            call_id = f"call_{uuid4().hex[:24]}"
+            fc_id = f"fc_{tool.id}"
+            call_id = f"call_{tool.id}"

             # response.output_item.added for function_call
             fc_item = ResponseFunctionCallItem(
@@ -3,7 +3,7 @@ import contextlib
 import json
 import random
 import time
-from collections.abc import AsyncGenerator, Awaitable, Callable
+from collections.abc import AsyncGenerator, Awaitable, Callable, Iterator
 from datetime import datetime, timezone
 from http import HTTPStatus
 from pathlib import Path
@@ -37,11 +37,13 @@ from exo.master.adapters.responses import (
     generate_responses_stream,
     responses_request_to_text_generation,
 )
+from exo.master.event_log import DiskEventLog
 from exo.master.image_store import ImageStore
 from exo.master.placement import place_instance as get_instance_placements
 from exo.shared.apply import apply
 from exo.shared.constants import (
     DASHBOARD_DIR,
+    EXO_EVENT_LOG_DIR,
     EXO_IMAGE_CACHE_DIR,
     EXO_MAX_CHUNK_SIZE,
     EXO_TRACING_CACHE_DIR,
@@ -146,6 +148,8 @@ from exo.utils.banner import print_startup_banner
 from exo.utils.channels import Receiver, Sender, channel
 from exo.utils.event_buffer import OrderedBuffer

+_API_EVENT_LOG_DIR = EXO_EVENT_LOG_DIR / "api"
+

 def _format_to_content_type(image_format: Literal["png", "jpeg", "webp"] | None) -> str:
     return f"image/{image_format or 'png'}"
@@ -175,7 +179,7 @@ class API:
         election_receiver: Receiver[ElectionMessage],
     ) -> None:
         self.state = State()
-        self._event_log: list[Event] = []
+        self._event_log = DiskEventLog(_API_EVENT_LOG_DIR)
         self.command_sender = command_sender
         self.download_command_sender = download_command_sender
         self.global_event_receiver = global_event_receiver
@@ -223,6 +227,8 @@ class API:

     def reset(self, new_session_id: SessionId, result_clock: int):
         logger.info("Resetting API State")
+        self._event_log.close()
+        self._event_log = DiskEventLog(_API_EVENT_LOG_DIR)
         self.state = State()
         self.session_id = new_session_id
         self.event_buffer = OrderedBuffer[Event]()
@@ -289,7 +295,7 @@ class API:
         self.app.post("/v1/messages", response_model=None)(self.claude_messages)
         self.app.post("/v1/responses", response_model=None)(self.openai_responses)
         self.app.get("/state")(lambda: self.state)
-        self.app.get("/events")(lambda: self._event_log)
+        self.app.get("/events")(self.stream_events)
         self.app.post("/download/start")(self.start_download)
         self.app.delete("/download/{node_id}/{model_id:path}")(self.delete_download)
         self.app.get("/v1/traces")(self.list_traces)
@@ -386,7 +392,12 @@ class API:
         if len(list(self.state.topology.list_nodes())) == 0:
             return PlacementPreviewResponse(previews=[])

-        model_card = await ModelCard.load(model_id)
+        try:
+            model_card = await ModelCard.load(model_id)
+        except Exception as exc:
+            raise HTTPException(
+                status_code=400, detail=f"Failed to load model card: {exc}"
+            ) from exc
         instance_combinations: list[tuple[Sharding, InstanceMeta, int]] = []
         for sharding in (Sharding.Pipeline, Sharding.Tensor):
             for instance_meta in (InstanceMeta.MlxRing, InstanceMeta.MlxJaccl):
@@ -689,6 +700,22 @@ class API:
         )
         return resolved_model

+    def stream_events(self) -> StreamingResponse:
+        def _generate_json_array(events: Iterator[Event]) -> Iterator[str]:
+            yield "["
+            first = True
+            for event in events:
+                if not first:
+                    yield ","
+                first = False
+                yield event.model_dump_json()
+            yield "]"
+
+        return StreamingResponse(
+            _generate_json_array(self._event_log.read_all()),
+            media_type="application/json",
+        )
+
     async def get_image(self, image_id: str) -> FileResponse:
         stored = self._image_store.get(Id(image_id))
         if stored is None:
@@ -1337,6 +1364,7 @@ class API:
             with anyio.CancelScope(shield=True):
                 shutdown_ev.set()
         finally:
+            self._event_log.close()
             self.command_sender.close()
             self.global_event_receiver.close()
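The /events endpoint above now streams one JSON array incrementally rather than materializing the whole event log in memory, so the complete response body is still plain JSON once received. A minimal client sketch, assuming an exo API at base_url (the value shown is illustrative; httpx is already a project dependency):

import json

import httpx


def fetch_events(base_url: str) -> list[dict]:
    # The streamed body is a single JSON array; read it to completion
    # and parse it as a whole. base_url is an assumption, e.g. "http://localhost:52415".
    resp = httpx.get(f"{base_url}/events", timeout=30.0)
    resp.raise_for_status()
    return json.loads(resp.text)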
src/exo/master/event_log.py (new file, 195 lines)

import contextlib
import json
from collections import OrderedDict
from collections.abc import Iterator
from io import BufferedRandom, BufferedReader
from pathlib import Path

import msgspec
import zstandard
from loguru import logger
from pydantic import TypeAdapter

from exo.shared.types.events import Event

_EVENT_ADAPTER: TypeAdapter[Event] = TypeAdapter(Event)

_HEADER_SIZE = 4  # uint32 big-endian
_OFFSET_CACHE_SIZE = 128
_MAX_ARCHIVES = 5


def _serialize_event(event: Event) -> bytes:
    return msgspec.msgpack.encode(event.model_dump(mode="json"))


def _deserialize_event(raw: bytes) -> Event:
    # Decode msgpack into a Python dict, then re-encode as JSON for Pydantic.
    # Pydantic's validate_json() uses JSON-mode coercion (e.g. string -> enum)
    # even under strict=True, whereas validate_python() does not. Going through
    # JSON is the only way to get correct round-trip deserialization without
    # disabling strict mode or adding casts everywhere.
    as_json = json.dumps(msgspec.msgpack.decode(raw, type=dict))
    return _EVENT_ADAPTER.validate_json(as_json)


def _unpack_header(header: bytes) -> int:
    return int.from_bytes(header, byteorder="big")


def _skip_record(f: BufferedReader) -> bool:
    """Skip one length-prefixed record. Returns False on EOF."""
    header = f.read(_HEADER_SIZE)
    if len(header) < _HEADER_SIZE:
        return False
    f.seek(_unpack_header(header), 1)
    return True


def _read_record(f: BufferedReader) -> Event | None:
    """Read one length-prefixed record. Returns None on EOF."""
    header = f.read(_HEADER_SIZE)
    if len(header) < _HEADER_SIZE:
        return None
    length = _unpack_header(header)
    payload = f.read(length)
    if len(payload) < length:
        return None
    return _deserialize_event(payload)


class DiskEventLog:
    """Append-only event log backed by a file on disk.

    On-disk format: sequence of length-prefixed msgpack records.
    Each record is [4-byte big-endian uint32 length][msgpack payload].

    Uses a bounded LRU cache of event index → byte offset for efficient
    random access without storing an offset per event.
    """

    def __init__(self, directory: Path) -> None:
        self._directory = directory
        self._directory.mkdir(parents=True, exist_ok=True)
        self._active_path = directory / "events.bin"
        self._offset_cache: OrderedDict[int, int] = OrderedDict()
        self._count: int = 0

        # Rotate stale active file from a previous session/crash
        if self._active_path.exists():
            self._rotate(self._active_path, self._directory)

        self._file: BufferedRandom = open(self._active_path, "w+b")  # noqa: SIM115

    def _cache_offset(self, idx: int, offset: int) -> None:
        self._offset_cache[idx] = offset
        self._offset_cache.move_to_end(idx)
        if len(self._offset_cache) > _OFFSET_CACHE_SIZE:
            self._offset_cache.popitem(last=False)

    def _seek_to(self, f: BufferedReader, target_idx: int) -> None:
        """Seek f to the byte offset of event target_idx, using cache or scanning forward."""
        if target_idx in self._offset_cache:
            self._offset_cache.move_to_end(target_idx)
            f.seek(self._offset_cache[target_idx])
            return

        # Find the highest cached index before target_idx
        scan_from_idx = 0
        scan_from_offset = 0
        for cached_idx in self._offset_cache:
            if cached_idx < target_idx:
                scan_from_idx = cached_idx
                scan_from_offset = self._offset_cache[cached_idx]

        # Scan forward, skipping records
        f.seek(scan_from_offset)
        for _ in range(scan_from_idx, target_idx):
            _skip_record(f)

        self._cache_offset(target_idx, f.tell())

    def append(self, event: Event) -> None:
        packed = _serialize_event(event)
        self._file.write(len(packed).to_bytes(_HEADER_SIZE, byteorder="big"))
        self._file.write(packed)
        self._count += 1

    def read_range(self, start: int, end: int) -> Iterator[Event]:
        """Yield events from index start (inclusive) to end (exclusive)."""
        end = min(end, self._count)
        if start < 0 or end < 0 or start >= end:
            return

        self._file.flush()
        with open(self._active_path, "rb") as f:
            self._seek_to(f, start)
            for _ in range(end - start):
                event = _read_record(f)
                if event is None:
                    break
                yield event

            # Cache where we ended up so the next sequential read is a hit
            if end < self._count:
                self._cache_offset(end, f.tell())

    def read_all(self) -> Iterator[Event]:
        """Yield all events from the log one at a time."""
        if self._count == 0:
            return
        self._file.flush()
        with open(self._active_path, "rb") as f:
            for _ in range(self._count):
                event = _read_record(f)
                if event is None:
                    break
                yield event

    def __len__(self) -> int:
        return self._count

    def close(self) -> None:
        """Close the file and rotate the active file to a compressed archive."""
        if self._file.closed:
            return
        self._file.close()
        if self._active_path.exists() and self._count > 0:
            self._rotate(self._active_path, self._directory)
        elif self._active_path.exists():
            self._active_path.unlink()

    @staticmethod
    def _archive_path(directory: Path, n: int) -> Path:
        return directory / f"events.{n}.bin.zst"

    @staticmethod
    def _rotate(source: Path, directory: Path) -> None:
        """Compress source into a numbered archive, shifting older archives.

        Keeps at most ``_MAX_ARCHIVES`` compressed copies. The most recent
        archive is always ``events.1.bin.zst``; older ones are shifted up
        (2, 3, …) and the oldest beyond the limit is deleted.
        """
        try:
            # Shift existing archives
            oldest = DiskEventLog._archive_path(directory, _MAX_ARCHIVES)
            with contextlib.suppress(FileNotFoundError):
                oldest.unlink()
            for i in range(_MAX_ARCHIVES - 1, 0, -1):
                current = DiskEventLog._archive_path(directory, i)
                if current.exists():
                    current.rename(DiskEventLog._archive_path(directory, i + 1))

            # Compress source into slot 1
            dest = DiskEventLog._archive_path(directory, 1)
            compressor = zstandard.ZstdCompressor()
            with open(source, "rb") as f_in, open(dest, "wb") as f_out:
                compressor.copy_stream(f_in, f_out)
            source.unlink()
            logger.info(f"Rotated event log: {source} -> {dest}")
        except Exception as e:
            logger.opt(exception=e).warning(f"Failed to rotate event log {source}")
            # Clean up the source even if compression fails
            with contextlib.suppress(OSError):
                source.unlink()
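A minimal sketch of reading this on-disk format independently of exo, assuming only the record layout documented above (4-byte big-endian length, then a msgpack payload); the real code additionally validates each payload into an Event:

from collections.abc import Iterator
from pathlib import Path

import msgspec


def iter_raw_records(path: Path) -> Iterator[dict]:
    # Walk the length-prefixed records in an events.bin file.
    with open(path, "rb") as f:
        while True:
            header = f.read(4)
            if len(header) < 4:
                return  # clean EOF
            length = int.from_bytes(header, byteorder="big")
            payload = f.read(length)
            if len(payload) < length:
                return  # truncated tail, e.g. after a crash
            yield msgspec.msgpack.decode(payload, type=dict)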
@@ -4,6 +4,7 @@ import anyio
 from anyio.abc import TaskGroup
 from loguru import logger

+from exo.master.event_log import DiskEventLog
 from exo.master.placement import (
     add_instance_to_placements,
     cancel_unnecessary_downloads,
@@ -12,7 +13,7 @@ from exo.master.placement import (
     place_instance,
 )
 from exo.shared.apply import apply
-from exo.shared.constants import EXO_TRACING_ENABLED
+from exo.shared.constants import EXO_EVENT_LOG_DIR, EXO_TRACING_ENABLED
 from exo.shared.types.commands import (
     CreateInstance,
     DeleteInstance,
@@ -88,8 +89,7 @@ class Master:
             local_event_receiver.clone_sender()
         )
         self._multi_buffer = MultiSourceBuffer[NodeId, Event]()
-        # TODO: not have this
-        self._event_log: list[Event] = []
+        self._event_log = DiskEventLog(EXO_EVENT_LOG_DIR / "master")
         self._pending_traces: dict[TaskId, dict[int, list[TraceEventData]]] = {}
         self._expected_ranks: dict[TaskId, set[int]] = {}

@@ -103,6 +103,7 @@ class Master:
             tg.start_soon(self._loopback_processor)
             tg.start_soon(self._plan)
         finally:
+            self._event_log.close()
             self.global_event_sender.close()
             self.local_event_receiver.close()
             self.command_receiver.close()
@@ -332,10 +333,13 @@ class Master:
                 ]
             case RequestEventLog():
                 # We should just be able to send everything, since other buffers will ignore old messages
-                for i in range(command.since_idx, len(self._event_log)):
-                    await self._send_event(
-                        IndexedEvent(idx=i, event=self._event_log[i])
-                    )
+                # rate limit to 1000 at a time
+                end = min(command.since_idx + 1000, len(self._event_log))
+                for i, event in enumerate(
+                    self._event_log.read_range(command.since_idx, end),
+                    start=command.since_idx,
+                ):
+                    await self._send_event(IndexedEvent(idx=i, event=event))
             for event in generated_events:
                 await self.event_sender.send(event)
         except ValueError as e:
src/exo/master/tests/test_event_log.py (new file, 143 lines)

from pathlib import Path

import pytest

from exo.master.event_log import DiskEventLog
from exo.shared.types.events import TestEvent


@pytest.fixture
def log_dir(tmp_path: Path) -> Path:
    return tmp_path / "event_log"


def test_append_and_read_back(log_dir: Path):
    log = DiskEventLog(log_dir)
    events = [TestEvent() for _ in range(5)]
    for e in events:
        log.append(e)

    assert len(log) == 5

    result = list(log.read_all())
    assert len(result) == 5
    for original, restored in zip(events, result, strict=True):
        assert original.event_id == restored.event_id

    log.close()


def test_read_range(log_dir: Path):
    log = DiskEventLog(log_dir)
    events = [TestEvent() for _ in range(10)]
    for e in events:
        log.append(e)

    result = list(log.read_range(3, 7))
    assert len(result) == 4
    for i, restored in enumerate(result):
        assert events[3 + i].event_id == restored.event_id

    log.close()


def test_read_range_bounds(log_dir: Path):
    log = DiskEventLog(log_dir)
    events = [TestEvent() for _ in range(3)]
    for e in events:
        log.append(e)

    # Start beyond count
    assert list(log.read_range(5, 10)) == []
    # Negative start
    assert list(log.read_range(-1, 2)) == []
    # End beyond count is clamped
    result = list(log.read_range(1, 100))
    assert len(result) == 2

    log.close()


def test_empty_log(log_dir: Path):
    log = DiskEventLog(log_dir)
    assert len(log) == 0
    assert list(log.read_all()) == []
    assert list(log.read_range(0, 10)) == []
    log.close()


def test_rotation_on_close(log_dir: Path):
    log = DiskEventLog(log_dir)
    log.append(TestEvent())
    log.close()

    active = log_dir / "events.bin"
    archive = log_dir / "events.1.bin.zst"
    assert not active.exists()
    assert archive.exists()
    assert archive.stat().st_size > 0


def test_rotation_on_construction_with_stale_file(log_dir: Path):
    log_dir.mkdir(parents=True, exist_ok=True)
    active = log_dir / "events.bin"
    active.write_bytes(b"stale data")

    log = DiskEventLog(log_dir)
    archive = log_dir / "events.1.bin.zst"
    assert archive.exists()
    assert len(log) == 0

    log.close()


def test_empty_log_no_archive(log_dir: Path):
    """Closing an empty log should not leave an archive."""
    log = DiskEventLog(log_dir)
    log.close()

    active = log_dir / "events.bin"
    archive = log_dir / "events.1.bin.zst"
    assert not active.exists()
    assert not archive.exists()


def test_close_is_idempotent(log_dir: Path):
    log = DiskEventLog(log_dir)
    log.append(TestEvent())
    log.close()
    log.close()  # should not raise

    archive = log_dir / "events.1.bin.zst"
    assert archive.exists()


def test_successive_sessions(log_dir: Path):
    """Simulate two master sessions: both archives should be kept."""
    log1 = DiskEventLog(log_dir)
    log1.append(TestEvent())
    log1.close()

    assert (log_dir / "events.1.bin.zst").exists()

    log2 = DiskEventLog(log_dir)
    log2.append(TestEvent())
    log2.append(TestEvent())
    log2.close()

    # Session 1 archive shifted to slot 2, session 2 in slot 1
    assert (log_dir / "events.1.bin.zst").exists()
    assert (log_dir / "events.2.bin.zst").exists()


def test_rotation_keeps_at_most_5_archives(log_dir: Path):
    """After 7 sessions, only the 5 most recent archives should remain."""
    for _ in range(7):
        log = DiskEventLog(log_dir)
        log.append(TestEvent())
        log.close()

    for i in range(1, 6):
        assert (log_dir / f"events.{i}.bin.zst").exists()
    assert not (log_dir / "events.6.bin.zst").exists()
    assert not (log_dir / "events.7.bin.zst").exists()
@@ -60,6 +60,7 @@ EXO_MAX_CHUNK_SIZE = 512 * 1024

 EXO_CUSTOM_MODEL_CARDS_DIR = EXO_DATA_HOME / "custom_model_cards"

+EXO_EVENT_LOG_DIR = EXO_DATA_HOME / "event_log"
 EXO_IMAGE_CACHE_DIR = EXO_CACHE_HOME / "images"
 EXO_TRACING_CACHE_DIR = EXO_CACHE_HOME / "traces"
@@ -1,11 +1,30 @@
 import logging
 import sys
+from collections.abc import Iterator
 from pathlib import Path

+import zstandard
 from hypercorn import Config
 from hypercorn.logging import Logger as HypercornLogger
 from loguru import logger

+_MAX_LOG_ARCHIVES = 5
+
+
+def _zstd_compress(filepath: str) -> None:
+    source = Path(filepath)
+    dest = source.with_suffix(source.suffix + ".zst")
+    cctx = zstandard.ZstdCompressor()
+    with open(source, "rb") as f_in, open(dest, "wb") as f_out:
+        cctx.copy_stream(f_in, f_out)
+    source.unlink()
+
+
+def _once_then_never() -> Iterator[bool]:
+    # loguru calls the rotation predicate per log record; yielding True once
+    # rotates any pre-existing log file at startup and never rotates again.
+    yield True
+    while True:
+        yield False
+

 class InterceptLogger(HypercornLogger):
     def __init__(self, config: Config):
@@ -53,13 +72,16 @@ def logger_setup(log_file: Path | None, verbosity: int = 0):
         enqueue=True,
     )
     if log_file:
+        rotate_once = _once_then_never()
         logger.add(
             log_file,
             format="[ {time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} ] {message}",
             level="INFO",
             colorize=False,
             enqueue=True,
-            rotation="1 week",
+            rotation=lambda _, __: next(rotate_once),
+            retention=_MAX_LOG_ARCHIVES,
+            compression=_zstd_compress,
         )
@@ -30,11 +30,11 @@ from exo.utils.pydantic_ext import CamelCaseModel
 # kinda ugly...
 # TODO: load search path from config.toml
 _custom_cards_dir = Path(str(EXO_CUSTOM_MODEL_CARDS_DIR))
-_csp = [Path(RESOURCES_DIR) / "inference_model_cards", _custom_cards_dir]
-if EXO_ENABLE_IMAGE_MODELS:
-    _csp.append(Path(RESOURCES_DIR) / "image_model_cards")
-
-CARD_SEARCH_PATH = _csp
+CARD_SEARCH_PATH = [
+    Path(RESOURCES_DIR) / "inference_model_cards",
+    Path(RESOURCES_DIR) / "image_model_cards",
+    _custom_cards_dir,
+]

 _card_cache: dict[ModelId, "ModelCard"] = {}

@@ -49,10 +49,16 @@ async def _refresh_card_cache():
     pass


+def _is_image_card(card: "ModelCard") -> bool:
+    return any(t in (ModelTask.TextToImage, ModelTask.ImageToImage) for t in card.tasks)
+
+
 async def get_model_cards() -> list["ModelCard"]:
     if len(_card_cache) == 0:
         await _refresh_card_cache()
-    return list(_card_cache.values())
+    if EXO_ENABLE_IMAGE_MODELS:
+        return list(_card_cache.values())
+    return [c for c in _card_cache.values() if not _is_image_card(c)]


 class ModelTask(str, Enum):
@@ -1,6 +1,7 @@
 import time
 from collections.abc import Generator
 from typing import Annotated, Any, Literal
+from uuid import uuid4

 from pydantic import BaseModel, Field, field_validator
 from pydantic_core import PydanticUseDefault
@@ -60,6 +61,7 @@ class ChatCompletionMessageText(BaseModel):


 class ToolCallItem(BaseModel):
+    id: str = Field(default_factory=lambda: str(uuid4()))
     name: str
     arguments: str
@@ -352,7 +352,10 @@ class InfoGatherer:
         if self.misc_poll_interval is None:
             return
         while True:
-            await self.info_sender.send(await MiscData.gather())
+            try:
+                await self.info_sender.send(await MiscData.gather())
+            except Exception as e:
+                logger.warning(f"Error gathering misc data: {e}")
             await anyio.sleep(self.misc_poll_interval)
 
     async def _monitor_system_profiler_thunderbolt_data(self):
@@ -363,15 +366,17 @@ class InfoGatherer:
             return
 
         while True:
-            data = await ThunderboltConnectivity.gather()
-            assert data is not None
+            try:
+                data = await ThunderboltConnectivity.gather()
+                assert data is not None
 
-            idents = [it for i in data if (it := i.ident(iface_map)) is not None]
-            await self.info_sender.send(MacThunderboltIdentifiers(idents=idents))
+                idents = [it for i in data if (it := i.ident(iface_map)) is not None]
+                await self.info_sender.send(MacThunderboltIdentifiers(idents=idents))
 
-            conns = [it for i in data if (it := i.conn()) is not None]
-            await self.info_sender.send(MacThunderboltConnections(conns=conns))
+                conns = [it for i in data if (it := i.conn()) is not None]
+                await self.info_sender.send(MacThunderboltConnections(conns=conns))
+            except Exception as e:
+                logger.warning(f"Error gathering Thunderbolt data: {e}")
             await anyio.sleep(self.system_profiler_interval)
 
     async def _monitor_memory_usage(self):
@@ -384,26 +389,35 @@ class InfoGatherer:
         if self.memory_poll_rate is None:
             return
         while True:
-            await self.info_sender.send(
-                MemoryUsage.from_psutil(override_memory=override_memory)
-            )
+            try:
+                await self.info_sender.send(
+                    MemoryUsage.from_psutil(override_memory=override_memory)
+                )
+            except Exception as e:
+                logger.warning(f"Error gathering memory usage: {e}")
             await anyio.sleep(self.memory_poll_rate)
 
     async def _watch_system_info(self):
         if self.interface_watcher_interval is None:
             return
         while True:
-            nics = await get_network_interfaces()
-            await self.info_sender.send(NodeNetworkInterfaces(ifaces=nics))
+            try:
+                nics = await get_network_interfaces()
+                await self.info_sender.send(NodeNetworkInterfaces(ifaces=nics))
+            except Exception as e:
+                logger.warning(f"Error gathering network interfaces: {e}")
             await anyio.sleep(self.interface_watcher_interval)
 
     async def _monitor_thunderbolt_bridge_status(self):
        if self.thunderbolt_bridge_poll_interval is None:
             return
         while True:
-            curr = await ThunderboltBridgeInfo.gather()
-            if curr is not None:
-                await self.info_sender.send(curr)
+            try:
+                curr = await ThunderboltBridgeInfo.gather()
+                if curr is not None:
+                    await self.info_sender.send(curr)
+            except Exception as e:
+                logger.warning(f"Error gathering Thunderbolt Bridge status: {e}")
             await anyio.sleep(self.thunderbolt_bridge_poll_interval)
 
     async def _monitor_macmon(self, macmon_path: str):
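Each monitor loop above gets the same treatment: the poll body moves inside try/except so one transient gather failure logs a warning instead of killing the task, while the sleep stays outside the try so the loop always backs off. The shape reduced to a sketch; `gather` and `send` are hypothetical callables standing in for the real data sources and the info sender:

```python
import anyio
from loguru import logger


async def poll_forever(gather, send, interval: float) -> None:
    """Poll `gather` and forward its result, surviving transient failures."""
    while True:
        try:
            await send(await gather())
        except Exception as e:
            # one bad poll should not crash the monitor task
            logger.warning(f"Error gathering data: {e}")
        await anyio.sleep(interval)  # back off whether or not the poll worked
```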
@@ -1,4 +1,5 @@
-from collections.abc import Mapping
+from collections import defaultdict
+from collections.abc import AsyncGenerator, Mapping
 
 import anyio
 import httpx
@@ -8,6 +9,7 @@ from loguru import logger
 from exo.shared.topology import Topology
 from exo.shared.types.common import NodeId
 from exo.shared.types.profiling import NodeNetworkInfo
+from exo.utils.channels import Sender, channel
 
 REACHABILITY_ATTEMPTS = 3
 
@@ -80,10 +82,10 @@ async def check_reachable(
     topology: Topology,
     self_node_id: NodeId,
     node_network: Mapping[NodeId, NodeNetworkInfo],
-) -> dict[NodeId, set[str]]:
-    """Check which nodes are reachable and return their IPs."""
+) -> AsyncGenerator[tuple[str, NodeId], None]:
+    """Yield (ip, node_id) pairs as reachability probes complete."""
 
-    reachable: dict[NodeId, set[str]] = {}
+    send, recv = channel[tuple[str, NodeId]]()
 
     # these are intentionally httpx's defaults so we can tune them later
     timeout = httpx.Timeout(timeout=5.0)
@@ -93,6 +95,18 @@ async def check_reachable(
         keepalive_expiry=5,
     )
 
+    async def _probe(
+        target_ip: str,
+        expected_node_id: NodeId,
+        client: httpx.AsyncClient,
+        send: Sender[tuple[str, NodeId]],
+    ) -> None:
+        async with send:
+            out: defaultdict[NodeId, set[str]] = defaultdict(set)
+            await check_reachability(target_ip, expected_node_id, out, client)
+            if expected_node_id in out:
+                await send.send((target_ip, expected_node_id))
+
     async with (
         httpx.AsyncClient(timeout=timeout, limits=limits) as client,
         create_task_group() as tg,
@@ -103,12 +117,9 @@ async def check_reachable(
             if node_id == self_node_id:
                 continue
             for iface in node_network[node_id].interfaces:
-                tg.start_soon(
-                    check_reachability,
-                    iface.ip_address,
-                    node_id,
-                    reachable,
-                    client,
-                )
+                tg.start_soon(_probe, iface.ip_address, node_id, client, send.clone())
+        send.close()
 
-    return reachable
+    with recv:
+        async for item in recv:
+            yield item
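check_reachable now streams results instead of collecting a dict: every probe task owns a clone of the channel's send side, and the generator drains the receive side until the last clone closes. A sketch of the same fan-in shape using anyio's memory object streams in place of the internal exo.utils.channels (so the channel API here is anyio's, not exo's):

```python
from collections.abc import AsyncGenerator

import anyio
from anyio.streams.memory import MemoryObjectSendStream


async def _probe(item: str, tx: MemoryObjectSendStream[str]) -> None:
    async with tx:  # closing our clone tells the receiver this probe is done
        await anyio.sleep(0)  # stand-in for the real reachability check
        await tx.send(item)


async def stream_results(items: list[str]) -> AsyncGenerator[str, None]:
    send, recv = anyio.create_memory_object_stream[str]()
    async with anyio.create_task_group() as tg:
        for item in items:
            tg.start_soon(_probe, item, send.clone())
        send.close()  # drop the original handle so only the clones remain
        async with recv:
            async for result in recv:
                yield result  # emitted as soon as each probe finishes
```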
@@ -121,10 +121,15 @@ class PipelineFirstLayer(CustomMlxLayer):
         super().__init__(original_layer)
         self.r: int = r
         self.group = group
+        self.is_prefill: bool = False
 
     def __call__(self, x: mx.array, *args: object, **kwargs: object) -> mx.array:
         if self.r != 0:
             x = mx.distributed.recv_like(x, (self.r - 1), group=self.group)
+            if self.is_prefill:
+                # We want to avoid GPU timeout errors by evalling the distributed operation
+                # so that it stays on CPU, which does not have a timeout.
+                mx.eval(x)
         return self.original_layer(x, *args, **kwargs)
 
 
@@ -141,6 +146,7 @@ class PipelineLastLayer(CustomMlxLayer):
         self.s: int = s
         self.group = group
         self.original_layer_signature = signature(self.original_layer.__call__)
+        self.is_prefill: bool = False
 
     def __call__(self, x: mx.array, *args: object, **kwargs: object) -> mx.array:
         cache = self.original_layer_signature.bind_partial(
@@ -155,14 +161,25 @@ class PipelineLastLayer(CustomMlxLayer):
         )
         if cache is not None:
             cache.keys = mx.depends(cache.keys, output)  # type: ignore[reportUnknownMemberType]
+        if self.is_prefill:
+            mx.eval(output)
+            if cache is not None:
+                mx.eval(cache.keys)  # type: ignore
 
-        output = mx.distributed.all_gather(output, group=self.group)[
-            -output.shape[0] :
-        ]  # type: ignore
+        if not self.is_prefill:
+            output = mx.distributed.all_gather(output, group=self.group)[
+                -output.shape[0] :
+            ]
 
         return output
 
 
+def set_pipeline_prefill(model: nn.Module, is_prefill: bool) -> None:
+    for layer in model.layers:  # type: ignore
+        if isinstance(layer, (PipelineFirstLayer, PipelineLastLayer)):
+            layer.is_prefill = is_prefill
+
+
 def _inner_model(model: nn.Module) -> nn.Module:
     inner = getattr(model, "model", None)
     if isinstance(inner, nn.Module):
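set_pipeline_prefill flips the is_prefill flag on every pipeline boundary layer before prefill and back afterwards. A hedged sketch of one way a caller could scope that toggle so the flag is always reset even if prefill raises; this is not how the code above is invoked, just a wrapper pattern built on the set_pipeline_prefill defined in the hunk:

```python
from contextlib import contextmanager


@contextmanager
def pipeline_prefill(model):
    """Scope the prefill flag; guarantees the reset runs on any exit path."""
    set_pipeline_prefill(model, is_prefill=True)
    try:
        yield model
    finally:
        set_pipeline_prefill(model, is_prefill=False)
```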
@@ -24,6 +24,7 @@ from exo.shared.types.worker.runner_response import (
     GenerationResponse,
 )
 from exo.worker.engines.mlx import Model
+from exo.worker.engines.mlx.auto_parallel import set_pipeline_prefill
 from exo.worker.engines.mlx.cache import (
     CacheSnapshot,
     KVPrefixCache,
@@ -83,6 +84,8 @@ def prefill(
     if has_ssm:
         snapshots.append(snapshot_ssm_states(cache))
 
+    set_pipeline_prefill(model, is_prefill=True)
+
     # Use max_tokens=1 because max_tokens=0 does not work.
     # We just throw away the generated token - we only care about filling the cache
     for _ in stream_generate(
@@ -92,13 +95,15 @@ def prefill(
         max_tokens=1,
         sampler=sampler,
         prompt_cache=cache,
-        prefill_step_size=2048,
+        prefill_step_size=8192,
         kv_group_size=KV_GROUP_SIZE,
         kv_bits=KV_BITS,
         prompt_progress_callback=progress_callback,
     ):
         break  # Stop after first iteration - cache is now filled
 
+    set_pipeline_prefill(model, is_prefill=False)
+
     # stream_generate added 1 extra generated token to the cache, so we should trim it.
     # Because of needing to roll back arrays cache, we will generate on 2 tokens so trim 1 more.
     pre_gen = deepcopy(snapshots[-2]) if has_ssm else None
@@ -145,6 +150,8 @@ def warmup_inference(
     # Use a default sampler for warmup
     sampler = make_sampler(temp=0.0)
 
+    mx_barrier(group)
+
     logger.info("Generating warmup tokens")
     for _r in stream_generate(
         model=model,
@@ -298,6 +305,9 @@ def mlx_generate(
     )
     max_stop_len = max((len(s) for s in stop_sequences), default=0)
 
+    mx_barrier(group)
+    logger.info("Ready to prefill")
+
     # Prefill cache with all tokens except the last one
     prefill_tps, prefill_tokens, ssm_snapshots_list = prefill(
         model,
@@ -320,6 +330,9 @@ def mlx_generate(
     reasoning_tokens = 0
     think_start = tokenizer.think_start
     think_end = tokenizer.think_end
 
+    mx_barrier(group)
+
     for completion_tokens, out in enumerate(
         stream_generate(
             model=model,
@@ -329,8 +342,7 @@ def mlx_generate(
             sampler=sampler,
             logits_processors=logits_processors,
             prompt_cache=caches,
-            # TODO: Dynamically change prefill step size to be the maximum possible without timing out.
-            prefill_step_size=2048,
+            prefill_step_size=1,
             kv_group_size=KV_GROUP_SIZE,
             kv_bits=KV_BITS,
         ),
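prefill_step_size controls how many prompt tokens are processed per forward dispatch: the dedicated prefill pass now uses larger 8192-token chunks, while the decode-path call drops to 1, presumably because the bulk of the prompt was already consumed by prefill. A rough sketch of what the chunking amounts to; `process_chunk` is a hypothetical stand-in for one forward pass over a slice of the prompt:

```python
def chunked_prefill(process_chunk, prompt_tokens: list[int], step: int = 8192) -> None:
    # one dispatch per `step` tokens keeps any single GPU call bounded
    for start in range(0, len(prompt_tokens), step):
        process_chunk(prompt_tokens[start : start + step])
```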
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from datetime import datetime, timezone
 from random import random
 from typing import Iterator
@@ -345,29 +346,29 @@ class Worker:
             edges = set(
                 conn.edge for conn in self.state.topology.out_edges(self.node_id)
             )
-            conns = await check_reachable(
+            conns: defaultdict[NodeId, set[str]] = defaultdict(set)
+            async for ip, nid in check_reachable(
                 self.state.topology,
                 self.node_id,
                 self.state.node_network,
-            )
-            for nid in conns:
-                for ip in conns[nid]:
-                    edge = SocketConnection(
-                        # nonsense multiaddr
-                        sink_multiaddr=Multiaddr(address=f"/ip4/{ip}/tcp/52415")
-                        if "." in ip
-                        # nonsense multiaddr
-                        else Multiaddr(address=f"/ip6/{ip}/tcp/52415"),
-                    )
-                    if edge not in edges:
-                        logger.debug(f"ping discovered {edge=}")
-                        await self.event_sender.send(
-                            TopologyEdgeCreated(
-                                conn=Connection(
-                                    source=self.node_id, sink=nid, edge=edge
-                                )
-                            )
-                        )
+            ):
+                if ip in conns[nid]:
+                    continue
+                conns[nid].add(ip)
+                edge = SocketConnection(
+                    # nonsense multiaddr
+                    sink_multiaddr=Multiaddr(address=f"/ip4/{ip}/tcp/52415")
+                    if "." in ip
+                    # nonsense multiaddr
+                    else Multiaddr(address=f"/ip6/{ip}/tcp/52415"),
+                )
+                if edge not in edges:
+                    logger.debug(f"ping discovered {edge=}")
+                    await self.event_sender.send(
+                        TopologyEdgeCreated(
+                            conn=Connection(source=self.node_id, sink=nid, edge=edge)
+                        )
+                    )
 
             for conn in self.state.topology.out_edges(self.node_id):
                 if not isinstance(conn.edge, SocketConnection):
@@ -377,8 +378,7 @@ class Worker:
                     continue
                 if (
                     conn.sink not in conns
-                    or conn.edge.sink_multiaddr.ip_address
-                    not in conns.get(conn.sink, set())
+                    or conn.edge.sink_multiaddr.ip_address not in conns[conn.sink]
                 ):
                     logger.debug(f"ping failed to discover {conn=}")
                     await self.event_sender.send(TopologyEdgeDeleted(conn=conn))
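The worker now consumes probe results as they arrive, folding them into a defaultdict(set) and skipping (node, ip) pairs it has already handled. The dedup shape in isolation, with made-up node ids and addresses:

```python
from collections import defaultdict

seen: defaultdict[str, set[str]] = defaultdict(set)
results = [("node-a", "10.0.0.1"), ("node-a", "10.0.0.1"), ("node-b", "10.0.0.2")]
for nid, ip in results:
    if ip in seen[nid]:
        continue  # duplicate probe result, already handled
    seen[nid].add(ip)
    print(f"new edge to {nid} via {ip}")  # fires once per unique (node, ip) pair
```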
@@ -810,8 +810,9 @@ def patch_kimi_tokenizer(tokenizer: TokenizerWrapper):
 
     # kimi has a fixed function naming scheme, with a json formatted arg
     # functions.multiply:0 <|tool_call_argument_begin|> {"a": 2, "b": 3}
+    # Also needs to handle tools like call_0<|tool_call_argument_begin|>{"filePath": "..."}
     _func_name_regex = re.compile(
-        r"^\s*(.+):\d+\s*<\|tool_call_argument_begin\|>", re.DOTALL
+        r"^\s*(.+)[:](\d+)\s*<\|tool_call_argument_begin\|>", re.DOTALL
     )
     _func_arg_regex = re.compile(r"<\|tool_call_argument_begin\|>\s*(.*)\s*", re.DOTALL)
@@ -835,9 +836,10 @@ def patch_kimi_tokenizer(tokenizer: TokenizerWrapper):
         func_name_match = _func_name_regex.search(text)
         if func_name_match is None:
             raise ValueError(f"Could not parse function name from tool call: {text!r}")
-        func_name = func_name_match.group(1)
+        original_func_name = func_name_match.group(1)
+        tool_id = func_name_match.group(2)
         # strip off the `functions.` prefix, if it exists.
-        func_name = func_name[func_name.find(".") + 1 :]
+        func_name = original_func_name[original_func_name.find(".") + 1 :]
 
         func_args_match = _func_arg_regex.search(text)
         if func_args_match is None:
@@ -846,7 +848,11 @@ def patch_kimi_tokenizer(tokenizer: TokenizerWrapper):
         # the args should be valid json - no need to check against our tools to deserialize
         arg_dct = _deserialize(func_args)  # pyright: ignore[reportAny]
 
-        return dict(name=func_name, arguments=arg_dct)  # pyright: ignore[reportAny]
+        return dict(
+            id=f"{original_func_name}:{tool_id}",
+            name=func_name,
+            arguments=arg_dct,  # pyright: ignore[reportAny]
+        )
 
     tokenizer._tool_call_start = tool_call_start
     tokenizer._tool_call_end = tool_call_end
@@ -929,7 +935,13 @@ def _validate_single_tool(obj: dict[str, Any]) -> ToolCallItem:
         and ((args := obj.get("arguments")) is not None)
         and isinstance(name, str)
     ):
-        return ToolCallItem(name=name, arguments=json.dumps(args))
+        raw_id: object = obj.get("id")
+        extra = {"id": str(raw_id)} if raw_id is not None else {}
+        return ToolCallItem(
+            **extra,
+            name=name,
+            arguments=json.dumps(args),
+        )
     else:
         raise ValidationError
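The updated regex captures the numeric suffix as its own group so the tool call id can be rebuilt as name:index. A quick standalone check against the example string from the comment in the hunk above:

```python
import re

_func_name_regex = re.compile(
    r"^\s*(.+)[:](\d+)\s*<\|tool_call_argument_begin\|>", re.DOTALL
)

text = 'functions.multiply:0 <|tool_call_argument_begin|> {"a": 2, "b": 3}'
m = _func_name_regex.search(text)
assert m is not None
original_func_name, tool_id = m.group(1), m.group(2)
# strip off the `functions.` prefix, mirroring the parser above
func_name = original_func_name[original_func_name.find(".") + 1 :]
print(f"{original_func_name}:{tool_id}", func_name)  # functions.multiply:0 multiply
```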
86
uv.lock
generated
@@ -16,6 +16,7 @@ prerelease-mode = "allow"
 
 [manifest]
 members = [
     "exo",
+    "exo-bench",
     "exo-pyo3-bindings",
 ]
@@ -367,6 +368,7 @@ dependencies = [
     { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
     { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -376,6 +378,7 @@ dependencies = [
     { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "tomlkit", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "zstandard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 
 [package.dev-dependencies]
@@ -404,6 +407,7 @@ requires-dist = [
     { name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.5" },
     { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.5" },
     { name = "mlx-lm", specifier = "==0.30.6" },
+    { name = "msgspec", specifier = ">=0.19.0" },
     { name = "openai-harmony", specifier = ">=0.0.8" },
     { name = "pillow", specifier = ">=11.0,<12.0" },
     { name = "psutil", specifier = ">=7.0.0" },
@@ -413,6 +417,7 @@ requires-dist = [
     { name = "tiktoken", specifier = ">=0.12.0" },
     { name = "tomlkit", specifier = ">=0.14.0" },
     { name = "types-aiofiles", specifier = ">=24.1.0.20250708" },
+    { name = "zstandard", specifier = ">=0.23.0" },
 ]
 
 [package.metadata.requires-dev]
@@ -425,6 +430,27 @@ dev = [
     { name = "ruff", specifier = ">=0.11.13" },
 ]
 
+[[package]]
+name = "exo-bench"
+version = "0.1.0"
+source = { editable = "bench" }
+dependencies = [
+    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "huggingface-hub", specifier = ">=0.33.4" },
+    { name = "jinja2", specifier = ">=3.1.0" },
+    { name = "loguru", specifier = ">=0.7.3" },
+    { name = "tiktoken", specifier = ">=0.12.0" },
+    { name = "transformers", specifier = ">=5.0.0" },
+]
+
 [[package]]
 name = "exo-pyo3-bindings"
 version = "0.1.0"
@@ -1104,6 +1130,32 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
 ]
 
+[[package]]
+name = "msgspec"
+version = "0.20.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/d1/b902d38b6e5ba3bdddbec469bba388d647f960aeed7b5b3623a8debe8a76/msgspec-0.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9c1ff8db03be7598b50dd4b4a478d6fe93faae3bd54f4f17aa004d0e46c14c46", size = 196463, upload-time = "2025-11-24T03:55:43.405Z" },
+    { url = "https://files.pythonhosted.org/packages/57/b6/eff0305961a1d9447ec2b02f8c73c8946f22564d302a504185b730c9a761/msgspec-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f6532369ece217fd37c5ebcfd7e981f2615628c21121b7b2df9d3adcf2fd69b8", size = 188650, upload-time = "2025-11-24T03:55:44.761Z" },
+    { url = "https://files.pythonhosted.org/packages/99/93/f2ec1ae1de51d3fdee998a1ede6b2c089453a2ee82b5c1b361ed9095064a/msgspec-0.20.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9a1697da2f85a751ac3cc6a97fceb8e937fc670947183fb2268edaf4016d1ee", size = 218834, upload-time = "2025-11-24T03:55:46.441Z" },
+    { url = "https://files.pythonhosted.org/packages/28/83/36557b04cfdc317ed8a525c4993b23e43a8fbcddaddd78619112ca07138c/msgspec-0.20.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7fac7e9c92eddcd24c19d9e5f6249760941485dff97802461ae7c995a2450111", size = 224917, upload-time = "2025-11-24T03:55:48.06Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/56/362037a1ed5be0b88aced59272442c4b40065c659700f4b195a7f4d0ac88/msgspec-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f953a66f2a3eb8d5ea64768445e2bb301d97609db052628c3e1bcb7d87192a9f", size = 222821, upload-time = "2025-11-24T03:55:49.388Z" },
+    { url = "https://files.pythonhosted.org/packages/92/75/fa2370ec341cedf663731ab7042e177b3742645c5dd4f64dc96bd9f18a6b/msgspec-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:247af0313ae64a066d3aea7ba98840f6681ccbf5c90ba9c7d17f3e39dbba679c", size = 227227, upload-time = "2025-11-24T03:55:51.125Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/18/62dc13ab0260c7d741dda8dc7f481495b93ac9168cd887dda5929880eef8/msgspec-0.20.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:eead16538db1b3f7ec6e3ed1f6f7c5dec67e90f76e76b610e1ffb5671815633a", size = 196407, upload-time = "2025-11-24T03:55:55.001Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/1d/b9949e4ad6953e9f9a142c7997b2f7390c81e03e93570c7c33caf65d27e1/msgspec-0.20.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:703c3bb47bf47801627fb1438f106adbfa2998fe586696d1324586a375fca238", size = 188889, upload-time = "2025-11-24T03:55:56.311Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/19/f8bb2dc0f1bfe46cc7d2b6b61c5e9b5a46c62298e8f4d03bbe499c926180/msgspec-0.20.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cdb227dc585fb109305cee0fd304c2896f02af93ecf50a9c84ee54ee67dbb42", size = 219691, upload-time = "2025-11-24T03:55:57.908Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/8e/6b17e43f6eb9369d9858ee32c97959fcd515628a1df376af96c11606cf70/msgspec-0.20.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27d35044dd8818ac1bd0fedb2feb4fbdff4e3508dd7c5d14316a12a2d96a0de0", size = 224918, upload-time = "2025-11-24T03:55:59.322Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/db/0e833a177db1a4484797adba7f429d4242585980b90882cc38709e1b62df/msgspec-0.20.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b4296393a29ee42dd25947981c65506fd4ad39beaf816f614146fa0c5a6c91ae", size = 223436, upload-time = "2025-11-24T03:56:00.716Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/30/d2ee787f4c918fd2b123441d49a7707ae9015e0e8e1ab51aa7967a97b90e/msgspec-0.20.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:205fbdadd0d8d861d71c8f3399fe1a82a2caf4467bc8ff9a626df34c12176980", size = 227190, upload-time = "2025-11-24T03:56:02.371Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/06/3da3fc9aaa55618a8f43eb9052453cfe01f82930bca3af8cea63a89f3a11/msgspec-0.20.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f84703e0e6ef025663dd1de828ca028774797b8155e070e795c548f76dde65d5", size = 200389, upload-time = "2025-11-24T03:56:06.375Z" },
+    { url = "https://files.pythonhosted.org/packages/83/3b/cc4270a5ceab40dfe1d1745856951b0a24fd16ac8539a66ed3004a60c91e/msgspec-0.20.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7c83fc24dd09cf1275934ff300e3951b3adc5573f0657a643515cc16c7dee131", size = 193198, upload-time = "2025-11-24T03:56:07.742Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/ae/4c7905ac53830c8e3c06fdd60e3cdcfedc0bbc993872d1549b84ea21a1bd/msgspec-0.20.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f13ccb1c335a124e80c4562573b9b90f01ea9521a1a87f7576c2e281d547f56", size = 225973, upload-time = "2025-11-24T03:56:09.18Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/da/032abac1de4d0678d99eaeadb1323bd9d247f4711c012404ba77ed6f15ca/msgspec-0.20.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17c2b5ca19f19306fc83c96d85e606d2cc107e0caeea85066b5389f664e04846", size = 229509, upload-time = "2025-11-24T03:56:10.898Z" },
+    { url = "https://files.pythonhosted.org/packages/69/52/fdc7bdb7057a166f309e0b44929e584319e625aaba4771b60912a9321ccd/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d931709355edabf66c2dd1a756b2d658593e79882bc81aae5964969d5a291b63", size = 230434, upload-time = "2025-11-24T03:56:12.48Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/fe/1dfd5f512b26b53043884e4f34710c73e294e7cc54278c3fe28380e42c37/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:565f915d2e540e8a0c93a01ff67f50aebe1f7e22798c6a25873f9fda8d1325f8", size = 231758, upload-time = "2025-11-24T03:56:13.765Z" },
+]
+
 [[package]]
 name = "multidict"
 version = "6.7.0"
@@ -2452,3 +2504,37 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cf/72/6a85bba382f22cf78add705d8c3731748397d986e197e53ecc7835e76de7/yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c", size = 341473, upload-time = "2025-10-06T14:12:10.994Z" },
     { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" },
 ]
+
+[[package]]
+name = "zstandard"
+version = "0.25.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" },
+    { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" },
+    { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" },
+    { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" },
+    { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" },
+    { url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 6364818, upload-time = "2025-09-14T22:18:03.769Z" },
+    { url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" },
+    { url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" },
+]