mirror of
https://github.com/exo-explore/exo.git
synced 2026-01-12 16:09:46 -05:00
Compare commits
6 Commits
gather-lin
...
sami/fix-l
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6088493841 | ||
|
|
007eb80029 | ||
|
|
8d7b6789b3 | ||
|
|
3c5b7ea670 | ||
|
|
b74a610537 | ||
|
|
18c4e49f91 |
3
.github/workflows/build-app.yml
vendored
3
.github/workflows/build-app.yml
vendored
@@ -1,6 +1,7 @@
|
||||
name: Build EXO macOS DMG
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
@@ -35,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Derive release version from tag
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "test-app" ]]; then
|
||||
if [[ "$GITHUB_REF_NAME" == "test-app" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
VERSION="0.0.0-alpha.0"
|
||||
echo "IS_ALPHA=true" >> $GITHUB_ENV
|
||||
else
|
||||
|
||||
12
.github/workflows/pipeline.yml
vendored
12
.github/workflows/pipeline.yml
vendored
@@ -20,6 +20,12 @@ jobs:
|
||||
with:
|
||||
nix_path: nixpkgs=channel:nixos-unstable
|
||||
|
||||
- uses: cachix/cachix-action@v14
|
||||
name: Configure Cachix
|
||||
with:
|
||||
name: exo
|
||||
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
|
||||
|
||||
- name: Configure git user
|
||||
run: |
|
||||
git config --local user.email "github-actions@users.noreply.github.com"
|
||||
@@ -101,6 +107,12 @@ jobs:
|
||||
with:
|
||||
nix_path: nixpkgs=channel:nixos-unstable
|
||||
|
||||
- uses: cachix/cachix-action@v14
|
||||
name: Configure Cachix
|
||||
with:
|
||||
name: exo
|
||||
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
|
||||
|
||||
- name: Run nix flake check
|
||||
run: |
|
||||
nix flake check
|
||||
|
||||
@@ -305,7 +305,10 @@ curl -X DELETE http://localhost:52415/instance/YOUR_INSTANCE_ID
|
||||
- List all models: `curl http://localhost:52415/models`
|
||||
- Inspect instance IDs and deployment state: `curl http://localhost:52415/state`
|
||||
|
||||
For further details, see API types and endpoints in [src/exo/master/api.py](src/exo/master/api.py).
|
||||
For further details, see:
|
||||
|
||||
- API basic documentation in [docs/api.md](docs/api.md).
|
||||
- API types and endpoints in [src/exo/master/api.py](src/exo/master/api.py).
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -56,8 +56,12 @@ struct ContentView: View {
|
||||
}
|
||||
|
||||
private var shouldShowLocalNetworkWarning: Bool {
|
||||
// Only show warning if:
|
||||
// 1. Local network is not working
|
||||
// 2. EXO is running
|
||||
// 3. We've had a successful connection before (avoids false positive on fresh install)
|
||||
if case .notWorking = localNetworkChecker.status {
|
||||
return controller.status != .stopped
|
||||
return controller.status != .stopped && localNetworkChecker.hasWorkedBefore
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -7,6 +7,8 @@ import os.log
|
||||
/// macOS local network permission can appear enabled in System Preferences but not
|
||||
/// actually work after a restart. This service detects this by creating a UDP
|
||||
/// connection to the mDNS multicast address (224.0.0.251:5353).
|
||||
///
|
||||
/// Only shows warnings if permission previously worked (to avoid false positives on fresh install).
|
||||
@MainActor
|
||||
final class LocalNetworkChecker: ObservableObject {
|
||||
enum Status: Equatable {
|
||||
@@ -35,10 +37,14 @@ final class LocalNetworkChecker: ObservableObject {
|
||||
}
|
||||
|
||||
private static let logger = Logger(subsystem: "io.exo.EXO", category: "LocalNetworkChecker")
|
||||
private static let hasWorkedBeforeKey = "LocalNetworkChecker.hasWorkedBefore"
|
||||
|
||||
@Published private(set) var status: Status = .unknown
|
||||
@Published private(set) var lastConnectionState: String = "none"
|
||||
|
||||
/// True if we've ever had a successful connection (persisted across launches)
|
||||
@Published private(set) var hasWorkedBefore: Bool = UserDefaults.standard.bool(forKey: hasWorkedBeforeKey)
|
||||
|
||||
private var connection: NWConnection?
|
||||
private var checkTask: Task<Void, Never>?
|
||||
|
||||
@@ -52,7 +58,14 @@ final class LocalNetworkChecker: ObservableObject {
|
||||
guard let self else { return }
|
||||
let result = await self.performCheck()
|
||||
self.status = result
|
||||
Self.logger.info("Local network check complete: \(result.displayText)")
|
||||
|
||||
// Record success so we can detect regressions on future launches
|
||||
if case .working = result {
|
||||
self.hasWorkedBefore = true
|
||||
UserDefaults.standard.set(true, forKey: Self.hasWorkedBeforeKey)
|
||||
}
|
||||
|
||||
Self.logger.info("Local network check complete: \(result.displayText), hasWorkedBefore: \(self.hasWorkedBefore)")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -199,7 +199,13 @@
|
||||
const rawProgress = (downloadPayload as Record<string, unknown>).download_progress
|
||||
?? (downloadPayload as Record<string, unknown>).downloadProgress
|
||||
?? {};
|
||||
const totalBytes = getBytes((rawProgress as Record<string, unknown>).total_bytes ?? (rawProgress as Record<string, unknown>).totalBytes);
|
||||
// For DownloadCompleted, total_bytes is at top level; for DownloadOngoing, it's inside download_progress
|
||||
const totalBytes = getBytes(
|
||||
(downloadPayload as Record<string, unknown>).total_bytes
|
||||
?? (downloadPayload as Record<string, unknown>).totalBytes
|
||||
?? (rawProgress as Record<string, unknown>).total_bytes
|
||||
?? (rawProgress as Record<string, unknown>).totalBytes
|
||||
);
|
||||
const downloadedBytes = getBytes((rawProgress as Record<string, unknown>).downloaded_bytes ?? (rawProgress as Record<string, unknown>).downloadedBytes);
|
||||
const speed = (rawProgress as Record<string, unknown>).speed as number ?? 0;
|
||||
const etaMs = (rawProgress as Record<string, unknown>).eta_ms as number ?? (rawProgress as Record<string, unknown>).etaMs as number ?? 0;
|
||||
@@ -332,8 +338,13 @@
|
||||
<div class="text-lg font-mono text-white truncate">{node.nodeName}</div>
|
||||
<div class="text-xs text-exo-light-gray font-mono truncate">{node.nodeId}</div>
|
||||
</div>
|
||||
<div class="text-xs font-mono uppercase tracking-wider whitespace-nowrap shrink-0">
|
||||
<span class="text-green-400">{node.models.filter(m => m.status === 'completed').length}</span><span class="text-exo-yellow"> /{node.models.length} models</span>
|
||||
<div class="text-xs font-mono uppercase tracking-wider whitespace-nowrap shrink-0 text-right">
|
||||
<div>
|
||||
<span class="text-green-400">{node.models.filter(m => m.status === 'completed').length}</span><span class="text-exo-yellow"> / {node.models.length} models</span>
|
||||
</div>
|
||||
<div class="text-exo-light-gray normal-case tracking-normal">
|
||||
{formatBytes(node.models.filter(m => m.status === 'completed').reduce((sum, m) => sum + m.totalBytes, 0))} on disk
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -385,7 +396,7 @@
|
||||
</div>
|
||||
|
||||
<div class="flex items-center justify-between text-xs font-mono text-exo-light-gray">
|
||||
<span>{model.status === 'completed' ? 'Completed' : `${formatSpeed(model.speed)} • ETA ${formatEta(model.etaMs)}`}</span>
|
||||
<span>{model.status === 'completed' ? `Completed (${formatBytes(model.totalBytes)})` : `${formatSpeed(model.speed)} • ETA ${formatEta(model.etaMs)}`}</span>
|
||||
{#if model.status !== 'completed'}
|
||||
<span>{model.files.length} file{model.files.length === 1 ? '' : 's'}</span>
|
||||
{/if}
|
||||
|
||||
212
docs/api.md
Normal file
212
docs/api.md
Normal file
@@ -0,0 +1,212 @@
|
||||
# EXO API – Technical Reference
|
||||
|
||||
This document describes the REST API exposed by the **EXO ** service, as implemented in:
|
||||
|
||||
`src/exo/master/api.py`
|
||||
|
||||
The API is used to manage model instances in the cluster, inspect cluster state, and perform inference using an OpenAI-compatible interface.
|
||||
|
||||
Base URL example:
|
||||
|
||||
```
|
||||
http://localhost:52415
|
||||
```
|
||||
|
||||
## 1. General / Meta Endpoints
|
||||
|
||||
### Get Master Node ID
|
||||
|
||||
**GET** `/node_id`
|
||||
|
||||
Returns the identifier of the current master node.
|
||||
|
||||
**Response (example):**
|
||||
|
||||
```json
|
||||
{
|
||||
"node_id": "node-1234"
|
||||
}
|
||||
```
|
||||
|
||||
### Get Cluster State
|
||||
|
||||
**GET** `/state`
|
||||
|
||||
Returns the current state of the cluster, including nodes and active instances.
|
||||
|
||||
**Response:**
|
||||
JSON object describing topology, nodes, and instances.
|
||||
|
||||
### Get Events
|
||||
|
||||
**GET** `/events`
|
||||
|
||||
Returns the list of internal events recorded by the master (mainly for debugging and observability).
|
||||
|
||||
**Response:**
|
||||
Array of event objects.
|
||||
|
||||
## 2. Model Instance Management
|
||||
|
||||
### Create Instance
|
||||
|
||||
**POST** `/instance`
|
||||
|
||||
Creates a new model instance in the cluster.
|
||||
|
||||
**Request body (example):**
|
||||
|
||||
```json
|
||||
{
|
||||
"instance": {
|
||||
"model_id": "llama-3.2-1b",
|
||||
"placement": { }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
JSON description of the created instance.
|
||||
|
||||
### Delete Instance
|
||||
|
||||
**DELETE** `/instance/{instance_id}`
|
||||
|
||||
Deletes an existing instance by ID.
|
||||
|
||||
**Path parameters:**
|
||||
|
||||
* `instance_id`: string, ID of the instance to delete
|
||||
|
||||
**Response:**
|
||||
Status / confirmation JSON.
|
||||
|
||||
### Get Instance
|
||||
|
||||
**GET** `/instance/{instance_id}`
|
||||
|
||||
Returns details of a specific instance.
|
||||
|
||||
**Path parameters:**
|
||||
|
||||
* `instance_id`: string
|
||||
|
||||
**Response:**
|
||||
JSON description of the instance.
|
||||
|
||||
### Preview Placements
|
||||
|
||||
**GET** `/instance/previews?model_id=...`
|
||||
|
||||
Returns possible placement previews for a given model.
|
||||
|
||||
**Query parameters:**
|
||||
|
||||
* `model_id`: string, required
|
||||
|
||||
**Response:**
|
||||
Array of placement preview objects.
|
||||
|
||||
### Compute Placement
|
||||
|
||||
**GET** `/instance/placement`
|
||||
|
||||
Computes a placement for a potential instance without creating it.
|
||||
|
||||
**Query parameters (typical):**
|
||||
|
||||
* `model_id`: string
|
||||
* `sharding`: string or config
|
||||
* `instance_meta`: JSON-encoded metadata
|
||||
* `min_nodes`: integer
|
||||
|
||||
**Response:**
|
||||
JSON object describing the proposed placement / instance configuration.
|
||||
|
||||
### Place Instance (Dry Operation)
|
||||
|
||||
**POST** `/place_instance`
|
||||
|
||||
Performs a placement operation for an instance (planning step), without necessarily creating it.
|
||||
|
||||
**Request body:**
|
||||
JSON describing the instance to be placed.
|
||||
|
||||
**Response:**
|
||||
Placement result.
|
||||
|
||||
## 3. Models
|
||||
|
||||
### List Models
|
||||
|
||||
**GET** `/models`
|
||||
**GET** `/v1/models` (alias)
|
||||
|
||||
Returns the list of available models and their metadata.
|
||||
|
||||
**Response:**
|
||||
Array of model descriptors.
|
||||
|
||||
## 4. Inference / Chat Completions
|
||||
|
||||
### OpenAI-Compatible Chat Completions
|
||||
|
||||
**POST** `/v1/chat/completions`
|
||||
|
||||
Executes a chat completion request using an OpenAI-compatible schema. Supports streaming and non-streaming modes.
|
||||
|
||||
**Request body (example):**
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "llama-3.2-1b",
|
||||
"messages": [
|
||||
{ "role": "system", "content": "You are a helpful assistant." },
|
||||
{ "role": "user", "content": "Hello" }
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
OpenAI-compatible chat completion response.
|
||||
|
||||
### Benchmarked Chat Completions
|
||||
|
||||
**POST** `/bench/chat/completions`
|
||||
|
||||
Same as `/v1/chat/completions`, but also returns performance and generation statistics.
|
||||
|
||||
**Request body:**
|
||||
Same schema as `/v1/chat/completions`.
|
||||
|
||||
**Response:**
|
||||
Chat completion plus benchmarking metrics.
|
||||
|
||||
## 5. Complete Endpoint Summary
|
||||
|
||||
```
|
||||
GET /node_id
|
||||
GET /state
|
||||
GET /events
|
||||
|
||||
POST /instance
|
||||
GET /instance/{instance_id}
|
||||
DELETE /instance/{instance_id}
|
||||
|
||||
GET /instance/previews
|
||||
GET /instance/placement
|
||||
POST /place_instance
|
||||
|
||||
GET /models
|
||||
GET /v1/models
|
||||
|
||||
POST /v1/chat/completions
|
||||
POST /bench/chat/completions
|
||||
```
|
||||
|
||||
## 6. Notes
|
||||
|
||||
* The `/v1/chat/completions` endpoint is compatible with the OpenAI API format, so existing OpenAI clients can be pointed to EXO by changing the base URL.
|
||||
* The instance placement endpoints allow you to plan and preview cluster allocations before actually creating instances.
|
||||
* The `/events` and `/state` endpoints are primarily intended for operational visibility and debugging.
|
||||
14
flake.nix
14
flake.nix
@@ -16,12 +16,11 @@
|
||||
};
|
||||
};
|
||||
|
||||
# TODO: figure out caching story
|
||||
# nixConfig = {
|
||||
# # nix community cachix
|
||||
# extra-trusted-public-keys = "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=";
|
||||
# extra-substituters = "https://nix-community.cachix.org";
|
||||
# };
|
||||
nixConfig = {
|
||||
# nix community cachix
|
||||
extra-trusted-public-keys = "exo.cachix.org-1:okq7hl624TBeAR3kV+g39dUFSiaZgLRkLsFBCuJ2NZI=";
|
||||
extra-substituters = "https://exo.cachix.org";
|
||||
};
|
||||
|
||||
outputs =
|
||||
inputs:
|
||||
@@ -73,6 +72,9 @@
|
||||
packages =
|
||||
with pkgs;
|
||||
[
|
||||
# FORMATTING
|
||||
treefmtEval.config.build.wrapper
|
||||
|
||||
# PYTHON
|
||||
python313
|
||||
uv
|
||||
|
||||
@@ -2,6 +2,7 @@ from exo.shared.apply import apply_node_download_progress
|
||||
from exo.shared.tests.conftest import get_pipeline_shard_metadata
|
||||
from exo.shared.types.common import NodeId
|
||||
from exo.shared.types.events import NodeDownloadProgress
|
||||
from exo.shared.types.memory import Memory
|
||||
from exo.shared.types.state import State
|
||||
from exo.shared.types.worker.downloads import DownloadCompleted
|
||||
from exo.worker.tests.constants import MODEL_A_ID, MODEL_B_ID
|
||||
@@ -13,6 +14,7 @@ def test_apply_node_download_progress():
|
||||
event = DownloadCompleted(
|
||||
node_id=NodeId("node-1"),
|
||||
shard_metadata=shard1,
|
||||
total_bytes=Memory(),
|
||||
)
|
||||
|
||||
new_state = apply_node_download_progress(
|
||||
@@ -28,10 +30,12 @@ def test_apply_two_node_download_progress():
|
||||
event1 = DownloadCompleted(
|
||||
node_id=NodeId("node-1"),
|
||||
shard_metadata=shard1,
|
||||
total_bytes=Memory(),
|
||||
)
|
||||
event2 = DownloadCompleted(
|
||||
node_id=NodeId("node-1"),
|
||||
shard_metadata=shard2,
|
||||
total_bytes=Memory(),
|
||||
)
|
||||
state = State(downloads={NodeId("node-1"): [event1]})
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ class DownloadPending(BaseDownloadProgress):
|
||||
|
||||
|
||||
class DownloadCompleted(BaseDownloadProgress):
|
||||
pass
|
||||
total_bytes: Memory
|
||||
|
||||
|
||||
class DownloadFailed(BaseDownloadProgress):
|
||||
|
||||
@@ -217,7 +217,9 @@ class Worker:
|
||||
)
|
||||
if initial_progress.status == "complete":
|
||||
progress = DownloadCompleted(
|
||||
shard_metadata=shard, node_id=self.node_id
|
||||
shard_metadata=shard,
|
||||
node_id=self.node_id,
|
||||
total_bytes=initial_progress.total_bytes,
|
||||
)
|
||||
self.download_status[shard.model_meta.model_id] = progress
|
||||
await self.event_sender.send(
|
||||
@@ -364,7 +366,11 @@ class Worker:
|
||||
nonlocal self
|
||||
nonlocal last_progress_time
|
||||
if progress.status == "complete":
|
||||
status = DownloadCompleted(shard_metadata=shard, node_id=self.node_id)
|
||||
status = DownloadCompleted(
|
||||
shard_metadata=shard,
|
||||
node_id=self.node_id,
|
||||
total_bytes=progress.total_bytes,
|
||||
)
|
||||
self.download_status[shard.model_meta.model_id] = status
|
||||
# Footgun!
|
||||
self.event_sender.send_nowait(
|
||||
@@ -457,7 +463,9 @@ class Worker:
|
||||
) in self.shard_downloader.get_shard_download_status():
|
||||
if progress.status == "complete":
|
||||
status = DownloadCompleted(
|
||||
node_id=self.node_id, shard_metadata=progress.shard
|
||||
node_id=self.node_id,
|
||||
shard_metadata=progress.shard,
|
||||
total_bytes=progress.total_bytes,
|
||||
)
|
||||
elif progress.status in ["in_progress", "not_started"]:
|
||||
if progress.downloaded_bytes_this_session.in_bytes == 0:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import exo.worker.plan as plan_mod
|
||||
from exo.shared.types.common import NodeId
|
||||
from exo.shared.types.memory import Memory
|
||||
from exo.shared.types.models import ModelId
|
||||
from exo.shared.types.tasks import LoadModel
|
||||
from exo.shared.types.worker.downloads import DownloadCompleted, DownloadProgress
|
||||
@@ -94,13 +95,23 @@ def test_plan_loads_model_when_all_shards_downloaded_and_waiting():
|
||||
|
||||
# Local node has already marked its shard as downloaded (not actually used by _load_model)
|
||||
local_download_status = {
|
||||
MODEL_A_ID: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)
|
||||
MODEL_A_ID: DownloadCompleted(
|
||||
shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
|
||||
)
|
||||
}
|
||||
|
||||
# Global view has completed downloads for both nodes
|
||||
global_download_status = {
|
||||
NODE_A: [DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)],
|
||||
NODE_B: [DownloadCompleted(shard_metadata=shard2, node_id=NODE_B)],
|
||||
NODE_A: [
|
||||
DownloadCompleted(
|
||||
shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
|
||||
)
|
||||
],
|
||||
NODE_B: [
|
||||
DownloadCompleted(
|
||||
shard_metadata=shard2, node_id=NODE_B, total_bytes=Memory()
|
||||
)
|
||||
],
|
||||
}
|
||||
|
||||
result = plan_mod.plan(
|
||||
@@ -140,7 +151,9 @@ def test_plan_does_not_request_download_when_shard_already_downloaded():
|
||||
|
||||
# Local status claims the shard is downloaded already
|
||||
local_download_status = {
|
||||
MODEL_A_ID: DownloadCompleted(shard_metadata=shard, node_id=NODE_A)
|
||||
MODEL_A_ID: DownloadCompleted(
|
||||
shard_metadata=shard, node_id=NODE_A, total_bytes=Memory()
|
||||
)
|
||||
}
|
||||
|
||||
# Global view hasn't caught up yet (no completed shards recorded for NODE_A)
|
||||
@@ -192,10 +205,16 @@ def test_plan_does_not_load_model_until_all_shards_downloaded_globally():
|
||||
|
||||
# Only NODE_A's shard is recorded as downloaded globally
|
||||
local_download_status = {
|
||||
MODEL_A_ID: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)
|
||||
MODEL_A_ID: DownloadCompleted(
|
||||
shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
|
||||
)
|
||||
}
|
||||
global_download_status = {
|
||||
NODE_A: [DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)],
|
||||
NODE_A: [
|
||||
DownloadCompleted(
|
||||
shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
|
||||
)
|
||||
],
|
||||
NODE_B: [], # NODE_B has no downloads completed yet
|
||||
}
|
||||
|
||||
@@ -212,9 +231,15 @@ def test_plan_does_not_load_model_until_all_shards_downloaded_globally():
|
||||
assert result is None
|
||||
|
||||
global_download_status = {
|
||||
NODE_A: [DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)],
|
||||
NODE_A: [
|
||||
DownloadCompleted(
|
||||
shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
|
||||
)
|
||||
],
|
||||
NODE_B: [
|
||||
DownloadCompleted(shard_metadata=shard2, node_id=NODE_B)
|
||||
DownloadCompleted(
|
||||
shard_metadata=shard2, node_id=NODE_B, total_bytes=Memory()
|
||||
)
|
||||
], # NODE_B has no downloads completed yet
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user