fix local network warning

nix: enable cachix
Enable cachix and push to it in the pipeline.yml workflow. This won't cache a huge amount yet but will automatically extend our caching as we build more of the repo with Nix in CI. It can also be used by local users by accepting our cache to improve the speed of local builds. Test plan: - CI
2026-01-12 16:09:46 -05:00 · 2026-01-13 00:39:03 +05:00 · 2026-01-12 17:24:59 +01:00 · 2026-01-12 16:34:29 +01:00 · 2026-01-12 12:14:21 +01:00 · 2026-01-11 18:44:40 +00:00
12 changed files with 321 additions and 26 deletions
--- a/.github/workflows/build-app.yml
+++ b/.github/workflows/build-app.yml
@@ -1,6 +1,7 @@
 name: Build EXO macOS DMG

 on:
+  workflow_dispatch:
  push:
    tags:
      - "v*"
@@ -35,7 +36,7 @@ jobs:

      - name: Derive release version from tag
        run: |
-          if [[ "$GITHUB_REF_NAME" == "test-app" ]]; then
+          if [[ "$GITHUB_REF_NAME" == "test-app" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            VERSION="0.0.0-alpha.0"
            echo "IS_ALPHA=true" >> $GITHUB_ENV
          else
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -20,6 +20,12 @@ jobs:
        with:
          nix_path: nixpkgs=channel:nixos-unstable

+      - uses: cachix/cachix-action@v14
+        name: Configure Cachix
+        with:
+          name: exo
+          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
+
      - name: Configure git user
        run: |
          git config --local user.email "github-actions@users.noreply.github.com"
@@ -101,6 +107,12 @@ jobs:
        with:
          nix_path: nixpkgs=channel:nixos-unstable

+      - uses: cachix/cachix-action@v14
+        name: Configure Cachix
+        with:
+          name: exo
+          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
+
      - name: Run nix flake check
        run: |
          nix flake check
--- a/README.md
+++ b/README.md
@@ -305,7 +305,10 @@ curl -X DELETE http://localhost:52415/instance/YOUR_INSTANCE_ID
 - List all models: `curl http://localhost:52415/models`
 - Inspect instance IDs and deployment state: `curl http://localhost:52415/state`

-For further details, see API types and endpoints in [src/exo/master/api.py](src/exo/master/api.py).
+For further details, see:
+
+- Basic API documentation in [docs/api.md](docs/api.md).
+- API types and endpoints in [src/exo/master/api.py](src/exo/master/api.py).

 ---

--- a/app/EXO/EXO/ContentView.swift
+++ b/app/EXO/EXO/ContentView.swift
@@ -56,8 +56,12 @@ struct ContentView: View {
    }

    private var shouldShowLocalNetworkWarning: Bool {
+        // Only show warning if:
+        // 1. Local network is not working
+        // 2. EXO is running
+        // 3. We've had a successful connection before (avoids false positive on fresh install)
        if case .notWorking = localNetworkChecker.status {
-            return controller.status != .stopped
+            return controller.status != .stopped && localNetworkChecker.hasWorkedBefore
        }
        return false
    }
--- a/app/EXO/EXO/Services/LocalNetworkChecker.swift
+++ b/app/EXO/EXO/Services/LocalNetworkChecker.swift
@@ -7,6 +7,8 @@ import os.log
 /// macOS local network permission can appear enabled in System Preferences but not
 /// actually work after a restart. This service detects this by creating a UDP
 /// connection to the mDNS multicast address (224.0.0.251:5353).
+///
+/// Only shows warnings if permission previously worked (to avoid false positives on fresh install).
@MainActor
 final class LocalNetworkChecker: ObservableObject {
    enum Status: Equatable {
@@ -35,10 +37,14 @@ final class LocalNetworkChecker: ObservableObject {
    }

    private static let logger = Logger(subsystem: "io.exo.EXO", category: "LocalNetworkChecker")
+    private static let hasWorkedBeforeKey = "LocalNetworkChecker.hasWorkedBefore"

    @Published private(set) var status: Status = .unknown
    @Published private(set) var lastConnectionState: String = "none"

+    /// True if we've ever had a successful connection (persisted across launches)
+    @Published private(set) var hasWorkedBefore: Bool = UserDefaults.standard.bool(forKey: hasWorkedBeforeKey)
+
    private var connection: NWConnection?
    private var checkTask: Task<Void, Never>?

@@ -52,7 +58,14 @@ final class LocalNetworkChecker: ObservableObject {
            guard let self else { return }
            let result = await self.performCheck()
            self.status = result
-            Self.logger.info("Local network check complete: \(result.displayText)")
+
+            // Record success so we can detect regressions on future launches
+            if case .working = result {
+                self.hasWorkedBefore = true
+                UserDefaults.standard.set(true, forKey: Self.hasWorkedBeforeKey)
+            }
+
+            Self.logger.info("Local network check complete: \(result.displayText), hasWorkedBefore: \(self.hasWorkedBefore)")
        }
    }

--- a/dashboard/src/routes/downloads/+page.svelte
+++ b/dashboard/src/routes/downloads/+page.svelte
@@ -199,7 +199,13 @@
 					const rawProgress = (downloadPayload as Record<string, unknown>).download_progress
 						?? (downloadPayload as Record<string, unknown>).downloadProgress
 						?? {};
-					const totalBytes = getBytes((rawProgress as Record<string, unknown>).total_bytes ?? (rawProgress as Record<string, unknown>).totalBytes);
+					// For DownloadCompleted, total_bytes is at top level; for DownloadOngoing, it's inside download_progress
+					const totalBytes = getBytes(
+						(downloadPayload as Record<string, unknown>).total_bytes
+						?? (downloadPayload as Record<string, unknown>).totalBytes
+						?? (rawProgress as Record<string, unknown>).total_bytes
+						?? (rawProgress as Record<string, unknown>).totalBytes
+					);
 					const downloadedBytes = getBytes((rawProgress as Record<string, unknown>).downloaded_bytes ?? (rawProgress as Record<string, unknown>).downloadedBytes);
 					const speed = (rawProgress as Record<string, unknown>).speed as number ?? 0;
 					const etaMs = (rawProgress as Record<string, unknown>).eta_ms as number ?? (rawProgress as Record<string, unknown>).etaMs as number ?? 0;
@@ -332,8 +338,13 @@
 								<div class="text-lg font-mono text-white truncate">{node.nodeName}</div>
 								<div class="text-xs text-exo-light-gray font-mono truncate">{node.nodeId}</div>
 							</div>
-							<div class="text-xs font-mono uppercase tracking-wider whitespace-nowrap shrink-0">
-								<span class="text-green-400">{node.models.filter(m => m.status === 'completed').length}</span><span class="text-exo-yellow"> /{node.models.length} models</span>
+							<div class="text-xs font-mono uppercase tracking-wider whitespace-nowrap shrink-0 text-right">
+								<div>
+									<span class="text-green-400">{node.models.filter(m => m.status === 'completed').length}</span><span class="text-exo-yellow"> / {node.models.length} models</span>
+								</div>
+								<div class="text-exo-light-gray normal-case tracking-normal">
+									{formatBytes(node.models.filter(m => m.status === 'completed').reduce((sum, m) => sum + m.totalBytes, 0))} on disk
+								</div>
 							</div>
 						</div>

@@ -385,7 +396,7 @@
 								</div>

 								<div class="flex items-center justify-between text-xs font-mono text-exo-light-gray">
-									<span>{model.status === 'completed' ? 'Completed' : `${formatSpeed(model.speed)} • ETA ${formatEta(model.etaMs)}`}</span>
+									<span>{model.status === 'completed' ? `Completed (${formatBytes(model.totalBytes)})` : `${formatSpeed(model.speed)} • ETA ${formatEta(model.etaMs)}`}</span>
 									{#if model.status !== 'completed'}
 										<span>{model.files.length} file{model.files.length === 1 ? '' : 's'}</span>
 									{/if}
--- a/docs/api.md
+++ b/docs/api.md
@@ -0,0 +1,212 @@
+# EXO API – Technical Reference
+
+This document describes the REST API exposed by the **EXO** service, as implemented in:
+
+`src/exo/master/api.py`
+
+The API is used to manage model instances in the cluster, inspect cluster state, and perform inference using an OpenAI-compatible interface.
+
+Base URL example:
+
+```
+http://localhost:52415
+```
+
+## 1. General / Meta Endpoints
+
+### Get Master Node ID
+
+**GET** `/node_id`
+
+Returns the identifier of the current master node.
+
+**Response (example):**
+
+```json
+{
+  "node_id": "node-1234"
+}
+```
+
+### Get Cluster State
+
+**GET** `/state`
+
+Returns the current state of the cluster, including nodes and active instances.
+
+**Response:**
+JSON object describing topology, nodes, and instances.
+
+### Get Events
+
+**GET** `/events`
+
+Returns the list of internal events recorded by the master (mainly for debugging and observability).
+
+**Response:**
+Array of event objects.
+
+## 2. Model Instance Management
+
+### Create Instance
+
+**POST** `/instance`
+
+Creates a new model instance in the cluster.
+
+**Request body (example):**
+
+```json
+{
+  "instance": {
+    "model_id": "llama-3.2-1b",
+    "placement": { }
+  }
+}
+```
+
+**Response:**
+JSON description of the created instance.
+
+### Delete Instance
+
+**DELETE** `/instance/{instance_id}`
+
+Deletes an existing instance by ID.
+
+**Path parameters:**
+
+* `instance_id`: string, ID of the instance to delete
+
+**Response:**
+Status / confirmation JSON.
+
+### Get Instance
+
+**GET** `/instance/{instance_id}`
+
+Returns details of a specific instance.
+
+**Path parameters:**
+
+* `instance_id`: string
+
+**Response:**
+JSON description of the instance.
+
+### Preview Placements
+
+**GET** `/instance/previews?model_id=...`
+
+Returns possible placement previews for a given model.
+
+**Query parameters:**
+
+* `model_id`: string, required
+
+**Response:**
+Array of placement preview objects.
+
+### Compute Placement
+
+**GET** `/instance/placement`
+
+Computes a placement for a potential instance without creating it.
+
+**Query parameters (typical):**
+
+* `model_id`: string
+* `sharding`: string or config
+* `instance_meta`: JSON-encoded metadata
+* `min_nodes`: integer
+
+**Response:**
+JSON object describing the proposed placement / instance configuration.
+
+### Place Instance (Dry Operation)
+
+**POST** `/place_instance`
+
+Performs a placement operation for an instance (planning step), without necessarily creating it.
+
+**Request body:**
+JSON describing the instance to be placed.
+
+**Response:**
+Placement result.
+
+## 3. Models
+
+### List Models
+
+**GET** `/models`
+**GET** `/v1/models` (alias)
+
+Returns the list of available models and their metadata.
+
+**Response:**
+Array of model descriptors.
+
+## 4. Inference / Chat Completions
+
+### OpenAI-Compatible Chat Completions
+
+**POST** `/v1/chat/completions`
+
+Executes a chat completion request using an OpenAI-compatible schema. Supports streaming and non-streaming modes.
+
+**Request body (example):**
+
+```json
+{
+  "model": "llama-3.2-1b",
+  "messages": [
+    { "role": "system", "content": "You are a helpful assistant." },
+    { "role": "user", "content": "Hello" }
+  ],
+  "stream": false
+}
+```
+
+**Response:**
+OpenAI-compatible chat completion response.
+
+### Benchmarked Chat Completions
+
+**POST** `/bench/chat/completions`
+
+Same as `/v1/chat/completions`, but also returns performance and generation statistics.
+
+**Request body:**
+Same schema as `/v1/chat/completions`.
+
+**Response:**
+Chat completion plus benchmarking metrics.
+
+## 5. Complete Endpoint Summary
+
+```
+GET     /node_id
+GET     /state
+GET     /events
+
+POST    /instance
+GET     /instance/{instance_id}
+DELETE  /instance/{instance_id}
+
+GET     /instance/previews
+GET     /instance/placement
+POST    /place_instance
+
+GET     /models
+GET     /v1/models
+
+POST    /v1/chat/completions
+POST    /bench/chat/completions
+```
+
+## 6. Notes
+
+* The `/v1/chat/completions` endpoint is compatible with the OpenAI API format, so existing OpenAI clients can be pointed to EXO by changing the base URL.
+* The instance placement endpoints allow you to plan and preview cluster allocations before actually creating instances.
+* The `/events` and `/state` endpoints are primarily intended for operational visibility and debugging.
--- a/flake.nix
+++ b/flake.nix
@@ -16,12 +16,11 @@
    };
  };

-  # TODO: figure out caching story
-  # nixConfig = {
-  #   # nix community cachix
-  #   extra-trusted-public-keys = "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=";
-  #   extra-substituters = "https://nix-community.cachix.org";
-  # };
+  nixConfig = {
+    # exo cachix binary cache
+    extra-trusted-public-keys = "exo.cachix.org-1:okq7hl624TBeAR3kV+g39dUFSiaZgLRkLsFBCuJ2NZI=";
+    extra-substituters = "https://exo.cachix.org";
+  };

  outputs =
    inputs:
@@ -73,6 +72,9 @@
          packages =
            with pkgs;
            [
+              # FORMATTING
+              treefmtEval.config.build.wrapper
+
              # PYTHON
              python313
              uv
--- a/src/exo/shared/tests/test_apply/test_apply_node_download.py
+++ b/src/exo/shared/tests/test_apply/test_apply_node_download.py
@@ -2,6 +2,7 @@ from exo.shared.apply import apply_node_download_progress
 from exo.shared.tests.conftest import get_pipeline_shard_metadata
 from exo.shared.types.common import NodeId
 from exo.shared.types.events import NodeDownloadProgress
+from exo.shared.types.memory import Memory
 from exo.shared.types.state import State
 from exo.shared.types.worker.downloads import DownloadCompleted
 from exo.worker.tests.constants import MODEL_A_ID, MODEL_B_ID
@@ -13,6 +14,7 @@ def test_apply_node_download_progress():
    event = DownloadCompleted(
        node_id=NodeId("node-1"),
        shard_metadata=shard1,
+        total_bytes=Memory(),
    )

    new_state = apply_node_download_progress(
@@ -28,10 +30,12 @@ def test_apply_two_node_download_progress():
    event1 = DownloadCompleted(
        node_id=NodeId("node-1"),
        shard_metadata=shard1,
+        total_bytes=Memory(),
    )
    event2 = DownloadCompleted(
        node_id=NodeId("node-1"),
        shard_metadata=shard2,
+        total_bytes=Memory(),
    )
    state = State(downloads={NodeId("node-1"): [event1]})

--- a/src/exo/shared/types/worker/downloads.py
+++ b/src/exo/shared/types/worker/downloads.py
@@ -28,7 +28,7 @@ class DownloadPending(BaseDownloadProgress):


 class DownloadCompleted(BaseDownloadProgress):
-    pass
+    total_bytes: Memory


 class DownloadFailed(BaseDownloadProgress):
--- a/src/exo/worker/main.py
+++ b/src/exo/worker/main.py
@@ -217,7 +217,9 @@ class Worker:
                    )
                    if initial_progress.status == "complete":
                        progress = DownloadCompleted(
-                            shard_metadata=shard, node_id=self.node_id
+                            shard_metadata=shard,
+                            node_id=self.node_id,
+                            total_bytes=initial_progress.total_bytes,
                        )
                        self.download_status[shard.model_meta.model_id] = progress
                        await self.event_sender.send(
@@ -364,7 +366,11 @@ class Worker:
            nonlocal self
            nonlocal last_progress_time
            if progress.status == "complete":
-                status = DownloadCompleted(shard_metadata=shard, node_id=self.node_id)
+                status = DownloadCompleted(
+                    shard_metadata=shard,
+                    node_id=self.node_id,
+                    total_bytes=progress.total_bytes,
+                )
                self.download_status[shard.model_meta.model_id] = status
                # Footgun!
                self.event_sender.send_nowait(
@@ -457,7 +463,9 @@ class Worker:
                ) in self.shard_downloader.get_shard_download_status():
                    if progress.status == "complete":
                        status = DownloadCompleted(
-                            node_id=self.node_id, shard_metadata=progress.shard
+                            node_id=self.node_id,
+                            shard_metadata=progress.shard,
+                            total_bytes=progress.total_bytes,
                        )
                    elif progress.status in ["in_progress", "not_started"]:
                        if progress.downloaded_bytes_this_session.in_bytes == 0:
--- a/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
+++ b/src/exo/worker/tests/unittests/test_plan/test_download_and_loading.py
@@ -1,5 +1,6 @@
 import exo.worker.plan as plan_mod
 from exo.shared.types.common import NodeId
+from exo.shared.types.memory import Memory
 from exo.shared.types.models import ModelId
 from exo.shared.types.tasks import LoadModel
 from exo.shared.types.worker.downloads import DownloadCompleted, DownloadProgress
@@ -94,13 +95,23 @@ def test_plan_loads_model_when_all_shards_downloaded_and_waiting():

    # Local node has already marked its shard as downloaded (not actually used by _load_model)
    local_download_status = {
-        MODEL_A_ID: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)
+        MODEL_A_ID: DownloadCompleted(
+            shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
+        )
    }

    # Global view has completed downloads for both nodes
    global_download_status = {
-        NODE_A: [DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)],
-        NODE_B: [DownloadCompleted(shard_metadata=shard2, node_id=NODE_B)],
+        NODE_A: [
+            DownloadCompleted(
+                shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
+            )
+        ],
+        NODE_B: [
+            DownloadCompleted(
+                shard_metadata=shard2, node_id=NODE_B, total_bytes=Memory()
+            )
+        ],
    }

    result = plan_mod.plan(
@@ -140,7 +151,9 @@ def test_plan_does_not_request_download_when_shard_already_downloaded():

    # Local status claims the shard is downloaded already
    local_download_status = {
-        MODEL_A_ID: DownloadCompleted(shard_metadata=shard, node_id=NODE_A)
+        MODEL_A_ID: DownloadCompleted(
+            shard_metadata=shard, node_id=NODE_A, total_bytes=Memory()
+        )
    }

    # Global view hasn't caught up yet (no completed shards recorded for NODE_A)
@@ -192,10 +205,16 @@ def test_plan_does_not_load_model_until_all_shards_downloaded_globally():

    # Only NODE_A's shard is recorded as downloaded globally
    local_download_status = {
-        MODEL_A_ID: DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)
+        MODEL_A_ID: DownloadCompleted(
+            shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
+        )
    }
    global_download_status = {
-        NODE_A: [DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)],
+        NODE_A: [
+            DownloadCompleted(
+                shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
+            )
+        ],
        NODE_B: [],  # NODE_B has no downloads completed yet
    }

@@ -212,9 +231,15 @@ def test_plan_does_not_load_model_until_all_shards_downloaded_globally():
    assert result is None

    global_download_status = {
-        NODE_A: [DownloadCompleted(shard_metadata=shard1, node_id=NODE_A)],
+        NODE_A: [
+            DownloadCompleted(
+                shard_metadata=shard1, node_id=NODE_A, total_bytes=Memory()
+            )
+        ],
        NODE_B: [
-            DownloadCompleted(shard_metadata=shard2, node_id=NODE_B)
+            DownloadCompleted(
+                shard_metadata=shard2, node_id=NODE_B, total_bytes=Memory()
+            )
        ],  # NODE_B's shard is now recorded as downloaded too
    }
Author	SHA1	Message	Date
Sami Khan	6088493841	fix local network warning	2026-01-13 00:39:03 +05:00
Jake Hillion	007eb80029	nix: enable cachix Enable cachix and push to it in the pipeline.yml workflow. This won't cache a huge amount yet but will automatically extend our caching as we build more of the repo with Nix in CI. It can also be used by local users by accepting our cache to improve the speed of local builds. Test plan: - CI	2026-01-12 17:24:59 +01:00
Jake Hillion	8d7b6789b3	dashboard: show disk usage for completed models The downloads dashboard showed "Completed" for finished model downloads but provided no indication of how much disk space each model or the total models on a node were using. Added total_bytes field to DownloadCompleted type so the size is preserved when a download completes. Updated the dashboard to display the model size next to "Completed" status (e.g., "Completed (251.1GB)") and a total disk usage line below the model count for each node (e.g., "502.2GB on disk"). Test plan: - Ran unit tests for download apply and planning logic - Type checked all modified files with basedpyright	2026-01-12 16:34:29 +01:00
Jake Hillion	3c5b7ea670	ci: add workflow_dispatch trigger to build-app Build app is the most convenient way to get a DMG for testing, but currently it's a bit limited. You have to push to test-app every time which is far from ideal and requires a bit too much force pushing for my liking. Add the workflow_dispatch trigger. This adds a button in the actions UI to trigger a workflow for a named branch, which means you can use your normal dev branch instead of having to push to test-app. We'll leave that behaviour there for now too, though it may change in future. Filter on `"${{ github.event_name }}" == "workflow_dispatch"` and set those to alpha as well. Will verify by pushing the first version from `main` just in case. Unfortunately we do have to merge this before we can test it. Test plan: - Looking really hard.	2026-01-12 12:14:21 +01:00
PG	b74a610537	Add basic documentation for the API interface (#1122) ## Motivation Adds basic API documentation ## Changes - Add docs/api.md - Modify README.md	2026-01-11 18:44:40 +00:00
Jake Hillion	18c4e49f91	nix: put treefmt in devshell treefmt is useful to be able to access directly for some formatters like `jj fix`. Expose it in the devshell. Test plan: - Used with `jj fix` on a large branch. It worked.	2026-01-09 17:53:50 +01:00