python: add hermetic basedpyright typecheck to nix flake check

The existing CI typecheck job used `uv run basedpyright`, which depends on a non-hermetic uv sync step. This replaces it with a fully hermetic typecheck as a Nix flake check using the uv2nix virtual environment.

Added a `typecheckVenv` with dev dependencies, a `linuxOverlay` to ignore native shared library deps (NVIDIA, torch, triton, mlx) that aren't needed at type-check time, and `passthru` preservation plus `.pyi` stub copying on the `exo-pyo3-bindings` overlay so basedpyright can resolve the Rust bindings types. Also guarded the `mlx` Nix build override to macOS only since it requires Metal. Removed the old non-hermetic `typecheck` CI job since `nix flake check` now covers it.

The hermetic check ensures type checking uses exactly the locked dependency versions and catches type errors without requiring a working uv/pip environment.

Test plan:
- CI (`nix flake check` runs on x86_64-linux, aarch64-linux, aarch64-darwin)
- Verified `nix build ".#checks.x86_64-linux.typecheck"` passes with 0 errors
2026-02-15 08:34:31 -05:00 · 2026-02-14 14:21:49 +00:00
11 changed files with 45 additions and 129 deletions
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -8,33 +8,6 @@ on:
      - main

 jobs:
-  typecheck:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          lfs: false
-
-      - uses: cachix/install-nix-action@v31
-        with:
-          nix_path: nixpkgs=channel:nixos-unstable
-
-      - uses: cachix/cachix-action@v14
-        name: Configure Cachix
-        with:
-          name: exo
-          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
-
-      - name: Load nix develop environment
-        run: nix run github:nicknovitski/nix-develop/v1
-
-      - name: Sync dependencies
-        run: uv sync --all-packages
-
-      - name: Run type checker
-        run: uv run basedpyright --project pyproject.toml
-
  nix:
    name: Build and check (${{ matrix.system }})
    runs-on: ${{ matrix.runner }}
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
@@ -72,8 +72,6 @@ export interface Instance {
    runnerToShard?: Record<string, unknown>;
    nodeToRunner?: Record<string, string>;
  };
-  draftModel?: string;
-  numDraftTokens?: number;
 }

 // Granular node state types from the new state structure
--- a/python/parts.nix
+++ b/python/parts.nix
@@ -14,7 +14,9 @@

      # Override overlay to inject Nix-built components
      exoOverlay = final: prev: {
-        # Replace workspace exo_pyo3_bindings with Nix-built wheel
+        # Replace workspace exo_pyo3_bindings with Nix-built wheel.
+        # Preserve passthru so mkVirtualEnv can resolve dependency groups.
+        # Copy .pyi stub + py.typed marker so basedpyright can find the types.
        exo-pyo3-bindings = pkgs.stdenv.mkDerivation {
          pname = "exo-pyo3-bindings";
          version = "0.1.0";
@@ -22,6 +24,12 @@
          # Install from pre-built wheel
          nativeBuildInputs = [ final.pyprojectWheelHook ];
          dontStrip = true;
+          passthru = prev.exo-pyo3-bindings.passthru or { };
+          postInstall = ''
+            local siteDir=$out/${final.python.sitePackages}/exo_pyo3_bindings
+            cp ${inputs.self}/rust/exo_pyo3_bindings/exo_pyo3_bindings.pyi $siteDir/
+            touch $siteDir/py.typed
+          '';
        };
      };

@@ -29,17 +37,32 @@

      # Overlay to provide build systems and custom packages
      buildSystemsOverlay = final: prev: {
-        # Use our pure Nix-built MLX with Metal support
-        mlx = self'.packages.mlx;
-
        # mlx-lm is a git dependency that needs setuptools
        mlx-lm = prev.mlx-lm.overrideAttrs (old: {
          nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
            final.setuptools
          ];
        });
+      } // lib.optionalAttrs pkgs.stdenv.hostPlatform.isDarwin {
+        # Use our pure Nix-built MLX with Metal support (macOS only)
+        mlx = self'.packages.mlx;
      };

+      # Additional overlay for Linux-specific fixes (type checking env).
+      # Native wheels have shared lib dependencies we don't need at type-check time.
+      linuxOverlay = final: prev:
+        let
+          ignoreMissing = drv: drv.overrideAttrs { autoPatchelfIgnoreMissingDeps = [ "*" ]; };
+          nvidiaPackages = lib.filterAttrs (name: _: lib.hasPrefix "nvidia-" name) prev;
+        in
+        lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux (
+          (lib.mapAttrs (_: ignoreMissing) nvidiaPackages) // {
+            mlx = ignoreMissing prev.mlx;
+            torch = ignoreMissing prev.torch;
+            triton = ignoreMissing prev.triton;
+          }
+        );
+
      pythonSet = (pkgs.callPackage inputs.pyproject-nix.build.packages {
        inherit python;
      }).overrideScope (
@@ -48,6 +71,7 @@
          overlay
          exoOverlay
          buildSystemsOverlay
+          linuxOverlay
        ]
      );
      exoVenv = pythonSet.mkVirtualEnv "exo-env" workspace.deps.default;
@@ -118,6 +142,21 @@
          ${pkgs.ruff}/bin/ruff check ${inputs.self}
          touch $out
        '';
+
+        # Hermetic basedpyright type checking
+        typecheck = pkgs.runCommand "typecheck"
+          {
+            nativeBuildInputs = [
+              testVenv
+              pkgs.basedpyright
+            ];
+          }
+          ''
+            cd ${inputs.self}
+            export HOME=$TMPDIR
+            basedpyright --pythonpath ${testVenv}/bin/python
+            touch $out
+          '';
      };
    };
 }
--- a/src/exo/master/main.py
+++ b/src/exo/master/main.py
@@ -24,7 +24,6 @@ from exo.shared.types.commands import (
    PlaceInstance,
    RequestEventLog,
    SendInputChunk,
-    SetInstanceDraftModel,
    TaskFinished,
    TestCommand,
    TextGeneration,
@@ -36,7 +35,6 @@ from exo.shared.types.events import (
    IndexedEvent,
    InputChunkReceived,
    InstanceDeleted,
-    InstanceDraftModelUpdated,
    NodeGatheredInfo,
    NodeTimedOut,
    TaskCreated,
@@ -321,14 +319,6 @@ class Master:
                                    chunk=chunk,
                                )
                            )
-                        case SetInstanceDraftModel():
-                            generated_events.append(
-                                InstanceDraftModelUpdated(
-                                    instance_id=command.instance_id,
-                                    draft_model=command.draft_model,
-                                    num_draft_tokens=command.num_draft_tokens,
-                                )
-                            )
                        case TaskFinished():
                            generated_events.append(
                                TaskDeleted(
--- a/src/exo/master/placement.py
+++ b/src/exo/master/placement.py
@@ -153,8 +153,6 @@ def place_instance(
                shard_assignments=shard_assignments,
                jaccl_devices=mlx_jaccl_devices,
                jaccl_coordinators=mlx_jaccl_coordinators,
-                draft_model=command.draft_model,
-                num_draft_tokens=command.num_draft_tokens,
            )
        case InstanceMeta.MlxRing:
            ephemeral_port = random_ephemeral_port()
@@ -169,8 +167,6 @@ def place_instance(
                shard_assignments=shard_assignments,
                hosts_by_node=hosts_by_node,
                ephemeral_port=ephemeral_port,
-                draft_model=command.draft_model,
-                num_draft_tokens=command.num_draft_tokens,
            )

    return target_instances
--- a/src/exo/shared/apply.py
+++ b/src/exo/shared/apply.py
@@ -12,7 +12,6 @@ from exo.shared.types.events import (
    InputChunkReceived,
    InstanceCreated,
    InstanceDeleted,
-    InstanceDraftModelUpdated,
    NodeDownloadProgress,
    NodeGatheredInfo,
    NodeTimedOut,
@@ -73,8 +72,6 @@ def event_apply(event: Event, state: State) -> State:
            return apply_instance_created(event, state)
        case InstanceDeleted():
            return apply_instance_deleted(event, state)
-        case InstanceDraftModelUpdated():
-            return apply_instance_draft_model_updated(event, state)
        case NodeTimedOut():
            return apply_node_timed_out(event, state)
        case NodeDownloadProgress():
@@ -193,25 +190,6 @@ def apply_instance_deleted(event: InstanceDeleted, state: State) -> State:
    return state.model_copy(update={"instances": new_instances})


-def apply_instance_draft_model_updated(
-    event: InstanceDraftModelUpdated, state: State
-) -> State:
-    if event.instance_id not in state.instances:
-        return state
-    instance = state.instances[event.instance_id]
-    updated_instance = instance.model_copy(
-        update={
-            "draft_model": event.draft_model,
-            "num_draft_tokens": event.num_draft_tokens,
-        }
-    )
-    new_instances: Mapping[InstanceId, Instance] = {
-        **state.instances,
-        event.instance_id: updated_instance,
-    }
-    return state.model_copy(update={"instances": new_instances})
-
-
 def apply_runner_status_updated(event: RunnerStatusUpdated, state: State) -> State:
    new_runners: Mapping[RunnerId, RunnerStatus] = {
        **state.runners,
--- a/src/exo/shared/types/commands.py
+++ b/src/exo/shared/types/commands.py
@@ -38,8 +38,6 @@ class PlaceInstance(BaseCommand):
    sharding: Sharding
    instance_meta: InstanceMeta
    min_nodes: int
-    draft_model: ModelId | None = None
-    num_draft_tokens: int = 4


 class CreateInstance(BaseCommand):
@@ -74,14 +72,6 @@ class DeleteDownload(BaseCommand):
    model_id: ModelId


-class SetInstanceDraftModel(BaseCommand):
-    """Set or update the draft model for an existing instance."""
-
-    instance_id: InstanceId
-    draft_model: ModelId | None  # None to disable speculative decoding
-    num_draft_tokens: int = 4
-
-
 class CancelDownload(BaseCommand):
    target_node_id: NodeId
    model_id: ModelId
@@ -99,7 +89,6 @@ Command = (
    | PlaceInstance
    | CreateInstance
    | DeleteInstance
-    | SetInstanceDraftModel
    | TaskFinished
    | SendInputChunk
 )
--- a/src/exo/shared/types/events.py
+++ b/src/exo/shared/types/events.py
@@ -5,7 +5,7 @@ from pydantic import Field

 from exo.shared.topology import Connection
 from exo.shared.types.chunks import GenerationChunk, InputImageChunk
-from exo.shared.types.common import CommandId, Id, ModelId, NodeId, SessionId
+from exo.shared.types.common import CommandId, Id, NodeId, SessionId
 from exo.shared.types.tasks import Task, TaskId, TaskStatus
 from exo.shared.types.worker.downloads import DownloadProgress
 from exo.shared.types.worker.instances import Instance, InstanceId
@@ -68,14 +68,6 @@ class InstanceDeleted(BaseEvent):
    instance_id: InstanceId


-class InstanceDraftModelUpdated(BaseEvent):
-    """Draft model updated on an existing instance."""
-
-    instance_id: InstanceId
-    draft_model: ModelId | None
-    num_draft_tokens: int
-
-
 class RunnerStatusUpdated(BaseEvent):
    runner_id: RunnerId
    runner_status: RunnerStatus
@@ -149,7 +141,6 @@ Event = (
    | TaskAcknowledged
    | InstanceCreated
    | InstanceDeleted
-    | InstanceDraftModelUpdated
    | RunnerStatusUpdated
    | RunnerDeleted
    | NodeTimedOut
--- a/src/exo/shared/types/tasks.py
+++ b/src/exo/shared/types/tasks.py
@@ -40,12 +40,6 @@ class DownloadModel(BaseTask):  # emitted by Worker
    shard_metadata: ShardMetadata


-class DownloadDraftModel(BaseTask):  # emitted by Worker
-    """Download a draft model for speculative decoding (rank 0 only)."""
-
-    model_id: str  # HuggingFace model ID
-
-
 class LoadModel(BaseTask):  # emitted by Worker
    pass

@@ -86,17 +80,9 @@ class Shutdown(BaseTask):  # emitted by Worker
    runner_id: RunnerId


-class SetDraftModel(BaseTask):  # emitted by Worker
-    """Load or clear a draft model on an already-running instance."""
-
-    model_id: str | None  # HuggingFace model ID, or None to clear
-    num_draft_tokens: int = 4
-
-
 Task = (
    CreateRunner
    | DownloadModel
-    | DownloadDraftModel
    | ConnectToGroup
    | LoadModel
    | StartWarmup
@@ -104,5 +90,4 @@ Task = (
    | ImageGeneration
    | ImageEdits
    | Shutdown
-    | SetDraftModel
 )
--- a/src/exo/shared/types/worker/instances.py
+++ b/src/exo/shared/types/worker/instances.py
@@ -2,7 +2,7 @@ from enum import Enum

 from pydantic import model_validator

-from exo.shared.types.common import Host, Id, ModelId, NodeId
+from exo.shared.types.common import Host, Id, NodeId
 from exo.shared.types.worker.runners import RunnerId, ShardAssignments, ShardMetadata
 from exo.utils.pydantic_ext import CamelCaseModel, TaggedModel

@@ -19,8 +19,6 @@ class InstanceMeta(str, Enum):
 class BaseInstance(TaggedModel):
    instance_id: InstanceId
    shard_assignments: ShardAssignments
-    draft_model: ModelId | None = None  # For speculative decoding (rank 0 only)
-    num_draft_tokens: int = 4  # Tokens to draft per iteration (when draft_model is set)

    def shard(self, runner_id: RunnerId) -> ShardMetadata | None:
        return self.shard_assignments.runner_to_shard.get(runner_id, None)
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -223,27 +223,6 @@ def load_mlx_items(
    return cast(Model, model), tokenizer


-def load_draft_model(model_id: ModelId) -> nn.Module:
-    """Load a draft model for speculative decoding (rank 0 only).
-
-    Draft models are small models (typically 0.5B-2B parameters) used to
-    generate candidate tokens quickly, which are then verified by the main
-    model in a single forward pass.
-
-    Assumes the model has already been downloaded by the worker.
-
-    Args:
-        model_id: HuggingFace model ID for the draft model
-
-    Returns:
-        The loaded draft model
-    """
-    model_path = build_model_path(model_id)
-    draft_model, _ = load_model(model_path, strict=True)
-    logger.info(f"Loaded draft model from {model_path}")
-    return draft_model
-
-
 def shard_and_load(
    shard_metadata: ShardMetadata,
    group: Group,