From cd9f3182d91ffd2dfab37cf0cf1b9ec6fe30cdb0 Mon Sep 17 00:00:00 2001
From: Alex Cheema
Date: Tue, 3 Feb 2026 19:15:50 -0800
Subject: [PATCH] Fix NameError for Cache in WrappedMiniMaxAttention

Use string annotation for the Cache type since it only exists in type
stubs, not in the actual mlx_lm package at runtime.

Co-Authored-By: Claude Opus 4.5
---
 src/exo/worker/engines/mlx/auto_parallel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/exo/worker/engines/mlx/auto_parallel.py b/src/exo/worker/engines/mlx/auto_parallel.py
index 28e82f73..1e470399 100644
--- a/src/exo/worker/engines/mlx/auto_parallel.py
+++ b/src/exo/worker/engines/mlx/auto_parallel.py
@@ -635,7 +635,7 @@ class WrappedMiniMaxAttention(CustomMlxLayer):
         self,
         x: mx.array,
         mask: mx.array | None = None,
-        cache: Cache | None = None,
+        cache: "Cache | None" = None,
     ) -> mx.array:
         batch_dim, seq_dim, _ = x.shape
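
Note (not part of the patch): a minimal, generic sketch of why quoting the annotation avoids the NameError. The module and class names below are hypothetical stand-ins for a type that a checker can see but the runtime package does not provide; in the real code, Cache is resolved by the type checker through the project's stubs.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by static type checkers; this block never executes, so
    # the missing runtime module is harmless.
    from stub_only_module import StubOnlyCache  # hypothetical names

class Wrapped:
    # On Python versions that evaluate annotations eagerly (no
    # `from __future__ import annotations`), an unquoted
    # `cache: StubOnlyCache | None = None` would raise NameError when
    # this `def` statement runs, because the name does not exist at
    # runtime. Quoting the annotation defers evaluation, so only type
    # checkers ever resolve it.
    def __call__(self, x, cache: "StubOnlyCache | None" = None):
        return x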