From cd9f3182d91ffd2dfab37cf0cf1b9ec6fe30cdb0 Mon Sep 17 00:00:00 2001
From: Alex Cheema
Date: Tue, 3 Feb 2026 19:15:50 -0800
Subject: [PATCH] Fix NameError for Cache in WrappedMiniMaxAttention

Use string annotation for the Cache type since it only exists in type
stubs, not in the actual mlx_lm package at runtime.

Co-Authored-By: Claude Opus 4.5
---
 src/exo/worker/engines/mlx/auto_parallel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/exo/worker/engines/mlx/auto_parallel.py b/src/exo/worker/engines/mlx/auto_parallel.py
index 28e82f73..1e470399 100644
--- a/src/exo/worker/engines/mlx/auto_parallel.py
+++ b/src/exo/worker/engines/mlx/auto_parallel.py
@@ -635,7 +635,7 @@ class WrappedMiniMaxAttention(CustomMlxLayer):
         self,
         x: mx.array,
         mask: mx.array | None = None,
-        cache: Cache | None = None,
+        cache: "Cache | None" = None,
     ) -> mx.array:
         batch_dim, seq_dim, _ = x.shape
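
Note (not part of the patch): a minimal, generic sketch of why quoting the annotation avoids the NameError. The module and class names below are hypothetical stand-ins for a type that a checker can see but the runtime package does not provide; in the real code, Cache is resolved by the type checker through the project's stubs.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by static type checkers; this block never executes, so
    # the missing runtime module is harmless.
    from stub_only_module import StubOnlyCache  # hypothetical names

class Wrapped:
    # On Python versions that evaluate annotations eagerly (no
    # `from __future__ import annotations`), an unquoted
    # `cache: StubOnlyCache | None = None` would raise NameError when
    # this `def` statement runs, because the name does not exist at
    # runtime. Quoting the annotation defers evaluation, so only type
    # checkers ever resolve it.
    def __call__(self, x, cache: "StubOnlyCache | None" = None):
        return x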