Use model.make_cache() in make_kv_cache when the model defines it

2026-02-15 08:34:31 -05:00 · 2025-12-30 17:46:13 +00:00
1 changed file with 4 additions and 0 deletions
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -343,6 +343,10 @@ def make_kv_cache(
 ) -> list[KVCache | RotatingKVCache | QuantizedKVCache]:
    assert hasattr(model, "layers")

+    if hasattr(model, "make_cache"):
+        logger.info(f"Using make_cache")
+        return model.make_cache() # type: ignore
+
    if max_kv_size is None:
        if KV_CACHE_BITS is None:
            logger.info("Using default KV cache")