Compare commits

1 Commit

Author SHA1 Message Date
Alex Cheema
72fca71522 use model.make_cache in make_kv_cache 2025-12-30 17:46:13 +00:00

View File

@@ -343,6 +343,10 @@ def make_kv_cache(
) -> list[KVCache | RotatingKVCache | QuantizedKVCache]:
assert hasattr(model, "layers")
if hasattr(model, "make_cache"):
logger.info(f"Using make_cache")
return model.make_cache() # type: ignore
if max_kv_size is None:
if KV_CACHE_BITS is None:
logger.info("Using default KV cache")