Set KV_CACHE_BITS to None to disable the quantized KV cache

2026-04-25 00:09:15 -04:00 · 2025-12-30 16:49:32 +00:00
parent 8a6da58404
commit 31d4cd8409
1 changed file with 1 addition and 1 deletion
--- a/src/exo/worker/engines/mlx/constants.py
+++ b/src/exo/worker/engines/mlx/constants.py
@@ -9,7 +9,7 @@ MAX_KV_SIZE: int | None = 3200
 KEEP_KV_SIZE: int | None = 1600
 QUANTIZE_MODEL_MODE: str | None = "affine"
 CACHE_GROUP_SIZE: int = 64
-KV_CACHE_BITS: int | None = 8
+KV_CACHE_BITS: int | None = None

 # TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
 TRUST_REMOTE_CODE: bool = True