From 31d4cd84097dfa04aa2da3eb4e5efd67a7cdd42c Mon Sep 17 00:00:00 2001
From: Alex Cheema
Date: Tue, 30 Dec 2025 16:49:32 +0000
Subject: [PATCH] set KV_CACHE_BITS to None to disable quantized kv cache

---
 src/exo/worker/engines/mlx/constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/exo/worker/engines/mlx/constants.py b/src/exo/worker/engines/mlx/constants.py
index 0c17c97d3..2544e9beb 100644
--- a/src/exo/worker/engines/mlx/constants.py
+++ b/src/exo/worker/engines/mlx/constants.py
@@ -9,7 +9,7 @@ MAX_KV_SIZE: int | None = 3200
 KEEP_KV_SIZE: int | None = 1600
 QUANTIZE_MODEL_MODE: str | None = "affine"
 CACHE_GROUP_SIZE: int = 64
-KV_CACHE_BITS: int | None = 8
+KV_CACHE_BITS: int | None = None
 
 # TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
 TRUST_REMOTE_CODE: bool = True