From 31d4cd84097dfa04aa2da3eb4e5efd67a7cdd42c Mon Sep 17 00:00:00 2001
From: Alex Cheema <alexcheema123@gmail.com>
Date: Tue, 30 Dec 2025 16:49:32 +0000
Subject: [PATCH] Set KV_CACHE_BITS to None to disable the quantized KV cache

---
 src/exo/worker/engines/mlx/constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/exo/worker/engines/mlx/constants.py b/src/exo/worker/engines/mlx/constants.py
index 0c17c97d3..2544e9beb 100644
--- a/src/exo/worker/engines/mlx/constants.py
+++ b/src/exo/worker/engines/mlx/constants.py
@@ -9,7 +9,7 @@ MAX_KV_SIZE: int | None = 3200
 KEEP_KV_SIZE: int | None = 1600
 QUANTIZE_MODEL_MODE: str | None = "affine"
 CACHE_GROUP_SIZE: int = 64
-KV_CACHE_BITS: int | None = 8
+KV_CACHE_BITS: int | None = None
 
 # TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
 TRUST_REMOTE_CODE: bool = True