Set KV_CACHE_BITS to None to disable the quantized KV cache

This commit is contained in:
Alex Cheema
2025-12-30 16:49:32 +00:00
committed by Evan Quiney
parent 8a6da58404
commit 31d4cd8409

View File

@@ -9,7 +9,7 @@ MAX_KV_SIZE: int | None = 3200
KEEP_KV_SIZE: int | None = 1600
QUANTIZE_MODEL_MODE: str | None = "affine"
CACHE_GROUP_SIZE: int = 64
KV_CACHE_BITS: int | None = 8
KV_CACHE_BITS: int | None = None
# TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
TRUST_REMOTE_CODE: bool = True