mirror of
https://github.com/exo-explore/exo.git
synced 2026-04-25 00:09:15 -04:00
Set KV_CACHE_BITS to None to disable the quantized KV cache
This commit is contained in:
@@ -9,7 +9,7 @@ MAX_KV_SIZE: int | None = 3200
|
||||
KEEP_KV_SIZE: int | None = 1600
|
||||
QUANTIZE_MODEL_MODE: str | None = "affine"
|
||||
CACHE_GROUP_SIZE: int = 64
|
||||
-KV_CACHE_BITS: int | None = 8
|
||||
+KV_CACHE_BITS: int | None = None
|
||||
|
||||
# TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
|
||||
TRUST_REMOTE_CODE: bool = True
|
||||
|
||||
Reference in New Issue
Block a user