fix: keep TRUST_REMOTE_CODE=True for built-in models

The TRUST_REMOTE_CODE constant is the default for built-in models, whose model cards are known and therefore trusted. Custom models added via the API already default to trust_remote_code=False in ModelCard.fetch_from_hf(). The --trust-remote-code CLI flag overrides the setting for custom models only.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feat: add --trust-remote-code CLI flag for custom model tokenizers
2026-02-26 03:06:05 -05:00 · 2026-02-24 15:44:06 +00:00 · 2026-02-24 15:44:06 +00:00
3 changed files with 20 additions and 2 deletions
--- a/src/exo/main.py
+++ b/src/exo/main.py
@@ -261,6 +261,13 @@ def main():
    if args.offline:
        logger.info("Running in OFFLINE mode — no internet checks, local models only")
    # Set trust_remote_code override env var for runner subprocesses
    if args.trust_remote_code:
        os.environ["EXO_TRUST_REMOTE_CODE"] = "1"
        logger.warning(
            "--trust-remote-code enabled: models may execute arbitrary code during loading"
        )
    # Set FAST_SYNCH override env var for runner subprocesses
    if args.fast_synch is True:
        os.environ["EXO_FAST_SYNCH"] = "on"
@@ -285,6 +292,7 @@ class Args(CamelCaseModel):
    no_downloads: bool = False
    offline: bool = False
    fast_synch: bool | None = None  # None = auto, True = force on, False = force off
    trust_remote_code: bool = False
    @classmethod
    def parse(cls) -> Self:
@@ -336,6 +344,11 @@ class Args(CamelCaseModel):
            action="store_true",
            help="Run in offline/air-gapped mode: skip internet checks, use only pre-staged local models",
        )
        parser.add_argument(
            "--trust-remote-code",
            action="store_true",
            help="Allow models to execute custom code during tokenizer loading (security-sensitive, CLI-only)",
        )
        fast_synch_group = parser.add_mutually_exclusive_group()
        fast_synch_group.add_argument(
            "--fast-synch",
--- a/src/exo/worker/engines/mlx/constants.py
+++ b/src/exo/worker/engines/mlx/constants.py
@@ -13,5 +13,6 @@ KV_CACHE_BITS: int | None = None
 DEFAULT_TOP_LOGPROBS: int = 5
-# TODO: We should really make this opt-in, but Kimi requires trust_remote_code=True
+# True for built-in models with known model cards; custom models added via API default to False
 # and can be overridden with the --trust-remote-code CLI flag.
 TRUST_REMOTE_CODE: bool = True
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -291,10 +291,14 @@ def shard_and_load(
 def get_tokenizer(model_path: Path, shard_metadata: ShardMetadata) -> TokenizerWrapper:
    """Load tokenizer for a model shard. Delegates to load_tokenizer_for_model_id."""
    trust_remote_code = (
        shard_metadata.model_card.trust_remote_code
        or os.environ.get("EXO_TRUST_REMOTE_CODE") == "1"
    )
    return load_tokenizer_for_model_id(
        shard_metadata.model_card.model_id,
        model_path,
-        trust_remote_code=shard_metadata.model_card.trust_remote_code,
+        trust_remote_code=trust_remote_code,
    )