fix kimi eos token ids

This commit is contained in:
Alex Cheema
2025-11-13 10:39:14 -08:00
committed by GitHub
parent b62f68474a
commit d793f5f96c
3 changed files with 34 additions and 24 deletions

View File

@@ -43,41 +43,41 @@ stages:
# generation_length: 10
# time_between_requests: 2.0
# iterations: 5
- name: "pp64_g64"
prompt_length: 64
generation_length: 64
time_between_requests: 2.0
iterations: 5
# - name: "pp64_g64"
# prompt_length: 64
# generation_length: 64
# time_between_requests: 2.0
# iterations: 5
# - name: "pp64_g512"
# prompt_length: 64
# generation_length: 512
# time_between_requests: 2.0
# iterations: 10
- name: "pp256_g64"
prompt_length: 256
generation_length: 64
time_between_requests: 2.0
iterations: 5
# - name: "pp256_g64"
# prompt_length: 256
# generation_length: 64
# time_between_requests: 2.0
# iterations: 5
# - name: "pp256_g512"
# prompt_length: 256
# generation_length: 512
# time_between_requests: 2.0
# iterations: 10
- name: "pp1024_g64"
prompt_length: 1024
generation_length: 64
time_between_requests: 2.0
iterations: 5
# - name: "pp1024_g64"
# prompt_length: 1024
# generation_length: 64
# time_between_requests: 2.0
# iterations: 5
# - name: "pp1024_g512"
# prompt_length: 1024
# generation_length: 512
# time_between_requests: 2.0
# iterations: 10
- name: "pp2048_g64"
prompt_length: 2048
generation_length: 64
time_between_requests: 2.0
iterations: 5
# - name: "pp2048_g64"
# prompt_length: 2048
# generation_length: 64
# time_between_requests: 2.0
# iterations: 5
# - name: "pp2048_g512"
# prompt_length: 2048
# generation_length: 512
@@ -87,7 +87,7 @@ stages:
prompt_length: 4096
generation_length: 64
time_between_requests: 2.0
iterations: 5
iterations: 4
# - name: "pp4096_g512"
# prompt_length: 4096
# generation_length: 512
@@ -97,7 +97,7 @@ stages:
prompt_length: 8192
generation_length: 64
time_between_requests: 2.0
iterations: 5
iterations: 4
# - name: "pp8192_g512"
# prompt_length: 8192
# generation_length: 512

View File

@@ -4,6 +4,7 @@ on: [push]
jobs:
plan:
if: contains(github.event.head_commit.message, '/bench')
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.build.outputs.matrix }}

View File

@@ -149,7 +149,10 @@ def initialize_mlx(
tokenizer = cast(
TokenizerWrapper,
load_tokenizer(
model_path, tokenizer_config_extra={"trust_remote_code": True}
model_path,
tokenizer_config_extra={"trust_remote_code": True},
# TODO: HACK - the default EOS token id for Kimi K2 is wrong, so override it explicitly for kimi-k2 models
eos_token_ids=[163586] if "kimi-k2" in bound_instance.bound_shard().model_meta.model_id.lower() else None,
),
)
assert isinstance(tokenizer, TokenizerWrapper)
@@ -177,7 +180,13 @@ def shard_and_load(
# TODO: we should really make this opt-in, but Kimi requires trust_remote_code=True
tokenizer = cast(
TokenizerWrapper,
load_tokenizer(model_path, tokenizer_config_extra={"trust_remote_code": True}),
# TODO: HACK for Kimi K2 wrong eos token id
load_tokenizer(
model_path,
tokenizer_config_extra={"trust_remote_code": True},
# TODO: HACK - the default EOS token id for Kimi K2 is wrong, so override it explicitly for kimi-k2 models
eos_token_ids=[163586] if "kimi-k2" in shard_metadata.model_meta.model_id.lower() else None,
),
)
logger.info(f"Group size: {group.size()}, group rank: {group.rank()}")