tagged model: replace the BenchChatCompletionTaskParams subclass check with a `bench: bool` flag on ChatCompletionTaskParams so benchmark mode survives serialization

This commit is contained in:
Ryuichi Leo Takashige
2026-02-02 11:41:22 +00:00
parent c0f192897c
commit 2d15e49f4e
2 changed files with 4 additions and 3 deletions

View File

@@ -239,10 +239,12 @@ class ChatCompletionTaskParams(BaseModel):
tool_choice: str | dict[str, Any] | None = None
parallel_tool_calls: bool | None = None
user: str | None = None
# Internal flag for benchmark mode - set by API, preserved through serialization
bench: bool = False
class BenchChatCompletionTaskParams(ChatCompletionTaskParams):
pass
bench: bool = True
class PlaceInstanceParams(BaseModel):

View File

@@ -8,7 +8,6 @@ from mlx_lm.sample_utils import make_sampler
from mlx_lm.tokenizer_utils import TokenizerWrapper
from exo.shared.types.api import (
BenchChatCompletionTaskParams,
ChatCompletionMessage,
FinishReason,
GenerationStats,
@@ -368,7 +367,7 @@ def mlx_generate(
) -> Generator[GenerationResponse]:
# Ensure that generation stats only contains peak memory for this generation
mx.reset_peak_memory()
is_bench: bool = isinstance(task, BenchChatCompletionTaskParams)
is_bench: bool = task.bench
# Currently we support chat-completion tasks only.
logger.debug(f"task_params: {task}")