Mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-02-07 22:33:28 -05:00)
fix: add support for min_tokens
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
@@ -171,6 +171,7 @@ class LLM:
     top_p = 1.0 if config['temperature'] <= 1e-5 else config['top_p']
     config = config.model_copy(update=dict(stop=list(stop), stop_token_ids=stop_token_ids, top_p=top_p))
     sampling_params = SamplingParams(**{
       k: getattr(config, k, None) for k in set(inspect.signature(SamplingParams).parameters.keys())
     })
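For context: vLLM's SamplingParams exposes a min_tokens knob that forces at least that many tokens to be generated before EOS or a stop token is honored. Because the hunk above builds SamplingParams by introspecting its signature and copying matching fields off the config, a newly supported field like min_tokens flows through without a new call site. A minimal, self-contained sketch of that pattern follows; the SamplingParams and GenerationConfig dataclasses here are stand-ins, not OpenLLM's or vLLM's actual classes.

import inspect
from dataclasses import dataclass

@dataclass
class SamplingParams:  # stand-in for vllm.SamplingParams
    temperature: float = 1.0
    top_p: float = 1.0
    max_tokens: int = 16
    min_tokens: int = 0  # the field this commit threads through

@dataclass
class GenerationConfig:  # stand-in for OpenLLM's config model
    temperature: float = 0.0
    top_p: float = 0.9
    max_tokens: int = 256
    min_tokens: int = 8

config = GenerationConfig()

# Greedy decoding (temperature ~ 0) makes nucleus sampling a no-op, so
# top_p is forced back to 1.0, mirroring the first line of the hunk.
config.top_p = 1.0 if config.temperature <= 1e-5 else config.top_p

# Copy only the attributes SamplingParams actually accepts; any field
# added to SamplingParams later is picked up automatically.
sampling_params = SamplingParams(**{
    k: getattr(config, k, None)
    for k in inspect.signature(SamplingParams).parameters
})
print(sampling_params)
# SamplingParams(temperature=0.0, top_p=1.0, max_tokens=256, min_tokens=8)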
@@ -92,7 +92,7 @@ class LLMService:
       stop_token_ids=stop_token_ids,
       request_id=request_id,
     ):
-      yield f'data: {generated.model_dump_json()}\n\n'
+      yield f'data: {core.GenerationOutput.from_vllm(generated).model_dump_json()}\n\n'
       yield 'data: [DONE]\n\n'

   @core.utils.api(route='/v1/metadata')
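The second hunk is the streaming side: each generation chunk is flushed as a server-sent-events frame of the form 'data: <json>\n\n', and the stream is closed with the OpenAI-style sentinel 'data: [DONE]\n\n'. A minimal consumer sketch, assuming an httpx client; the /v1/generate_stream route and the payload keys are assumptions for illustration, not OpenLLM's documented contract.

import json
import httpx

def stream_events(url: str, payload: dict):
    # Each SSE frame is a 'data: ...' line; the stream ends at the
    # sentinel frame 'data: [DONE]'.
    with httpx.stream('POST', url, json=payload, timeout=None) as resp:
        for line in resp.iter_lines():
            if not line.startswith('data: '):
                continue  # skip the blank separator lines between frames
            chunk = line[len('data: '):]
            if chunk == '[DONE]':
                break
            yield json.loads(chunk)

# Hypothetical usage; endpoint path and payload shape are assumed.
for event in stream_events('http://localhost:3000/v1/generate_stream',
                           {'prompt': 'Hello', 'llm_config': {'min_tokens': 8}}):
    print(event)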
||||