fix: add support for min_tokens

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Author: Aaron Pham
Date:   2024-04-02 04:22:14 +00:00
Parent: 5c0d2787c0
Commit: 1d817a7e01

3 changed files with 10 additions and 1 deletion
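For context, recent vLLM releases expose a min_tokens field on SamplingParams that keeps generation going (EOS and stop tokens are not honored) until at least that many tokens have been produced; this commit threads that field through OpenLLM. A minimal usage sketch, assuming a vLLM build that ships min_tokens:

```python
from vllm import SamplingParams

# min_tokens suppresses EOS/stop tokens until at least this many
# tokens have been generated; max_tokens still caps the output.
params = SamplingParams(min_tokens=16, max_tokens=128, temperature=0.7)
```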

@@ -171,6 +171,7 @@ class LLM:
    top_p = 1.0 if config['temperature'] <= 1e-5 else config['top_p']
    config = config.model_copy(update=dict(stop=list(stop), stop_token_ids=stop_token_ids, top_p=top_p))
    sampling_params = SamplingParams(**{
      k: getattr(config, k, None) for k in set(inspect.signature(SamplingParams).parameters.keys())
    })
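Because the sampling parameters are built by introspecting SamplingParams' signature and pulling matching attributes off the config, a new field like min_tokens flows through without touching this call site. A minimal sketch of the pattern, with stand-in classes for both SamplingParams and the config (the real OpenLLM/vLLM classes carry many more fields):

```python
import inspect

class SamplingParams:
    """Stand-in for vllm.SamplingParams; the real class has many more fields."""
    def __init__(self, temperature=1.0, top_p=1.0, min_tokens=0, max_tokens=None):
        self.temperature = temperature
        self.top_p = top_p
        self.min_tokens = min_tokens
        self.max_tokens = max_tokens

class Config:
    """Stand-in for the request's config model."""
    temperature = 0.0
    top_p = 0.9
    min_tokens = 8  # the newly supported field

config = Config()
# Forward every config attribute whose name matches a SamplingParams
# parameter; getattr(..., None) yields None when the config lacks one,
# so unset fields fall through to vLLM's own handling.
sampling_params = SamplingParams(**{
    k: getattr(config, k, None)
    for k in inspect.signature(SamplingParams).parameters
})
assert sampling_params.min_tokens == 8
```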

@@ -92,7 +92,7 @@ class LLMService:
       stop_token_ids=stop_token_ids,
       request_id=request_id,
     ):
-      yield f'data: {generated.model_dump_json()}\n\n'
+      yield f'data: {core.GenerationOutput.from_vllm(generated).model_dump_json()}\n\n'
     yield 'data: [DONE]\n\n'
 
   @core.utils.api(route='/v1/metadata')
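This hunk normalizes vLLM's raw request output into OpenLLM's own GenerationOutput before JSON-encoding each server-sent-events frame, so streaming clients see a stable schema rather than vLLM internals. A rough sketch of that framing, using a hypothetical stand-in for core.GenerationOutput (the real schema differs):

```python
import json
from typing import Any, AsyncIterator

class GenerationOutput:
    """Hypothetical stand-in for core.GenerationOutput."""
    def __init__(self, text: str, finished: bool) -> None:
        self.text, self.finished = text, finished

    @classmethod
    def from_vllm(cls, output: Any) -> 'GenerationOutput':
        # Assumed mapping from a vllm.RequestOutput-like object;
        # the real converter covers more fields.
        return cls(text=output.outputs[0].text, finished=output.finished)

    def model_dump_json(self) -> str:
        return json.dumps({'text': self.text, 'finished': self.finished})

async def sse_stream(outputs: AsyncIterator[Any]) -> AsyncIterator[str]:
    # Each SSE frame is 'data: <json>\n\n'; a literal [DONE]
    # sentinel tells the client the stream is complete.
    async for generated in outputs:
        yield f'data: {GenerationOutput.from_vllm(generated).model_dump_json()}\n\n'
    yield 'data: [DONE]\n\n'
```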