Mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-02-07 22:33:28 -05:00)
fix: add support for min_tokens
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
@@ -171,6 +171,7 @@ class LLM:
     top_p = 1.0 if config['temperature'] <= 1e-5 else config['top_p']
     config = config.model_copy(update=dict(stop=list(stop), stop_token_ids=stop_token_ids, top_p=top_p))
     sampling_params = SamplingParams(**{
       k: getattr(config, k, None) for k in set(inspect.signature(SamplingParams).parameters.keys())
     })
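For context: vLLM's SamplingParams exposes a min_tokens knob that forces at least that many tokens to be generated before EOS or a stop token is honored. Because the hunk above builds SamplingParams by introspecting its signature and copying matching fields off the config, a newly supported field like min_tokens flows through without a new call site. A minimal, self-contained sketch of that pattern follows; the SamplingParams and GenerationConfig dataclasses here are stand-ins, not OpenLLM's or vLLM's actual classes.

import inspect
from dataclasses import dataclass

@dataclass
class SamplingParams:  # stand-in for vllm.SamplingParams
    temperature: float = 1.0
    top_p: float = 1.0
    max_tokens: int = 16
    min_tokens: int = 0  # the field this commit threads through

@dataclass
class GenerationConfig:  # stand-in for OpenLLM's config model
    temperature: float = 0.0
    top_p: float = 0.9
    max_tokens: int = 256
    min_tokens: int = 8

config = GenerationConfig()

# Greedy decoding (temperature ~ 0) makes nucleus sampling a no-op, so
# top_p is forced back to 1.0, mirroring the first line of the hunk.
config.top_p = 1.0 if config.temperature <= 1e-5 else config.top_p

# Copy only the attributes SamplingParams actually accepts; any field
# added to SamplingParams later is picked up automatically.
sampling_params = SamplingParams(**{
    k: getattr(config, k, None)
    for k in inspect.signature(SamplingParams).parameters
})
print(sampling_params)
# SamplingParams(temperature=0.0, top_p=1.0, max_tokens=256, min_tokens=8)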
@@ -92,7 +92,7 @@ class LLMService:
       stop_token_ids=stop_token_ids,
       request_id=request_id,
     ):
-      yield f'data: {generated.model_dump_json()}\n\n'
+      yield f'data: {core.GenerationOutput.from_vllm(generated).model_dump_json()}\n\n'
       yield 'data: [DONE]\n\n'

   @core.utils.api(route='/v1/metadata')
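The second hunk is the streaming side: each generation chunk is flushed as a server-sent-events frame of the form 'data: <json>\n\n', and the stream is closed with the OpenAI-style sentinel 'data: [DONE]\n\n'. A minimal consumer sketch, assuming an httpx client; the /v1/generate_stream route and the payload keys are assumptions for illustration, not OpenLLM's documented contract.

import json
import httpx

def stream_events(url: str, payload: dict):
    # Each SSE frame is a 'data: ...' line; the stream ends at the
    # sentinel frame 'data: [DONE]'.
    with httpx.stream('POST', url, json=payload, timeout=None) as resp:
        for line in resp.iter_lines():
            if not line.startswith('data: '):
                continue  # skip the blank separator lines between frames
            chunk = line[len('data: '):]
            if chunk == '[DONE]':
                break
            yield json.loads(chunk)

# Hypothetical usage; endpoint path and payload shape are assumed.
for event in stream_events('http://localhost:3000/v1/generate_stream',
                           {'prompt': 'Hello', 'llm_config': {'min_tokens': 8}}):
    print(event)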
||||