mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-01-24 23:37:48 -05:00
fix: correct classes for regression
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -38,7 +38,7 @@ _JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config
|
||||
@svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))
|
||||
async def generate_v1(input_dict: dict[str, t.Any]) -> openllm.GenerateOutput:
|
||||
echo = input_dict.pop('echo', False)
|
||||
-qa_inputs = openllm.GenerationInput.from_llm_config(llm_config)(**input_dict)
|
||||
+qa_inputs = openllm.GenerateInput.from_llm_config(llm_config)(**input_dict)
|
||||
config = qa_inputs.llm_config.model_dump()
|
||||
if runner.backend == 'vllm':
|
||||
async for output in runner.vllm_generate.async_stream(qa_inputs.prompt, adapter_name=qa_inputs.adapter_name, echo=echo, request_id=openllm_core.utils.gen_random_uuid(), **config):
|
||||
@@ -51,7 +51,7 @@ async def generate_v1(input_dict: dict[str, t.Any]) -> openllm.GenerateOutput:
|
||||
@svc.api(route='/v1/generate_stream', input=_JsonInput, output=bentoml.io.Text(content_type='text/event-stream'))
|
||||
async def generate_stream_v1(input_dict: dict[str, t.Any]) -> t.AsyncGenerator[str, None]:
|
||||
echo = input_dict.pop('echo', False)
|
||||
-qa_inputs = openllm.GenerationInput.from_llm_config(llm_config)(**input_dict)
|
||||
+qa_inputs = openllm.GenerateInput.from_llm_config(llm_config)(**input_dict)
|
||||
if runner.backend == 'vllm':
|
||||
return runner.vllm_generate_iterator.async_stream(qa_inputs.prompt,
|
||||
adapter_name=qa_inputs.adapter_name,
|
||||
|
||||
Reference in New Issue
Block a user