fix: correct classes for regression

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron
2023-10-17 02:00:11 -04:00
parent 9701d77e36
commit aedb1e4843

View File

@@ -38,7 +38,7 @@ _JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config
@svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))
async def generate_v1(input_dict: dict[str, t.Any]) -> openllm.GenerateOutput:
echo = input_dict.pop('echo', False)
-qa_inputs = openllm.GenerationInput.from_llm_config(llm_config)(**input_dict)
+qa_inputs = openllm.GenerateInput.from_llm_config(llm_config)(**input_dict)
config = qa_inputs.llm_config.model_dump()
if runner.backend == 'vllm':
async for output in runner.vllm_generate.async_stream(qa_inputs.prompt, adapter_name=qa_inputs.adapter_name, echo=echo, request_id=openllm_core.utils.gen_random_uuid(), **config):
@@ -51,7 +51,7 @@ async def generate_v1(input_dict: dict[str, t.Any]) -> openllm.GenerateOutput:
@svc.api(route='/v1/generate_stream', input=_JsonInput, output=bentoml.io.Text(content_type='text/event-stream'))
async def generate_stream_v1(input_dict: dict[str, t.Any]) -> t.AsyncGenerator[str, None]:
echo = input_dict.pop('echo', False)
-qa_inputs = openllm.GenerationInput.from_llm_config(llm_config)(**input_dict)
+qa_inputs = openllm.GenerateInput.from_llm_config(llm_config)(**input_dict)
if runner.backend == 'vllm':
return runner.vllm_generate_iterator.async_stream(qa_inputs.prompt,
adapter_name=qa_inputs.adapter_name,