Mirror of https://github.com/bentoml/OpenLLM.git, last synced 2026-03-13 04:30:48 -04:00.
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com> Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Signed-off-by: GutZuFusss <leon.ikinger@googlemail.com> Co-authored-by: GutZuFusss <leon.ikinger@googlemail.com>
17 lines
458 B
Python
from __future__ import annotations
|
|
|
|
import bentoml
|
|
import openllm
|
|
|
|
# Model identifier understood by OpenLLM's model registry.
model = "dolly-v2"

# Build the default configuration for the chosen model.
llm_config = openllm.AutoConfig.for_model(model)

# Create a BentoML runner that hosts the LLM; its `generate` method is
# invoked asynchronously by the API endpoint below.
llm_runner = openllm.Runner(model, llm_config=llm_config)

# The BentoML service that exposes the runner over HTTP.
svc = bentoml.Service(name="llm-service", runners=[llm_runner])
@svc.api(input=bentoml.io.Text(), output=bentoml.io.Text())
async def prompt(input_text: str) -> str:
    """Text-in/text-out endpoint: generate a completion for *input_text*.

    Runs the LLM runner asynchronously and returns the generated text of
    the first candidate.
    """
    outputs = await llm_runner.generate.async_run(input_text)
    first_candidate = outputs[0]
    return first_candidate["generated_text"]