feat: heuristics logprobs (#692)

* fix(encoder): bring back T5 support on PyTorch

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: support logprobs and prompt_logprobs

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* docs: update changelog

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Author: Aaron Pham
Date: 2023-11-18 19:26:20 -05:00
Committed by: GitHub
Parent: 4499469efb
Commit: 1831d8f129

6 changed files with 135 additions and 54 deletions
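The headline change is support for `logprobs` and `prompt_logprobs` in generation requests. A minimal usage sketch follows; the `openllm.LLM` constructor call, the model id, and the keyword names are assumptions inferred from the diff (they mirror the vLLM-style convention), not a verified API surface.

    # Hedged sketch: construction and the logprobs/prompt_logprobs keywords
    # are assumptions based on this diff, not a verified API.
    import asyncio
    import openllm

    async def main():
        llm = openllm.LLM('facebook/opt-125m')  # model id is illustrative
        output = await llm.generate(
            'What is the meaning of life?',
            logprobs=5,         # top-5 log-probabilities per generated token
            prompt_logprobs=1,  # also score each prompt token
        )
        print(output.outputs[0].logprobs)

    asyncio.run(main())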


@@ -387,7 +387,7 @@ class LLM(t.Generic[M, T], ReprMixin):
   async def generate(
     self, prompt, prompt_token_ids=None, stop=None, stop_token_ids=None, request_id=None, adapter_name=None, **attrs
-  ):
+  ) -> GenerationOutput:
     config = self.config.model_construct_env(**attrs)
     texts, token_ids = [[]] * config['n'], [[]] * config['n']
     final_result = None
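One note on the context line above: `[[]] * config['n']` builds `n` references to a single shared list, so mutating one slot in place would leak across all candidate sequences; this pattern is only safe if each slot is later reassigned rather than appended to. A minimal illustration of the pitfall:

    n = 3
    buckets = [[]] * n      # three names for ONE list object
    buckets[0].append('x')  # mutates the shared list
    print(buckets)          # [['x'], ['x'], ['x']] -- all slots affected

    buckets = [[] for _ in range(n)]  # independent lists per slot
    buckets[0].append('x')
    print(buckets)          # [['x'], [], []]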
@@ -410,7 +410,7 @@ class LLM(t.Generic[M, T], ReprMixin):
   async def generate_iterator(
     self, prompt, prompt_token_ids=None, stop=None, stop_token_ids=None, request_id=None, adapter_name=None, **attrs
-  ):
+  ) -> t.AsyncGenerator[GenerationOutput, None]:
     from bentoml._internal.runner.runner_handle import DummyRunnerHandle
     if isinstance(self.runner._runner_handle, DummyRunnerHandle):
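Since `generate_iterator` is now annotated as an async generator of `GenerationOutput`, callers can consume partial results with `async for`. A hedged streaming sketch; the constructor, model id, and the `outputs[0].text` field access are illustrative assumptions:

    # Hedged sketch: field layout of GenerationOutput is assumed from the
    # vLLM-style schema this commit mirrors.
    import asyncio
    import openllm

    async def main():
        llm = openllm.LLM('facebook/opt-125m')  # model id is illustrative
        async for chunk in llm.generate_iterator('Stream me a haiku.'):
            # each chunk is a GenerationOutput with the tokens produced so far
            print(chunk.outputs[0].text, flush=True)

    asyncio.run(main())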