Mirror of https://github.com/bentoml/OpenLLM.git, synced 2026-02-06 13:52:21 -05:00
feat: heuristics logprobs (#692)
* fix(encoder): bring back T5 support on PyTorch

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: support logprobs and prompt_logprobs

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* docs: update changelog

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
@@ -387,7 +387,7 @@ class LLM(t.Generic[M, T], ReprMixin):
 
   async def generate(
     self, prompt, prompt_token_ids=None, stop=None, stop_token_ids=None, request_id=None, adapter_name=None, **attrs
-  ):
+  ) -> GenerationOutput:
     config = self.config.model_construct_env(**attrs)
     texts, token_ids = [[]] * config['n'], [[]] * config['n']
     final_result = None
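The first hunk only tightens the signature: awaiting `generate` is now documented to resolve to a single `GenerationOutput`. Below is a minimal usage sketch, not taken from this commit: it assumes an `openllm.LLM` instance built from a model id, and that extra generation kwargs such as a `logprobs` option (the feature named in the commit message) are forwarded through `**attrs` into `config.model_construct_env`; the parameter name, model id, and output fields are illustrative, not confirmed by the diff.

```python
# Minimal sketch, not from the diff. Assumes an openllm.LLM instance and that
# **attrs forwards sampling options (e.g. the new `logprobs`) into the config.
import asyncio

import openllm


async def main():
    llm = openllm.LLM('facebook/opt-125m')  # model id is illustrative
    # `logprobs=2` mirrors vLLM-style sampling params; an assumption, not confirmed here.
    result = await llm.generate('Explain beam search in one sentence.', logprobs=2)
    # GenerationOutput carries a list of completions; `.text` holds the generated text.
    print(result.outputs[0].text)


asyncio.run(main())
```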
@@ -410,7 +410,7 @@ class LLM(t.Generic[M, T], ReprMixin):
 
   async def generate_iterator(
     self, prompt, prompt_token_ids=None, stop=None, stop_token_ids=None, request_id=None, adapter_name=None, **attrs
-  ):
+  ) -> t.AsyncGenerator[GenerationOutput, None]:
     from bentoml._internal.runner.runner_handle import DummyRunnerHandle
 
     if isinstance(self.runner._runner_handle, DummyRunnerHandle):
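Likewise, the second hunk annotates `generate_iterator` as an async generator of `GenerationOutput` chunks, so callers can stream partial results with `async for`. A hedged sketch under the same assumptions as above:

```python
# Streaming sketch, not from the diff: consume generate_iterator with `async for`.
import asyncio

import openllm


async def main():
    llm = openllm.LLM('facebook/opt-125m')  # model id is illustrative
    async for chunk in llm.generate_iterator('Tell me about T5.'):
        # Each chunk is a GenerationOutput; print deltas as they arrive.
        print(chunk.outputs[0].text, end='', flush=True)
    print()


asyncio.run(main())
```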