feat: heuristics logprobs (#692)

* fix(encoder): bring back T5 support on PyTorch

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: support logprobs and prompt_logprobs

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* docs: update changelog

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Author: Aaron Pham
Date: 2023-11-18 19:26:20 -05:00
Committed by: GitHub
Parent: 4499469efb
Commit: 1831d8f129

6 changed files with 135 additions and 54 deletions
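The headline change is support for `logprobs` and `prompt_logprobs` in generation requests. A minimal usage sketch follows; the `openllm.LLM` constructor call, the model id, and the keyword names are assumptions inferred from the diff (they mirror the vLLM-style convention), not a verified API surface.

    # Hedged sketch: construction and the logprobs/prompt_logprobs keywords
    # are assumptions based on this diff, not a verified API.
    import asyncio
    import openllm

    async def main():
        llm = openllm.LLM('facebook/opt-125m')  # model id is illustrative
        output = await llm.generate(
            'What is the meaning of life?',
            logprobs=5,         # top-5 log-probabilities per generated token
            prompt_logprobs=1,  # also score each prompt token
        )
        print(output.outputs[0].logprobs)

    asyncio.run(main())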


@@ -387,7 +387,7 @@ class LLM(t.Generic[M, T], ReprMixin):
   async def generate(
     self, prompt, prompt_token_ids=None, stop=None, stop_token_ids=None, request_id=None, adapter_name=None, **attrs
-  ):
+  ) -> GenerationOutput:
     config = self.config.model_construct_env(**attrs)
     texts, token_ids = [[]] * config['n'], [[]] * config['n']
     final_result = None
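One note on the context line above: `[[]] * config['n']` builds `n` references to a single shared list, so mutating one slot in place would leak across all candidate sequences; this pattern is only safe if each slot is later reassigned rather than appended to. A minimal illustration of the pitfall:

    n = 3
    buckets = [[]] * n      # three names for ONE list object
    buckets[0].append('x')  # mutates the shared list
    print(buckets)          # [['x'], ['x'], ['x']] -- all slots affected

    buckets = [[] for _ in range(n)]  # independent lists per slot
    buckets[0].append('x')
    print(buckets)          # [['x'], [], []]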
@@ -410,7 +410,7 @@ class LLM(t.Generic[M, T], ReprMixin):
   async def generate_iterator(
     self, prompt, prompt_token_ids=None, stop=None, stop_token_ids=None, request_id=None, adapter_name=None, **attrs
-  ):
+  ) -> t.AsyncGenerator[GenerationOutput, None]:
     from bentoml._internal.runner.runner_handle import DummyRunnerHandle
     if isinstance(self.runner._runner_handle, DummyRunnerHandle):
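Since `generate_iterator` is now annotated as an async generator of `GenerationOutput`, callers can consume partial results with `async for`. A hedged streaming sketch; the constructor, model id, and the `outputs[0].text` field access are illustrative assumptions:

    # Hedged sketch: field layout of GenerationOutput is assumed from the
    # vLLM-style schema this commit mirrors.
    import asyncio
    import openllm

    async def main():
        llm = openllm.LLM('facebook/opt-125m')  # model id is illustrative
        async for chunk in llm.generate_iterator('Stream me a haiku.'):
            # each chunk is a GenerationOutput with the tokens produced so far
            print(chunk.outputs[0].text, flush=True)

    asyncio.run(main())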