chore(llm): expose langchain API to the runner

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-05-18 13:41:39 -04:00 · 2023-05-30 14:37:00 -07:00
parent 36ba176bd5
commit 5b1b7d6ab8
1 changed files with 9 additions and 1 deletions
--- a/src/openllm/_llm.py
+++ b/src/openllm/_llm.py
@@ -44,6 +44,8 @@ else:

 logger = logging.getLogger(__name__)

+_object_setattr = object.__setattr__
+
 # NOTE: `1-2` -> text-generation and text2text-generation
 FRAMEWORK_TO_AUTOCLASS_MAPPING = {
    "pt": ("AutoModelForCausalLM", "AutoModelForSeq2SeqLM"),
@@ -742,7 +744,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
        else:
            _supported_resources = ("nvidia.com/gpu", "cpu")

-        return bentoml.Runner(
+        runner = bentoml.Runner(
            type(
                inflection.camelize(self.config.__openllm_model_name__) + "Runnable",
                (_Runnable,),
@@ -757,6 +759,12 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
            scheduling_strategy=scheduling_strategy,
        )

+        # NOTE: expose the two langchain APIs (llm_type, identifying_params) as attributes on the runner
+        _object_setattr(runner, "llm_type", self.llm_type)
+        _object_setattr(runner, "identifying_params", self.identifying_params)
+
+        return runner
+

 def Runner(start_name: str, **attrs: t.Any) -> bentoml.Runner:
    """Create a Runner for given LLM. For a list of currently supported LLM, check out 'openllm models'