chore(llm): expose langchain API to the runner

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron
2023-05-30 14:37:00 -07:00
parent 36ba176bd5
commit 5b1b7d6ab8

View File

@@ -44,6 +44,8 @@ else:
logger = logging.getLogger(__name__)
_object_setattr = object.__setattr__
# NOTE: `1-2` -> text-generation and text2text-generation
FRAMEWORK_TO_AUTOCLASS_MAPPING = {
"pt": ("AutoModelForCausalLM", "AutoModelForSeq2SeqLM"),
@@ -742,7 +744,7 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
else:
_supported_resources = ("nvidia.com/gpu", "cpu")
return bentoml.Runner(
runner = bentoml.Runner(
type(
inflection.camelize(self.config.__openllm_model_name__) + "Runnable",
(_Runnable,),
@@ -757,6 +759,12 @@ class LLM(LLMInterface, metaclass=LLMMetaclass):
scheduling_strategy=scheduling_strategy,
)
# NOTE: expose the two langchain APIs (llm_type and identifying_params) as attributes on the runner
_object_setattr(runner, "llm_type", self.llm_type)
_object_setattr(runner, "identifying_params", self.identifying_params)
return runner
def Runner(start_name: str, **attrs: t.Any) -> bentoml.Runner:
"""Create a Runner for a given LLM. For a list of currently supported LLMs, check out 'openllm models'