from __future__ import annotations

import os
import typing as t
import warnings

import openllm

from openllm_core._typing_compat import LiteralBackend
from openllm_core.utils import first_not_none
from openllm_core.utils import is_vllm_available

if t.TYPE_CHECKING:
  from openllm_core import LLMConfig
  from openllm_core._typing_compat import ParamSpec

  from ._llm import LLMRunner

  P = ParamSpec('P')

_object_setattr = object.__setattr__


def _mark_deprecated(fn: t.Callable[P, t.Any]) -> t.Callable[P, t.Any]:
  _object_setattr(fn, '__deprecated__', True)
  return fn


@_mark_deprecated
def Runner(
  model_name: str,
  ensure_available: bool = True,
  init_local: bool = False,
  backend: LiteralBackend | None = None,
  llm_config: LLMConfig | None = None,
  **attrs: t.Any,
) -> LLMRunner[t.Any, t.Any]:
  """Create a Runner for the given LLM. For a list of currently supported LLMs, check out 'openllm models'.

  > [!WARNING]
  > This method is now deprecated in favour of 'openllm.LLM.runner'.

  ```python
  runner = openllm.Runner("dolly-v2")

  @svc.on_startup
  def download():
    runner.download_model()
  ```

  If `init_local=True` (useful for development workflows), `ensure_available` is also enabled.
  If `ensure_available` is set explicitly, the given value is respected; otherwise the behaviour above applies.

  Args:
    model_name: Supported model name from 'openllm models'.
    ensure_available: If True, download the model when it is not yet available locally. If False, skip the
                      download and make sure the model is already available locally. Defaults to True, and
                      ``openllm.LLM`` will always check whether the model is available locally based on the generated tag.
    backend: The backend implementation to use for this Runner. If the `OPENLLM_BACKEND` environment variable
             is set, it takes precedence.
    llm_config: Optional ``openllm.LLMConfig`` to initialise this ``openllm.LLMRunner``.
    init_local: If True, initialise the model locally. Useful when you want to run the model within the current
                process. (Symmetrical to ``bentoml.Runner.init_local()``.)
    **attrs: The remaining keyword arguments are passed to the LLM. Refer to the LLM documentation for their behaviour.
  """
  from ._llm import LLM

  if llm_config is None:
    llm_config = openllm.AutoConfig.for_model(model_name)
  # Resolve the model id from kwargs, the environment, or the config default.
  model_id = attrs.get('model_id', os.getenv('OPENLLM_MODEL_ID', llm_config['default_id']))
  _RUNNER_MSG = f"""\
Using 'openllm.Runner' is now deprecated. Make sure to switch to the following syntax:

```python
llm = openllm.LLM('{model_id}')

svc = bentoml.Service('...', runners=[llm.runner])

@svc.api(...)
async def chat(input: str) -> str:
  async for it in llm.generate_iterator(input):
    print(it)
```
"""
  warnings.warn(_RUNNER_MSG, DeprecationWarning, stacklevel=2)
  attrs.update({
    'model_id': model_id,
    'quantize': os.getenv('OPENLLM_QUANTIZE', attrs.get('quantize', None)),
    'serialisation': first_not_none(
      attrs.get('serialisation'), os.environ.get('OPENLLM_SERIALIZATION'), default=llm_config['serialisation']
    ),
    'system_message': first_not_none(os.environ.get('OPENLLM_SYSTEM_MESSAGE'), attrs.get('system_message'), default=None),
    'prompt_template': first_not_none(os.environ.get('OPENLLM_PROMPT_TEMPLATE'), attrs.get('prompt_template'), default=None),
  })
  backend = t.cast(LiteralBackend, first_not_none(backend, default='vllm' if is_vllm_available() else 'pt'))
  llm = LLM[t.Any, t.Any](backend=backend, llm_config=llm_config, **attrs)
  if init_local:
    llm.runner.init_local(quiet=True)
  return llm.runner


_DEPRECATED = {k: v for k, v in locals().items() if getattr(v, '__deprecated__', False)}
__all__ = list(_DEPRECATED)


def __dir__() -> list[str]:
  return sorted(_DEPRECATED.keys())
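
# A minimal migration sketch, kept as a comment so it does not become part of this module's API.
# It illustrates the replacement for the deprecated `openllm.Runner` helper, mirroring the
# deprecation message emitted above. The model id 'facebook/opt-1.3b', the service name, and the
# endpoint body are placeholder assumptions, not values defined by this module:
#
#   import bentoml
#   import openllm
#
#   llm = openllm.LLM('facebook/opt-1.3b')
#   svc = bentoml.Service('llm-service', runners=[llm.runner])
#
#   @svc.api(input=bentoml.io.Text(), output=bentoml.io.Text())
#   async def chat(prompt: str) -> str:
#     last = ''
#     async for it in llm.generate_iterator(prompt):
#       last = str(it)  # the exact chunk type depends on the installed openllm version
#     return last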