fix: consistent styling between ruff and yapf (#279)

Authored by Aaron Pham on 2023-08-30 11:37:41 -04:00, committed by GitHub
parent f678f71e18
commit c9cef1d773
145 changed files with 1051 additions and 395 deletions
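
Every file in this commit gets the same mechanical rewrite: imports that grouped several names in one statement are split so that each name is imported on its own line. Splitting them is what keeps ruff's import sorting and yapf's line wrapping from repeatedly reformatting each other's output; the setting that produces it is presumably an isort-style force-single-line option in the project's ruff configuration (the configuration change itself is not shown here). A minimal sketch of the pattern, using stdlib names purely for illustration:

# before: several names imported in one statement
from os.path import dirname, join

# after: one import statement per name, as enforced across the diffs below
from os.path import dirname
from os.path import join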

cz.py

@@ -6,6 +6,7 @@ import token
import tokenize
from tabulate import tabulate
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
def run_cz(dir: str, package: str):


@@ -2,6 +2,7 @@ from __future__ import annotations
import bentoml
import openllm
model = "dolly-v2"
llm_config = openllm.AutoConfig.for_model(model)


@@ -5,6 +5,7 @@ from langchain.llms import OpenLLM
import bentoml
from bentoml.io import Text
SAMPLE_INPUT = "What is the weather in San Francisco?"
llm = OpenLLM(model_name="dolly-v2", model_id="databricks/dolly-v2-7b", embedded=False,)


@@ -1,5 +1,9 @@
from __future__ import annotations
from . import benmin as benmin
from ._base import BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient
from .client import AsyncGrpcClient as AsyncGrpcClient, AsyncHTTPClient as AsyncHTTPClient, GrpcClient as GrpcClient, HTTPClient as HTTPClient
from ._base import BaseAsyncClient as BaseAsyncClient
from ._base import BaseClient as BaseClient
from .client import AsyncGrpcClient as AsyncGrpcClient
from .client import AsyncHTTPClient as AsyncHTTPClient
from .client import GrpcClient as GrpcClient
from .client import HTTPClient as HTTPClient


@@ -4,6 +4,7 @@ import abc
import functools
import logging
import typing as t
from http import HTTPStatus
from urllib.parse import urljoin
@@ -12,14 +13,23 @@ import httpx
import orjson
import openllm_core
from openllm_core._typing_compat import LiteralString, overload
from openllm_core.utils import bentoml_cattr, ensure_exec_coro, is_transformers_available, is_transformers_supports_agent
from .benmin import AsyncClient as AsyncBentoClient, Client as BentoClient
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import overload
from openllm_core.utils import bentoml_cattr
from openllm_core.utils import ensure_exec_coro
from openllm_core.utils import is_transformers_available
from openllm_core.utils import is_transformers_supports_agent
from .benmin import AsyncClient as AsyncBentoClient
from .benmin import Client as BentoClient
if t.TYPE_CHECKING:
import transformers
from openllm_core._typing_compat import DictStrAny, LiteralRuntime
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralRuntime
logger = logging.getLogger(__name__)
@attr.define(slots=False, init=False)
@@ -204,7 +214,9 @@ class _AsyncClient(_ClientAttr):
async def _run_hf_agent(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
if not is_transformers_supports_agent(): raise RuntimeError('This version of transformers does not support agent.run. Make sure to upgrade to transformers>4.30.0')
if len(args) > 1: raise ValueError("'args' should only take one positional argument.")
from transformers.tools.agents import clean_code_for_run, get_tool_creation_code, resolve_tools
from transformers.tools.agents import clean_code_for_run
from transformers.tools.agents import get_tool_creation_code
from transformers.tools.agents import resolve_tools
from transformers.tools.python_interpreter import evaluate
task = kwargs.pop('task', args[0])


@@ -13,13 +13,16 @@ via `client.call` or `await client.call`.
"""
from __future__ import annotations
import typing as t
from abc import abstractmethod
import attr
import httpx
import bentoml
if t.TYPE_CHECKING: from bentoml._internal.service.inference_api import InferenceAPI
if t.TYPE_CHECKING:
from bentoml._internal.service.inference_api import InferenceAPI
__all__ = ['Client', 'AsyncClient']


@@ -6,21 +6,31 @@ import time
import typing as t
import bentoml
from bentoml._internal.service.inference_api import InferenceAPI
from bentoml.grpc.utils import import_generated_stubs, load_from_file
from openllm_client.benmin import AsyncClient, Client
from openllm_core._typing_compat import NotRequired, overload
from openllm_core.utils import ensure_exec_coro, is_grpc_available, is_grpc_health_available
from bentoml.grpc.utils import import_generated_stubs
from bentoml.grpc.utils import load_from_file
from openllm_client.benmin import AsyncClient
from openllm_client.benmin import Client
from openllm_core._typing_compat import NotRequired
from openllm_core._typing_compat import overload
from openllm_core.utils import ensure_exec_coro
from openllm_core.utils import is_grpc_available
from openllm_core.utils import is_grpc_health_available
if not is_grpc_available() or not is_grpc_health_available(): raise ImportError("gRPC is required to use gRPC client. Install with 'pip install \"openllm-client[grpc]\"'.")
import grpc
import grpc_health.v1.health_pb2 as pb_health
import grpc_health.v1.health_pb2_grpc as services_health
from google.protobuf import json_format
from grpc import aio
pb, services = import_generated_stubs('v1')
if t.TYPE_CHECKING:
from bentoml.grpc.v1.service_pb2 import ServiceMetadataResponse
logger = logging.getLogger(__name__)
class ClientCredentials(t.TypedDict):


@@ -5,6 +5,7 @@ import logging
import time
import typing as t
import urllib.error
from urllib.parse import urlparse
import httpx
@@ -14,9 +15,12 @@ import starlette.requests
import starlette.responses
import bentoml
from bentoml._internal.service.inference_api import InferenceAPI
from openllm_client.benmin import AsyncClient, Client
from openllm_client.benmin import AsyncClient
from openllm_client.benmin import Client
from openllm_core.utils import ensure_exec_coro
logger = logging.getLogger(__name__)
class HttpClient(Client):


@@ -1,8 +1,11 @@
from __future__ import annotations
import logging
from urllib.parse import urlparse
from ._base import BaseAsyncClient, BaseClient
from ._base import BaseAsyncClient
from ._base import BaseClient
logger = logging.getLogger(__name__)
def process_http_address(self: AsyncHTTPClient | HTTPClient, address: str) -> None:


@@ -1,33 +1,44 @@
from __future__ import annotations
from . import exceptions as exceptions, utils as utils
from ._configuration import GenerationConfig as GenerationConfig, LLMConfig as LLMConfig, SamplingParams as SamplingParams
from ._schema import EmbeddingsOutput as EmbeddingsOutput, GenerationInput as GenerationInput, GenerationOutput as GenerationOutput, HfAgentInput as HfAgentInput, MetadataOutput as MetadataOutput, unmarshal_vllm_outputs as unmarshal_vllm_outputs
from ._strategies import AmdGpuResource as AmdGpuResource, CascadingResourceStrategy as CascadingResourceStrategy, LiteralResourceSpec as LiteralResourceSpec, NvidiaGpuResource as NvidiaGpuResource, available_resource_spec as available_resource_spec, get_resource as get_resource
from .config import (
CONFIG_MAPPING as CONFIG_MAPPING,
CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES,
START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING,
START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING,
START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING,
START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING,
START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING,
START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING,
START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING,
START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING,
START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING,
START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING,
START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING,
AutoConfig as AutoConfig,
BaichuanConfig as BaichuanConfig,
ChatGLMConfig as ChatGLMConfig,
DollyV2Config as DollyV2Config,
FalconConfig as FalconConfig,
FlanT5Config as FlanT5Config,
GPTNeoXConfig as GPTNeoXConfig,
LlamaConfig as LlamaConfig,
MPTConfig as MPTConfig,
OPTConfig as OPTConfig,
StableLMConfig as StableLMConfig,
StarCoderConfig as StarCoderConfig,
)
from . import exceptions as exceptions
from . import utils as utils
from ._configuration import GenerationConfig as GenerationConfig
from ._configuration import LLMConfig as LLMConfig
from ._configuration import SamplingParams as SamplingParams
from ._schema import EmbeddingsOutput as EmbeddingsOutput
from ._schema import GenerationInput as GenerationInput
from ._schema import GenerationOutput as GenerationOutput
from ._schema import HfAgentInput as HfAgentInput
from ._schema import MetadataOutput as MetadataOutput
from ._schema import unmarshal_vllm_outputs as unmarshal_vllm_outputs
from ._strategies import AmdGpuResource as AmdGpuResource
from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
from ._strategies import LiteralResourceSpec as LiteralResourceSpec
from ._strategies import NvidiaGpuResource as NvidiaGpuResource
from ._strategies import available_resource_spec as available_resource_spec
from ._strategies import get_resource as get_resource
from .config import CONFIG_MAPPING as CONFIG_MAPPING
from .config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
from .config import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
from .config import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
from .config import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
from .config import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
from .config import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from .config import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from .config import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from .config import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from .config import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from .config import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
from .config import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
from .config import AutoConfig as AutoConfig
from .config import BaichuanConfig as BaichuanConfig
from .config import ChatGLMConfig as ChatGLMConfig
from .config import DollyV2Config as DollyV2Config
from .config import FalconConfig as FalconConfig
from .config import FlanT5Config as FlanT5Config
from .config import GPTNeoXConfig as GPTNeoXConfig
from .config import LlamaConfig as LlamaConfig
from .config import MPTConfig as MPTConfig
from .config import OPTConfig as OPTConfig
from .config import StableLMConfig as StableLMConfig
from .config import StarCoderConfig as StarCoderConfig


@@ -47,24 +47,50 @@ import click_option_group as cog
import inflection
import orjson
# NOTE: Using internal API from attr here, since we are actually allowing subclass of openllm_core.LLMConfig to become 'attrs'-ish
from attr._compat import set_closure_cell
from attr._make import _CountingAttr, _make_init, _transform_attrs
from cattr.gen import make_dict_structure_fn, make_dict_unstructure_fn, override
from attr._make import _CountingAttr
from attr._make import _make_init
from attr._make import _transform_attrs
from cattr.gen import make_dict_structure_fn
from cattr.gen import make_dict_unstructure_fn
from cattr.gen import override
from deepmerge.merger import Merger
import openllm_core
from ._strategies import LiteralResourceSpec, available_resource_spec, resource_spec
from ._typing_compat import AdapterType, AnyCallable, At, DictStrAny, ListStr, LiteralRuntime, LiteralString, NotRequired, Required, Self, overload
from ._strategies import LiteralResourceSpec
from ._strategies import available_resource_spec
from ._strategies import resource_spec
from ._typing_compat import AdapterType
from ._typing_compat import AnyCallable
from ._typing_compat import At
from ._typing_compat import DictStrAny
from ._typing_compat import ListStr
from ._typing_compat import LiteralRuntime
from ._typing_compat import LiteralString
from ._typing_compat import NotRequired
from ._typing_compat import Required
from ._typing_compat import Self
from ._typing_compat import overload
from .exceptions import ForbiddenAttributeError
from .utils import ENV_VARS_TRUE_VALUES, MYPY, LazyLoader, ReprMixin, bentoml_cattr, codegen, dantic, field_env_key, first_not_none, lenient_issubclass
from .utils import ENV_VARS_TRUE_VALUES
from .utils import MYPY
from .utils import LazyLoader
from .utils import ReprMixin
from .utils import bentoml_cattr
from .utils import codegen
from .utils import dantic
from .utils import field_env_key
from .utils import first_not_none
from .utils import lenient_issubclass
from .utils.import_utils import BACKENDS_MAPPING
if t.TYPE_CHECKING:
import click
import peft
import transformers
import vllm
from transformers.generation.beam_constraints import Constraint
else:
Constraint = t.Any
@@ -781,6 +807,7 @@ class _ConfigAttr:
'''Optional tokenizer class for this given LLM. See Llama for example.'''
# update-config-stubs.py: special stop
class _ConfigBuilder:
"""A modified version of attrs internal _ClassBuilder, and should only be called within __init_subclass__ of LLMConfig.


@@ -6,10 +6,13 @@ import typing as t
import attr
import inflection
from openllm_core._configuration import GenerationConfig, LLMConfig
from openllm_core._configuration import GenerationConfig
from openllm_core._configuration import LLMConfig
from .utils import bentoml_cattr
if t.TYPE_CHECKING: import vllm
if t.TYPE_CHECKING:
import vllm
@attr.frozen(slots=True)
class GenerationInput:


@@ -13,11 +13,14 @@ import warnings
import psutil
import bentoml
from bentoml._internal.resource import get_resource, system_resources
from bentoml._internal.resource import get_resource
from bentoml._internal.resource import system_resources
from bentoml._internal.runner.strategy import THREAD_ENVS
from ._typing_compat import overload
from .utils import DEBUG, ReprMixin
from .utils import DEBUG
from .utils import ReprMixin
class DynResource(t.Protocol):
resource_id: t.ClassVar[str]
@@ -98,10 +101,12 @@ def _from_system(cls: type[DynResource]) -> list[str]:
# we don't want to use CLI because parsing is a pain.
sys.path.append('/opt/rocm/libexec/rocm_smi')
try:
from ctypes import byref, c_uint32
from ctypes import byref
from ctypes import c_uint32
# refers to https://github.com/RadeonOpenCompute/rocm_smi_lib/blob/master/python_smi_tools/rsmiBindings.py
from rsmiBindings import rocmsmi, rsmi_status_t
from rsmiBindings import rocmsmi
from rsmiBindings import rsmi_status_t
device_count = c_uint32(0)
ret = rocmsmi.rsmi_num_monitor_devices(byref(device_count))
@@ -149,7 +154,11 @@ def _from_spec(cls: type[DynResource], spec: t.Any) -> list[str]:
raise TypeError(f"'{cls.__name__}.from_spec' only supports parsing spec of type int, str, or list, got '{type(spec)}' instead.")
def _raw_device_uuid_nvml() -> list[str] | None:
from ctypes import CDLL, byref, c_int, c_void_p, create_string_buffer
from ctypes import CDLL
from ctypes import byref
from ctypes import c_int
from ctypes import c_void_p
from ctypes import create_string_buffer
try:
nvml_h = CDLL('libnvidia-ml.so.1')


@@ -7,7 +7,9 @@ import typing as t
import attr
import bentoml
from bentoml._internal.types import ModelSignatureDict as ModelSignatureDict
if t.TYPE_CHECKING:
import auto_gptq as autogptq
import peft
@@ -15,11 +17,13 @@ if t.TYPE_CHECKING:
import vllm
import openllm
from bentoml._internal.runner.runnable import RunnableMethod
from bentoml._internal.runner.runner import RunnerMethod
from bentoml._internal.runner.strategy import Strategy
from .utils.lazy import VersionInfo
M = t.TypeVar(
'M',
bound='t.Union[transformers.PreTrainedModel, transformers.Pipeline, transformers.TFPreTrainedModel, transformers.FlaxPreTrainedModel, vllm.LLMEngine, peft.PeftModel, autogptq.modeling.BaseGPTQForCausalLM]'
@@ -41,14 +45,28 @@ LiteralContainerRegistry = t.Literal['docker', 'gh', 'ecr']
LiteralContainerVersionStrategy = t.Literal['release', 'nightly', 'latest', 'custom']
if sys.version_info[:2] >= (3, 11):
from typing import LiteralString as LiteralString, NotRequired as NotRequired, Required as Required, Self as Self, dataclass_transform as dataclass_transform, overload as overload
from typing import LiteralString as LiteralString
from typing import NotRequired as NotRequired
from typing import Required as Required
from typing import Self as Self
from typing import dataclass_transform as dataclass_transform
from typing import overload as overload
else:
from typing_extensions import LiteralString as LiteralString, NotRequired as NotRequired, Required as Required, Self as Self, dataclass_transform as dataclass_transform, overload as overload
from typing_extensions import LiteralString as LiteralString
from typing_extensions import NotRequired as NotRequired
from typing_extensions import Required as Required
from typing_extensions import Self as Self
from typing_extensions import dataclass_transform as dataclass_transform
from typing_extensions import overload as overload
if sys.version_info[:2] >= (3, 10):
from typing import Concatenate as Concatenate, ParamSpec as ParamSpec, TypeAlias as TypeAlias
from typing import Concatenate as Concatenate
from typing import ParamSpec as ParamSpec
from typing import TypeAlias as TypeAlias
else:
from typing_extensions import Concatenate as Concatenate, ParamSpec as ParamSpec, TypeAlias as TypeAlias
from typing_extensions import Concatenate as Concatenate
from typing_extensions import ParamSpec as ParamSpec
from typing_extensions import TypeAlias as TypeAlias
class PeftAdapterOutput(t.TypedDict):
success: bool


@@ -1,14 +1,27 @@
from __future__ import annotations
from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
from .configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
from .configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
from .configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
from .configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
from .configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
from .configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
from .configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
from .configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING
from .configuration_auto import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
from .configuration_auto import AutoConfig as AutoConfig
from .configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
from .configuration_baichuan import BaichuanConfig as BaichuanConfig
from .configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
from .configuration_chatglm import ChatGLMConfig as ChatGLMConfig
from .configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
from .configuration_dolly_v2 import DollyV2Config as DollyV2Config
from .configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
from .configuration_falcon import FalconConfig as FalconConfig
from .configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from .configuration_flan_t5 import FlanT5Config as FlanT5Config
from .configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from .configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from .configuration_llama import LlamaConfig as LlamaConfig
from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from .configuration_mpt import MPTConfig as MPTConfig
from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from .configuration_opt import OPTConfig as OPTConfig
from .configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
from .configuration_stablelm import StableLMConfig as StableLMConfig
from .configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
from .configuration_starcoder import StarCoderConfig as StarCoderConfig


@@ -2,15 +2,21 @@
from __future__ import annotations
import importlib
import typing as t
from collections import OrderedDict
import inflection
import openllm_core
from openllm_core.utils import ReprMixin
if t.TYPE_CHECKING:
import types
from collections import _odict_items, _odict_keys, _odict_values
from collections import _odict_items
from collections import _odict_keys
from collections import _odict_values
from openllm_core._typing_compat import LiteralString
ConfigKeysView = _odict_keys[str, type[openllm_core.LLMConfig]]


@@ -2,7 +2,9 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
START_BAICHUAN_COMMAND_DOCSTRING = '''\
Run a LLMServer for Baichuan model.


@@ -2,7 +2,9 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core.utils import dantic
START_CHATGLM_COMMAND_DOCSTRING = '''\
Run a LLMServer for ChatGLM model.


@@ -2,9 +2,12 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
from openllm_core.utils import dantic
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
START_DOLLY_V2_COMMAND_DOCSTRING = '''\
Run a LLMServer for dolly-v2 model.


@@ -2,7 +2,9 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
START_FALCON_COMMAND_DOCSTRING = '''\
Run a LLMServer for FalconLM model.


@@ -2,7 +2,9 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
START_FLAN_T5_COMMAND_DOCSTRING = '''\
Run a LLMServer for FLAN-T5 model.


@@ -2,8 +2,10 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
from openllm_core.utils import dantic
START_GPT_NEOX_COMMAND_DOCSTRING = '''\
Run a LLMServer for GPTNeoX model.


@@ -2,8 +2,10 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
from openllm_core.utils import dantic
START_LLAMA_COMMAND_DOCSTRING = '''\
Run a LLMServer for Llama model.


@@ -2,8 +2,10 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
from openllm_core.utils import dantic
MPTPromptType = t.Literal['default', 'instruct', 'chat', 'storywriter']
START_MPT_COMMAND_DOCSTRING = '''\


@@ -2,8 +2,10 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
from openllm_core.utils import dantic
START_OPT_COMMAND_DOCSTRING = '''\
Run a LLMServer for OPT model.


@@ -2,7 +2,9 @@ from __future__ import annotations
import typing as t
import openllm_core
from openllm_core._prompt import process_prompt
START_STABLELM_COMMAND_DOCSTRING = '''\
Run a LLMServer for StableLM model.


@@ -2,6 +2,7 @@ from __future__ import annotations
import typing as t
import openllm_core
START_STARCODER_COMMAND_DOCSTRING = '''\
Run a LLMServer for StarCoder model.


@@ -14,33 +14,35 @@ import sys
import types
import typing as t
import uuid
from pathlib import Path
from circus.exc import ConflictError
import openllm_core
from bentoml._internal.configuration import (
DEBUG_ENV_VAR as DEBUG_ENV_VAR,
GRPC_DEBUG_ENV_VAR as _GRPC_DEBUG_ENV_VAR,
QUIET_ENV_VAR as QUIET_ENV_VAR,
get_debug_mode as _get_debug_mode,
get_quiet_mode as _get_quiet_mode,
set_quiet_mode as set_quiet_mode,
)
from bentoml._internal.configuration import DEBUG_ENV_VAR as DEBUG_ENV_VAR
from bentoml._internal.configuration import GRPC_DEBUG_ENV_VAR as _GRPC_DEBUG_ENV_VAR
from bentoml._internal.configuration import QUIET_ENV_VAR as QUIET_ENV_VAR
from bentoml._internal.configuration import get_debug_mode as _get_debug_mode
from bentoml._internal.configuration import get_quiet_mode as _get_quiet_mode
from bentoml._internal.configuration import set_quiet_mode as set_quiet_mode
from bentoml._internal.models.model import ModelContext as _ModelContext
from bentoml._internal.types import LazyType as LazyType
from bentoml._internal.utils import (
LazyLoader as LazyLoader,
bentoml_cattr as bentoml_cattr,
calc_dir_size as calc_dir_size,
first_not_none as first_not_none,
pkg as pkg,
reserve_free_port as reserve_free_port,
resolve_user_filepath as resolve_user_filepath,
)
from openllm_core.utils.lazy import (LazyModule as LazyModule, VersionInfo as VersionInfo,)
from bentoml._internal.utils import LazyLoader as LazyLoader
from bentoml._internal.utils import bentoml_cattr as bentoml_cattr
from bentoml._internal.utils import calc_dir_size as calc_dir_size
from bentoml._internal.utils import first_not_none as first_not_none
from bentoml._internal.utils import pkg as pkg
from bentoml._internal.utils import reserve_free_port as reserve_free_port
from bentoml._internal.utils import resolve_user_filepath as resolve_user_filepath
from openllm_core.utils.import_utils import ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES
from openllm_core.utils.lazy import LazyModule as LazyModule
from openllm_core.utils.lazy import VersionInfo as VersionInfo
if t.TYPE_CHECKING:
from openllm_core._typing_compat import AnyCallable
logger = logging.getLogger(__name__)
try:
from typing import GenericAlias as _TypingGenericAlias # type: ignore
@@ -309,7 +311,6 @@ _import_structure: dict[str, list[str]] = {
'lazy': ['LazyModule'],
'import_utils': [
'OPTIONAL_DEPENDENCIES',
'ENV_VARS_TRUE_VALUES',
'DummyMetaclass',
'EnvVarMixin',
'require_backends',
@@ -340,37 +341,37 @@ _import_structure: dict[str, list[str]] = {
if t.TYPE_CHECKING:
# NOTE: The following exports useful utils from bentoml
from . import (analytics as analytics, codegen as codegen, dantic as dantic,)
from .import_utils import (
ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES,
OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES,
DummyMetaclass as DummyMetaclass,
EnvVarMixin as EnvVarMixin,
is_autogptq_available as is_autogptq_available,
is_bitsandbytes_available as is_bitsandbytes_available,
is_cpm_kernels_available as is_cpm_kernels_available,
is_datasets_available as is_datasets_available,
is_einops_available as is_einops_available,
is_fairscale_available as is_fairscale_available,
is_flax_available as is_flax_available,
is_grpc_available as is_grpc_available,
is_grpc_health_available as is_grpc_health_available,
is_jupyter_available as is_jupyter_available,
is_jupytext_available as is_jupytext_available,
is_notebook_available as is_notebook_available,
is_peft_available as is_peft_available,
is_sentencepiece_available as is_sentencepiece_available,
is_tf_available as is_tf_available,
is_torch_available as is_torch_available,
is_transformers_available as is_transformers_available,
is_transformers_supports_agent as is_transformers_supports_agent,
is_transformers_supports_kbit as is_transformers_supports_kbit,
is_triton_available as is_triton_available,
is_vllm_available as is_vllm_available,
is_xformers_available as is_xformers_available,
require_backends as require_backends,
)
from . import analytics as analytics
from . import codegen as codegen
from . import dantic as dantic
from .import_utils import OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES
from .import_utils import DummyMetaclass as DummyMetaclass
from .import_utils import EnvVarMixin as EnvVarMixin
from .import_utils import is_autogptq_available as is_autogptq_available
from .import_utils import is_bitsandbytes_available as is_bitsandbytes_available
from .import_utils import is_cpm_kernels_available as is_cpm_kernels_available
from .import_utils import is_datasets_available as is_datasets_available
from .import_utils import is_einops_available as is_einops_available
from .import_utils import is_fairscale_available as is_fairscale_available
from .import_utils import is_flax_available as is_flax_available
from .import_utils import is_grpc_available as is_grpc_available
from .import_utils import is_grpc_health_available as is_grpc_health_available
from .import_utils import is_jupyter_available as is_jupyter_available
from .import_utils import is_jupytext_available as is_jupytext_available
from .import_utils import is_notebook_available as is_notebook_available
from .import_utils import is_peft_available as is_peft_available
from .import_utils import is_sentencepiece_available as is_sentencepiece_available
from .import_utils import is_tf_available as is_tf_available
from .import_utils import is_torch_available as is_torch_available
from .import_utils import is_transformers_available as is_transformers_available
from .import_utils import is_transformers_supports_agent as is_transformers_supports_agent
from .import_utils import is_transformers_supports_kbit as is_transformers_supports_kbit
from .import_utils import is_triton_available as is_triton_available
from .import_utils import is_vllm_available as is_vllm_available
from .import_utils import is_xformers_available as is_xformers_available
from .import_utils import require_backends as require_backends
from .representation import ReprMixin as ReprMixin
__lazy = LazyModule(__name__, globals()['__file__'], _import_structure, extra_objects=_extras)
__all__ = __lazy.__all__
__dir__ = __lazy.__dir__


@@ -14,8 +14,10 @@ import typing as t
import attr
import openllm_core
from bentoml._internal.utils import analytics as _internal_analytics
from openllm_core._typing_compat import ParamSpec
P = ParamSpec('P')
T = t.TypeVar('T')
logger = logging.getLogger(__name__)


@@ -5,12 +5,18 @@ import linecache
import logging
import types
import typing as t
from operator import itemgetter
import orjson
if t.TYPE_CHECKING:
import openllm_core
from openllm_core._typing_compat import AnyCallable, DictStrAny, ListStr, LiteralString
from openllm_core._typing_compat import AnyCallable
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import ListStr
from openllm_core._typing_compat import LiteralString
PartialAny = functools.partial[t.Any]
_T = t.TypeVar('_T', bound=t.Callable[..., t.Any])
@@ -110,7 +116,8 @@ def generate_function(
def make_env_transformer(
cls: type[openllm_core.LLMConfig], model_name: str, suffix: LiteralString | None = None, default_callback: t.Callable[[str, t.Any], t.Any] | None = None, globs: DictStrAny | None = None,
) -> AnyCallable:
from openllm_core.utils import dantic, field_env_key
from openllm_core.utils import dantic
from openllm_core.utils import field_env_key
def identity(_: str, x_value: t.Any) -> t.Any:
return x_value


@@ -5,6 +5,7 @@ import importlib
import os
import sys
import typing as t
from enum import Enum
import attr
@@ -12,8 +13,14 @@ import click
import click_option_group as cog
import inflection
import orjson
from click import ParamType, shell_completion as sc, types as click_types
if t.TYPE_CHECKING: from attr import _ValidatorType
from click import ParamType
from click import shell_completion as sc
from click import types as click_types
if t.TYPE_CHECKING:
from attr import _ValidatorType
AnyCallable = t.Callable[..., t.Any]
FC = t.TypeVar('FC', bound=t.Union[AnyCallable, click.Command])


@@ -7,19 +7,25 @@ import importlib.util
import logging
import os
import typing as t
from collections import OrderedDict
import inflection
import packaging.version
import openllm_core
from bentoml._internal.utils import LazyLoader, pkg
from openllm_core._typing_compat import LiteralString, overload
from bentoml._internal.utils import LazyLoader
from bentoml._internal.utils import pkg
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import overload
from .representation import ReprMixin
if t.TYPE_CHECKING:
BackendOrderedDict = OrderedDict[str, t.Tuple[t.Callable[[], bool], str]]
from openllm_core._typing_compat import LiteralRuntime
logger = logging.getLogger(__name__)
OPTIONAL_DEPENDENCIES = {'opt', 'flan-t5', 'vllm', 'fine-tune', 'ggml', 'agents', 'openai', 'playground', 'gptq', 'grpc'}
ENV_VARS_TRUE_VALUES = {'1', 'ON', 'YES', 'TRUE'}
@@ -406,7 +412,7 @@ class EnvVarMixin(ReprMixin):
def _framework_value(self) -> LiteralRuntime:
from . import first_not_none
return t.cast(t.Literal['pt', 'tf', 'flax', 'vllm'], first_not_none(os.environ.get(self['framework']), default=self._implementation))
return t.cast(LiteralRuntime, first_not_none(os.environ.get(self['framework']), default=self._implementation))
def _bettertransformer_value(self) -> bool:
from . import first_not_none


@@ -14,6 +14,7 @@ import warnings
import attr
import openllm_core
__all__ = ['VersionInfo', 'LazyModule']
# vendorred from attrs


@@ -1,12 +1,15 @@
from __future__ import annotations
import typing as t
from abc import abstractmethod
import attr
import orjson
from openllm_core import utils
if t.TYPE_CHECKING: from openllm_core._typing_compat import TypeAlias
if t.TYPE_CHECKING:
from openllm_core._typing_compat import TypeAlias
ReprArgs: TypeAlias = t.Generator[t.Tuple[t.Optional[str], t.Any], None, None]


@@ -7,6 +7,7 @@ To start any OpenLLM model:
openllm start <model_name> --options ...
'''
from __future__ import annotations
if __name__ == '__main__':
from openllm.cli.entrypoint import cli
cli()


@@ -3,13 +3,19 @@ from __future__ import annotations
import typing as t
import transformers
from huggingface_hub import snapshot_download
import bentoml
import openllm
from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
from bentoml._internal.models.model import ModelOptions, ModelSignature
if t.TYPE_CHECKING: import torch
from bentoml._internal.frameworks.transformers import API_VERSION
from bentoml._internal.frameworks.transformers import MODULE_NAME
from bentoml._internal.models.model import ModelOptions
from bentoml._internal.models.model import ModelSignature
if t.TYPE_CHECKING:
import torch
_GENERIC_EMBEDDING_ID = 'sentence-transformers/all-MiniLM-L6-v2'
_BENTOMODEL_ID = 'sentence-transformers--all-MiniLM-L6-v2'


@@ -3,7 +3,11 @@ from __future__ import annotations
import typing as t
import transformers
if t.TYPE_CHECKING: import torch, openllm
if t.TYPE_CHECKING:
import torch
import openllm
# reexport from transformers
LogitsProcessorList = transformers.LogitsProcessorList


@@ -16,20 +16,62 @@ import attr
import fs.path
import inflection
import orjson
from huggingface_hub import hf_hub_download
import bentoml
import openllm
import openllm_core
from bentoml._internal.models.model import ModelSignature
from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
from openllm_core._configuration import FineTuneConfig
from openllm_core._configuration import LLMConfig
from openllm_core._configuration import _object_getattribute
from openllm_core._configuration import _setattr_class
from openllm_core._schema import unmarshal_vllm_outputs
from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
from openllm_core._typing_compat import AdaptersMapping
from openllm_core._typing_compat import AdaptersTuple
from openllm_core._typing_compat import AdapterType
from openllm_core._typing_compat import AnyCallable
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import ListStr
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import LLMEmbeddings
from openllm_core._typing_compat import LLMRunnable
from openllm_core._typing_compat import LLMRunner
from openllm_core._typing_compat import M
from openllm_core._typing_compat import ModelSignatureDict as _ModelSignatureDict
from openllm_core._typing_compat import NotRequired
from openllm_core._typing_compat import PeftAdapterOutput
from openllm_core._typing_compat import T
from openllm_core._typing_compat import TupleAny
from openllm_core._typing_compat import overload
from openllm_core.utils import DEBUG
from openllm_core.utils import ENV_VARS_TRUE_VALUES
from openllm_core.utils import MYPY
from openllm_core.utils import EnvVarMixin
from openllm_core.utils import LazyLoader
from openllm_core.utils import ReprMixin
from openllm_core.utils import apply
from openllm_core.utils import bentoml_cattr
from openllm_core.utils import codegen
from openllm_core.utils import device_count
from openllm_core.utils import first_not_none
from openllm_core.utils import generate_hash_from_file
from openllm_core.utils import is_peft_available
from openllm_core.utils import is_torch_available
from openllm_core.utils import non_intrusive_setattr
from openllm_core.utils import normalize_attrs_to_model_tokenizer_pair
from openllm_core.utils import resolve_filepath
from openllm_core.utils import validate_is_path
from ._quantisation import infer_quantisation_config
from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
from .exceptions import ForbiddenAttributeError
from .exceptions import GpuNotAvailableError
from .exceptions import OpenLLMException
from .utils import infer_auto_class
if t.TYPE_CHECKING:
import auto_gptq as autogptq
import peft
@@ -1077,7 +1119,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
**attrs: t.Any
) -> t.Iterator[t.Any]:
# NOTE: encoder-decoder models will need to implement their own generate_iterator for now
from ._generation import get_context_length, is_partial_stop, prepare_logits_processor
from ._generation import get_context_length
from ._generation import is_partial_stop
from ._generation import prepare_logits_processor
len_prompt = len(prompt)
if stop_token_ids is None: stop_token_ids = []


@@ -4,11 +4,17 @@ import logging
import typing as t
from openllm_core._typing_compat import overload
from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
from openllm_core.utils import LazyLoader
from openllm_core.utils import is_autogptq_available
from openllm_core.utils import is_bitsandbytes_available
from openllm_core.utils import is_transformers_supports_kbit
from openllm_core.utils import pkg
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
from ._llm import LLM
autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
logger = logging.getLogger(__name__)


@@ -5,6 +5,7 @@ import typing as t
import warnings
import orjson
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route
@@ -12,17 +13,21 @@ from starlette.routing import Route
import bentoml
import openllm
import openllm_core
if t.TYPE_CHECKING:
from starlette.requests import Request
from starlette.responses import Response
from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
from bentoml._internal.runner.runner import AbstractRunner
from bentoml._internal.runner.runner import RunnerMethod
from openllm_core._typing_compat import TypeAlias
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.EmbeddingsOutput]]
# The following warnings from bitsandbytes, and probably not that important for users to see
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization')
warnings.filterwarnings('ignore', message='The installed version of bitsandbytes was compiled without GPU support.')
model = os.environ.get('OPENLLM_MODEL', '{__model_name__}') # openllm: model name
adapter_map = os.environ.get('OPENLLM_ADAPTER_MAP', '''{__model_adapter_map__}''') # openllm: model adapter map
llm_config = openllm.AutoConfig.for_model(model)
@@ -37,6 +42,7 @@ generic_embedding_runner = bentoml.Runner(
runners: list[AbstractRunner] = [runner]
if not runner.supports_embeddings: runners.append(generic_embedding_runner)
svc = bentoml.Service(name=f"llm-{llm_config['start_name']}-service", runners=runners)
_JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config.model_dump(flatten=True), 'adapter_name': None})
@svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))


@@ -7,15 +7,26 @@ import os
import typing as t
from openllm_core.utils import LazyModule
_import_structure: dict[str, list[str]] = {
'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
'oci': ['CONTAINER_NAMES', 'get_base_container_tag', 'build_container', 'get_base_container_name', 'supported_registries', 'RefResolver']
}
if t.TYPE_CHECKING:
from . import _package as _package, oci as oci
from ._package import build_editable as build_editable, construct_docker_options as construct_docker_options, construct_python_options as construct_python_options, create_bento as create_bento
from .oci import CONTAINER_NAMES as CONTAINER_NAMES, RefResolver as RefResolver, build_container as build_container, get_base_container_name as get_base_container_name, get_base_container_tag as get_base_container_tag, supported_registries as supported_registries
from . import _package as _package
from . import oci as oci
from ._package import build_editable as build_editable
from ._package import construct_docker_options as construct_docker_options
from ._package import construct_python_options as construct_python_options
from ._package import create_bento as create_bento
from .oci import CONTAINER_NAMES as CONTAINER_NAMES
from .oci import RefResolver as RefResolver
from .oci import build_container as build_container
from .oci import get_base_container_name as get_base_container_name
from .oci import get_base_container_tag as get_base_container_tag
from .oci import supported_registries as supported_registries
__lazy = LazyModule(__name__, os.path.abspath('__file__'), _import_structure)
__all__ = __lazy.__all__
__dir__ = __lazy.__dir__


@@ -6,27 +6,39 @@ import logging
import os
import string
import typing as t
from pathlib import Path
import fs
import fs.copy
import fs.errors
import orjson
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm_core
from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
from bentoml._internal.bento.build_config import BentoBuildConfig
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.bento.build_config import ModelSpec
from bentoml._internal.bento.build_config import PythonOptions
from bentoml._internal.configuration.containers import BentoMLContainer
from . import oci
if t.TYPE_CHECKING:
from fs.base import FS
import openllm
from bentoml._internal.bento import BentoStore
from bentoml._internal.models.model import ModelStore
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralString
logger = logging.getLogger(__name__)
OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'


@@ -9,7 +9,10 @@ import pathlib
import shutil
import subprocess
import typing as t
from datetime import datetime, timedelta, timezone
from datetime import datetime
from datetime import timedelta
from datetime import timezone
import attr
import orjson
@@ -17,11 +20,17 @@ import orjson
import bentoml
import openllm
import openllm_core
from openllm_core.utils.lazy import VersionInfo
if t.TYPE_CHECKING:
from ghapi import all
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import RefTuple
all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all') # noqa: F811
logger = logging.getLogger(__name__)


@@ -9,21 +9,28 @@ import click
import click_option_group as cog
import inflection
import orjson
from bentoml_cli.utils import BentoMLCommandGroup
from click import shell_completion as sc
from click.shell_completion import CompletionItem
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
from openllm_core._typing_compat import Concatenate
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import ParamSpec
from openllm_core.utils import DEBUG
from . import termui
if t.TYPE_CHECKING:
import subprocess
from openllm_core._configuration import LLMConfig
logger = logging.getLogger(__name__)
P = ParamSpec('P')


@@ -7,20 +7,27 @@ import subprocess
import sys
import typing as t
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm
import openllm_core
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.exceptions import OpenLLMException
from . import termui
from ._factory import start_command_factory
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString
logger = logging.getLogger(__name__)
def _start(
@@ -81,7 +88,8 @@ def _start(
framework: The framework to use for this LLM. By default, this is set to ``pt``.
additional_args: Additional arguments to pass to ``openllm start``.
"""
from .entrypoint import start_command, start_grpc_command
from .entrypoint import start_command
from .entrypoint import start_grpc_command
llm_config = openllm.AutoConfig.for_model(model_name)
_ModelEnv = openllm_core.utils.EnvVarMixin(
model_name,


@@ -42,29 +42,80 @@ import fs.copy
import fs.errors
import inflection
import orjson
from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
from simple_di import Provide, inject
from bentoml_cli.utils import BentoMLCommandGroup
from bentoml_cli.utils import opt_callback
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelStore
from openllm import bundle, serialisation
from openllm import bundle
from openllm import serialisation
from openllm.exceptions import OpenLLMException
from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
from openllm.models.auto import CONFIG_MAPPING
from openllm.models.auto import MODEL_FLAX_MAPPING_NAMES
from openllm.models.auto import MODEL_MAPPING_NAMES
from openllm.models.auto import MODEL_TF_MAPPING_NAMES
from openllm.models.auto import MODEL_VLLM_MAPPING_NAMES
from openllm.models.auto import AutoConfig
from openllm.models.auto import AutoLLM
from openllm.utils import infer_auto_class
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
from openllm_core._typing_compat import Concatenate
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import ParamSpec
from openllm_core._typing_compat import Self
from openllm_core.utils import DEBUG
from openllm_core.utils import DEBUG_ENV_VAR
from openllm_core.utils import OPTIONAL_DEPENDENCIES
from openllm_core.utils import QUIET_ENV_VAR
from openllm_core.utils import EnvVarMixin
from openllm_core.utils import LazyLoader
from openllm_core.utils import analytics
from openllm_core.utils import bentoml_cattr
from openllm_core.utils import compose
from openllm_core.utils import configure_logging
from openllm_core.utils import dantic
from openllm_core.utils import first_not_none
from openllm_core.utils import get_debug_mode
from openllm_core.utils import get_quiet_mode
from openllm_core.utils import is_torch_available
from openllm_core.utils import is_transformers_supports_agent
from openllm_core.utils import resolve_user_filepath
from openllm_core.utils import set_debug_mode
from openllm_core.utils import set_quiet_mode
from . import termui
from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
from ._factory import FC
from ._factory import LiteralOutput
from ._factory import _AnyCallable
from ._factory import bettertransformer_option
from ._factory import container_registry_option
from ._factory import fast_option
from ._factory import machine_option
from ._factory import model_id_option
from ._factory import model_name_argument
from ._factory import model_version_option
from ._factory import output_option
from ._factory import parse_device_callback
from ._factory import quantize_option
from ._factory import serialisation_option
from ._factory import start_command_factory
from ._factory import workers_per_resource_option
if t.TYPE_CHECKING:
import torch
from bentoml._internal.bento import BentoStore
from bentoml._internal.container import DefaultBuilder
from openllm_core._schema import EmbeddingsOutput
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
else:
torch = LazyLoader('torch', globals(), 'torch')


@@ -5,9 +5,14 @@ import click
import orjson
import openllm
from openllm.cli import termui
from openllm.cli._factory import container_registry_option, machine_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
from openllm.cli._factory import container_registry_option
from openllm.cli._factory import machine_option
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
@click.command(
'build_base_container',


@@ -5,13 +5,19 @@ import typing as t
import click
import psutil
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar, machine_option
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
from openllm.cli._factory import bento_complete_envvar
from openllm.cli._factory import machine_option
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)


@@ -2,9 +2,12 @@ from __future__ import annotations
import typing as t
import click
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
from bentoml._internal.bento.bento import BentoInfo
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.configuration.containers import BentoMLContainer
@@ -12,7 +15,9 @@ from bentoml._internal.container.generate import generate_containerfile
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar
from openllm_core.utils import bentoml_cattr
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)


@@ -4,12 +4,17 @@ import typing as t
import click
import inflection
import orjson
from bentoml_cli.utils import opt_callback
import openllm
from openllm.cli import termui
from openllm.cli._factory import machine_option, model_complete_envvar, output_option
from openllm.cli._factory import machine_option
from openllm.cli._factory import model_complete_envvar
from openllm.cli._factory import output_option
from openllm_core._prompt import process_prompt
LiteralOutput = t.Literal['json', 'pretty', 'porcelain']
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)


@@ -6,9 +6,11 @@ import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, output_option
from openllm.cli._factory import LiteralOutput
from openllm.cli._factory import output_option
@click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
@output_option(default_value='json')


@@ -7,10 +7,16 @@ import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
from openllm.cli._factory import LiteralOutput
from openllm.cli._factory import model_complete_envvar
from openllm.cli._factory import model_name_argument
from openllm.cli._factory import output_option
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)


@@ -13,12 +13,16 @@ import yaml
from openllm import playground
from openllm.cli import termui
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
from openllm_core.utils import is_jupyter_available
from openllm_core.utils import is_jupytext_available
from openllm_core.utils import is_notebook_available
if t.TYPE_CHECKING:
import jupytext
import nbformat
from openllm_core._typing_compat import DictStrAny
logger = logging.getLogger(__name__)
def load_notebook_metadata() -> DictStrAny:


@@ -6,7 +6,9 @@ import click
import inflection
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None


@@ -14,7 +14,14 @@ from __future__ import annotations
import typing as t
import openllm_client
if t.TYPE_CHECKING: from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
if t.TYPE_CHECKING:
from openllm_client import AsyncGrpcClient as AsyncGrpcClient
from openllm_client import AsyncHTTPClient as AsyncHTTPClient
from openllm_client import BaseAsyncClient as BaseAsyncClient
from openllm_client import BaseClient as BaseClient
from openllm_client import GrpcClient as GrpcClient
from openllm_client import HTTPClient as HTTPClient
def __dir__() -> t.Sequence[str]:
return sorted(dir(openllm_client))


@@ -1,4 +1,11 @@
'''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
from __future__ import annotations
from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError
from openllm_core.exceptions import Error as Error
from openllm_core.exceptions import FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
from openllm_core.exceptions import ForbiddenAttributeError as ForbiddenAttributeError
from openllm_core.exceptions import GpuNotAvailableError as GpuNotAvailableError
from openllm_core.exceptions import MissingAnnotationAttributeError as MissingAnnotationAttributeError
from openllm_core.exceptions import MissingDependencyError as MissingDependencyError
from openllm_core.exceptions import OpenLLMException as OpenLLMException
from openllm_core.exceptions import ValidationError as ValidationError


@@ -3,8 +3,15 @@ import os
import typing as t
import openllm
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING
from openllm_core.config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
from openllm_core.config import AutoConfig as AutoConfig
from openllm_core.utils import LazyModule
from openllm_core.utils import is_flax_available
from openllm_core.utils import is_tf_available
from openllm_core.utils import is_torch_available
from openllm_core.utils import is_vllm_available
_import_structure: dict[str, list[str]] = {
'modeling_auto': ['MODEL_MAPPING_NAMES'],
'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],


@@ -12,11 +12,14 @@ import openllm
from openllm_core.utils import ReprMixin
if t.TYPE_CHECKING:
import types
from collections import _odict_items, _odict_keys, _odict_values
from collections import _odict_items
from collections import _odict_keys
from collections import _odict_values
from _typeshed import SupportsIter
from openllm_core._typing_compat import LiteralString, LLMRunner
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import LLMRunner
ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]


@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
'opt', 'OPT'
), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])


@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)


@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)


@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
'opt', 'VLLMOPT'
), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])


@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
from openllm.utils import LazyModule
from openllm.utils import is_cpm_kernels_available
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
from openllm_core.config.configuration_baichuan import BaichuanConfig as BaichuanConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError


@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
from openllm.utils import LazyModule
from openllm.utils import is_cpm_kernels_available
from openllm.utils import is_torch_available
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
from openllm_core.config.configuration_chatglm import ChatGLMConfig as ChatGLMConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError


@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
__openllm_internal__ = True


@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
from openllm_core.config.configuration_dolly_v2 import DollyV2Config as DollyV2Config
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -5,7 +5,10 @@ import typing as t
import openllm
from openllm_core._typing_compat import overload
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_dolly_v2 import END_KEY
from openllm_core.config.configuration_dolly_v2 import RESPONSE_KEY
from openllm_core.config.configuration_dolly_v2 import get_special_token_id
if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
else: torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
logger = logging.getLogger(__name__)


@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
from openllm_core.config.configuration_falcon import FalconConfig as FalconConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
from openllm.utils import LazyModule
from openllm.utils import is_flax_available
from openllm.utils import is_tf_available
from openllm.utils import is_torch_available
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from openllm_core.config.configuration_flan_t5 import FlanT5Config as FlanT5Config
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
__openllm_internal__ = True


@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from openllm_core.config.configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_llama import PROMPT_MAPPING as PROMPT_MAPPING
from openllm_core.config.configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from openllm_core.config.configuration_llama import LlamaConfig as LlamaConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_vllm_available(): raise MissingDependencyError


@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
__openllm_internal__ = True


@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_mpt import PROMPT_MAPPING as PROMPT_MAPPING
from openllm_core.config.configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from openllm_core.config.configuration_mpt import MPTConfig as MPTConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -4,8 +4,11 @@ import typing as t
import bentoml
import openllm
from openllm.utils import generate_labels, is_triton_available
if t.TYPE_CHECKING: import transformers, torch
from openllm.utils import generate_labels
from openllm.utils import is_triton_available
if t.TYPE_CHECKING:
import torch
import transformers
logger = logging.getLogger(__name__)


@@ -3,8 +3,15 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
from openllm.utils import LazyModule
from openllm.utils import is_flax_available
from openllm.utils import is_tf_available
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from openllm_core.config.configuration_opt import OPTConfig as OPTConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
from openllm_core.config.configuration_stablelm import StableLMConfig as StableLMConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
__openllm_internal__ = True


@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
from openllm_core.config.configuration_starcoder import StarCoderConfig as StarCoderConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError


@@ -5,7 +5,11 @@ import typing as t
import bentoml
import openllm
from openllm.utils import generate_labels
from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
from openllm_core.config.configuration_starcoder import EOD
from openllm_core.config.configuration_starcoder import FIM_MIDDLE
from openllm_core.config.configuration_starcoder import FIM_PAD
from openllm_core.config.configuration_starcoder import FIM_PREFIX
from openllm_core.config.configuration_starcoder import FIM_SUFFIX
if t.TYPE_CHECKING: import transformers
class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.GPT2TokenizerFast']):


@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
from datasets import load_dataset
from trl import SFTTrainer
DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
DATASET_NAME = "timdettmers/openassistant-guanaco"


@@ -4,6 +4,7 @@ import logging
import typing as t
import openllm
openllm.utils.configure_logging()
logger = logging.getLogger(__name__)


@@ -23,6 +23,7 @@ from datasets import load_dataset
if t.TYPE_CHECKING:
from peft import PeftModel
DEFAULT_MODEL_ID = "facebook/opt-6.7b"
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):


@@ -30,12 +30,19 @@ import cloudpickle
import fs
import openllm
from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
from openllm_core._typing_compat import M, ParamSpec, T
from openllm_core._typing_compat import M
from openllm_core._typing_compat import ParamSpec
from openllm_core._typing_compat import T
if t.TYPE_CHECKING:
import bentoml
from . import constants as constants, ggml as ggml, transformers as transformers
from . import constants as constants
from . import ggml as ggml
from . import transformers as transformers
P = ParamSpec('P')
def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
@@ -44,7 +51,8 @@ def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
By default, it will try to find the bentomodel, whether or not it is already in the store.
If the model is not found, it raises a ``bentoml.exceptions.NotFound``.
'''
from .transformers._helpers import infer_tokenizers_from_llm, process_config
from .transformers._helpers import infer_tokenizers_from_llm
from .transformers._helpers import process_config
config, *_ = process_config(llm._bentomodel.path, llm.__llm_trust_remote_code__)
bentomodel_fs = fs.open_fs(llm._bentomodel.path)


@@ -1,4 +1,5 @@
from __future__ import annotations
FRAMEWORK_TO_AUTOCLASS_MAPPING = {
'pt': ('AutoModelForCausalLM', 'AutoModelForSeq2SeqLM'),
'tf': ('TFAutoModelForCausalLM', 'TFAutoModelForSeq2SeqLM'),


@@ -7,7 +7,9 @@ import typing as t
import bentoml
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import M
if t.TYPE_CHECKING:
from openllm_core._typing_compat import M
_conversion_strategy = {'pt': 'ggml'}


@@ -5,15 +5,23 @@ import logging
import typing as t
from huggingface_hub import snapshot_download
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelOptions
from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
from ._helpers import check_unintialised_params
from ._helpers import infer_autoclass_from_llm
from ._helpers import infer_tokenizers_from_llm
from ._helpers import make_model_signatures
from ._helpers import process_config
from ._helpers import update_model
from .weights import HfIgnore
if t.TYPE_CHECKING:
import types
@@ -24,7 +32,9 @@ if t.TYPE_CHECKING:
import vllm
from bentoml._internal.models import ModelStore
from openllm_core._typing_compat import DictStrAny, M, T
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
else:
vllm = openllm.utils.LazyLoader('vllm', globals(), 'vllm')
autogptq = openllm.utils.LazyLoader('autogptq', globals(), 'auto_gptq')


@@ -4,16 +4,24 @@ import typing as t
import openllm
import openllm_core
from bentoml._internal.models.model import ModelInfo, ModelSignature
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS
from bentoml._internal.models.model import ModelInfo
from bentoml._internal.models.model import ModelSignature
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING
from openllm.serialisation.constants import HUB_ATTRS
if t.TYPE_CHECKING:
import torch
import transformers
from transformers.models.auto.auto_factory import _BaseAutoModelClass
import bentoml
from bentoml._internal.models.model import ModelSignaturesType
from openllm_core._typing_compat import DictStrAny, M, T
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
else:
transformers, torch = openllm_core.utils.LazyLoader('transformers', globals(), 'transformers'), openllm_core.utils.LazyLoader('torch', globals(), 'torch')


@@ -2,10 +2,14 @@ from __future__ import annotations
import typing as t
import attr
from huggingface_hub import HfApi
if t.TYPE_CHECKING:
import openllm
from openllm_core._typing_compat import M, T
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
return any(s.rfilename.endswith('.safetensors') for s in HfApi().model_info(model_id, revision=revision).siblings)


@@ -8,7 +8,9 @@ import typing as t
import bentoml
import openllm
if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralRuntime
logger = logging.getLogger(__name__)


@@ -8,9 +8,14 @@ import typing as t
import openllm_core
from . import dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects
from . import dummy_flax_objects as dummy_flax_objects
from . import dummy_pt_objects as dummy_pt_objects
from . import dummy_tf_objects as dummy_tf_objects
from . import dummy_vllm_objects as dummy_vllm_objects
if t.TYPE_CHECKING:
import openllm
from openllm_core._typing_compat import LiteralRuntime
def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:


@@ -1,7 +1,9 @@
from __future__ import annotations
import os
from hypothesis import HealthCheck, settings
from hypothesis import HealthCheck
from hypothesis import settings
settings.register_profile('CI', settings(suppress_health_check=[HealthCheck.too_slow]), deadline=None)
if 'CI' in os.environ: settings.load_profile('CI')


@@ -5,7 +5,9 @@ import typing as t
from hypothesis import strategies as st
import openllm
from openllm_core._configuration import ModelSettings
logger = logging.getLogger(__name__)
env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])


@@ -3,17 +3,25 @@ import contextlib
import os
import sys
import typing as t
from unittest import mock
import attr
import pytest
import transformers
from hypothesis import assume, given, strategies as st
from hypothesis import assume
from hypothesis import given
from hypothesis import strategies as st
import openllm
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
from ._strategies._configuration import make_llm_config, model_settings
from openllm_core._configuration import GenerationConfig
from openllm_core._configuration import ModelSettings
from openllm_core._configuration import field_env_key
from ._strategies._configuration import make_llm_config
from ._strategies._configuration import model_settings
# XXX: @aarnphm fixes TypedDict behaviour in 3.11
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')


@@ -6,7 +6,9 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralRuntime
_FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}
_PROMPT_MAPPING = {'qa': 'Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?',}


@@ -6,7 +6,9 @@ import logging
import sys
import time
import typing as t
from abc import ABC, abstractmethod
from abc import ABC
from abc import abstractmethod
import attr
import docker
@@ -14,18 +16,25 @@ import docker.errors
import docker.types
import orjson
import pytest
from syrupy.extensions.json import JSONSnapshotExtension
import openllm
from openllm._llm import normalise_model_name
from openllm_core._typing_compat import DictStrAny, ListAny
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import ListAny
logger = logging.getLogger(__name__)
if t.TYPE_CHECKING:
import subprocess
from syrupy.assertion import SnapshotAssertion
from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
from syrupy.types import PropertyFilter
from syrupy.types import PropertyMatcher
from syrupy.types import SerializableData
from syrupy.types import SerializedData
from openllm._configuration import GenerationConfig
from openllm.client import BaseAsyncClient


@@ -4,10 +4,14 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib
from .conftest import HandleProtocol, ResponseComparator, _Handle
from .conftest import HandleProtocol
from .conftest import ResponseComparator
from .conftest import _Handle
model = 'flan_t5'
model_id = 'google/flan-t5-small'


@@ -4,10 +4,14 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib
from .conftest import HandleProtocol, ResponseComparator, _Handle
from .conftest import HandleProtocol
from .conftest import ResponseComparator
from .conftest import _Handle
model = 'opt'
model_id = 'facebook/opt-125m'


@@ -3,7 +3,9 @@ import os
import typing as t
import pytest
if t.TYPE_CHECKING: import openllm
if t.TYPE_CHECKING:
import openllm
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):


@@ -6,8 +6,11 @@ import typing as t
import pytest
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
if t.TYPE_CHECKING: from pathlib import Path
if t.TYPE_CHECKING:
from pathlib import Path
HF_INTERNAL_T5_TESTING = 'hf-internal-testing/tiny-random-t5'

Some files were not shown because too many files have changed in this diff.
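
Taken together, the hunks above apply one mechanical normalisation: every combined `from module import a, b` statement, including aliased re-exports and the bodies of `if t.TYPE_CHECKING:` blocks, is split into one import per line, so the import sorter and yapf stop undoing each other's formatting on every run. The following is a minimal sketch of the resulting style, assuming an isort-style "force single line" option is what enforces it; the formatter configuration itself is not part of the hunks shown here.

# Sketch of the import layout this diff converges on (assumption: enforced by
# a "force single line" import-sorting option; the config file is not shown).

# Before: several names imported in one statement, prone to being re-wrapped.
from collections import OrderedDict, defaultdict

# After: exactly one name per statement, one statement per line, sorted.
from collections import OrderedDict
from collections import defaultdict

# Re-exports keep their explicit aliases, still one per line.
from os import path as path

With one name per line, adding or removing an import touches exactly one line, which keeps the sort order and the line wrapping stable across repeated formatter runs.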