Mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-02-18 14:47:30 -05:00)
fix: persistent styling between ruff and yapf (#279)
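The hunks below rewrite grouped imports (from x import a, b, and parenthesized import blocks) as one import per line, the flat style that both ruff's isort rules and yapf leave untouched, so the two tools stop re-wrapping each other's output. A minimal sketch of how that convention is typically pinned in a pyproject.toml, assuming a ruff-based setup; this block is illustrative and not taken from this commit:

[tool.ruff.isort]
# Assumed setting: emit one "from module import name" line per imported name,
# matching the split-import style applied throughout the diff below.
force-single-line = true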
cz.py
@@ -6,6 +6,7 @@ import token
import tokenize

from tabulate import tabulate

TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]

def run_cz(dir: str, package: str):

@@ -2,6 +2,7 @@ from __future__ import annotations

import bentoml
import openllm

model = "dolly-v2"

llm_config = openllm.AutoConfig.for_model(model)

@@ -5,6 +5,7 @@ from langchain.llms import OpenLLM

import bentoml
from bentoml.io import Text

SAMPLE_INPUT = "What is the weather in San Francisco?"

llm = OpenLLM(model_name="dolly-v2", model_id="databricks/dolly-v2-7b", embedded=False,)

@@ -1,5 +1,9 @@
from __future__ import annotations

from . import benmin as benmin
from ._base import BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient
from .client import AsyncGrpcClient as AsyncGrpcClient, AsyncHTTPClient as AsyncHTTPClient, GrpcClient as GrpcClient, HTTPClient as HTTPClient
from ._base import BaseAsyncClient as BaseAsyncClient
from ._base import BaseClient as BaseClient
from .client import AsyncGrpcClient as AsyncGrpcClient
from .client import AsyncHTTPClient as AsyncHTTPClient
from .client import GrpcClient as GrpcClient
from .client import HTTPClient as HTTPClient

@@ -4,6 +4,7 @@ import abc
import functools
import logging
import typing as t

from http import HTTPStatus
from urllib.parse import urljoin

@@ -12,14 +13,23 @@ import httpx
import orjson

import openllm_core
from openllm_core._typing_compat import LiteralString, overload
from openllm_core.utils import bentoml_cattr, ensure_exec_coro, is_transformers_available, is_transformers_supports_agent

from .benmin import AsyncClient as AsyncBentoClient, Client as BentoClient
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import overload
from openllm_core.utils import bentoml_cattr
from openllm_core.utils import ensure_exec_coro
from openllm_core.utils import is_transformers_available
from openllm_core.utils import is_transformers_supports_agent

from .benmin import AsyncClient as AsyncBentoClient
from .benmin import Client as BentoClient

if t.TYPE_CHECKING:
import transformers

from openllm_core._typing_compat import DictStrAny, LiteralRuntime
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralRuntime

logger = logging.getLogger(__name__)

@attr.define(slots=False, init=False)

@@ -204,7 +214,9 @@ class _AsyncClient(_ClientAttr):
async def _run_hf_agent(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
if not is_transformers_supports_agent(): raise RuntimeError('This version of transformers does not support agent.run. Make sure to upgrade to transformers>4.30.0')
if len(args) > 1: raise ValueError("'args' should only take one positional argument.")
from transformers.tools.agents import clean_code_for_run, get_tool_creation_code, resolve_tools
from transformers.tools.agents import clean_code_for_run
from transformers.tools.agents import get_tool_creation_code
from transformers.tools.agents import resolve_tools
from transformers.tools.python_interpreter import evaluate

task = kwargs.pop('task', args[0])

@@ -13,13 +13,16 @@ via `client.call` or `await client.call`.
"""
from __future__ import annotations
import typing as t

from abc import abstractmethod

import attr
import httpx

import bentoml
if t.TYPE_CHECKING: from bentoml._internal.service.inference_api import InferenceAPI

if t.TYPE_CHECKING:
from bentoml._internal.service.inference_api import InferenceAPI

__all__ = ['Client', 'AsyncClient']

@@ -6,21 +6,31 @@ import time
import typing as t

import bentoml

from bentoml._internal.service.inference_api import InferenceAPI
from bentoml.grpc.utils import import_generated_stubs, load_from_file
from openllm_client.benmin import AsyncClient, Client
from openllm_core._typing_compat import NotRequired, overload
from openllm_core.utils import ensure_exec_coro, is_grpc_available, is_grpc_health_available
from bentoml.grpc.utils import import_generated_stubs
from bentoml.grpc.utils import load_from_file
from openllm_client.benmin import AsyncClient
from openllm_client.benmin import Client
from openllm_core._typing_compat import NotRequired
from openllm_core._typing_compat import overload
from openllm_core.utils import ensure_exec_coro
from openllm_core.utils import is_grpc_available
from openllm_core.utils import is_grpc_health_available

if not is_grpc_available() or not is_grpc_health_available(): raise ImportError("gRPC is required to use gRPC client. Install with 'pip install \"openllm-client[grpc]\"'.")
import grpc
import grpc_health.v1.health_pb2 as pb_health
import grpc_health.v1.health_pb2_grpc as services_health

from google.protobuf import json_format
from grpc import aio

pb, services = import_generated_stubs('v1')

if t.TYPE_CHECKING:
from bentoml.grpc.v1.service_pb2 import ServiceMetadataResponse

logger = logging.getLogger(__name__)

class ClientCredentials(t.TypedDict):

@@ -5,6 +5,7 @@ import logging
import time
import typing as t
import urllib.error

from urllib.parse import urlparse

import httpx
@@ -14,9 +15,12 @@ import starlette.requests
import starlette.responses

import bentoml

from bentoml._internal.service.inference_api import InferenceAPI
from openllm_client.benmin import AsyncClient, Client
from openllm_client.benmin import AsyncClient
from openllm_client.benmin import Client
from openllm_core.utils import ensure_exec_coro

logger = logging.getLogger(__name__)

class HttpClient(Client):

@@ -1,8 +1,11 @@
from __future__ import annotations
import logging

from urllib.parse import urlparse

from ._base import BaseAsyncClient, BaseClient
from ._base import BaseAsyncClient
from ._base import BaseClient

logger = logging.getLogger(__name__)

def process_http_address(self: AsyncHTTPClient | HTTPClient, address: str) -> None:

@@ -1,33 +1,44 @@
from __future__ import annotations

from . import exceptions as exceptions, utils as utils
from ._configuration import GenerationConfig as GenerationConfig, LLMConfig as LLMConfig, SamplingParams as SamplingParams
from ._schema import EmbeddingsOutput as EmbeddingsOutput, GenerationInput as GenerationInput, GenerationOutput as GenerationOutput, HfAgentInput as HfAgentInput, MetadataOutput as MetadataOutput, unmarshal_vllm_outputs as unmarshal_vllm_outputs
from ._strategies import AmdGpuResource as AmdGpuResource, CascadingResourceStrategy as CascadingResourceStrategy, LiteralResourceSpec as LiteralResourceSpec, NvidiaGpuResource as NvidiaGpuResource, available_resource_spec as available_resource_spec, get_resource as get_resource
from .config import (
CONFIG_MAPPING as CONFIG_MAPPING,
CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES,
START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING,
START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING,
START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING,
START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING,
START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING,
START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING,
START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING,
START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING,
START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING,
START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING,
START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING,
AutoConfig as AutoConfig,
BaichuanConfig as BaichuanConfig,
ChatGLMConfig as ChatGLMConfig,
DollyV2Config as DollyV2Config,
FalconConfig as FalconConfig,
FlanT5Config as FlanT5Config,
GPTNeoXConfig as GPTNeoXConfig,
LlamaConfig as LlamaConfig,
MPTConfig as MPTConfig,
OPTConfig as OPTConfig,
StableLMConfig as StableLMConfig,
StarCoderConfig as StarCoderConfig,
)
from . import exceptions as exceptions
from . import utils as utils
from ._configuration import GenerationConfig as GenerationConfig
from ._configuration import LLMConfig as LLMConfig
from ._configuration import SamplingParams as SamplingParams
from ._schema import EmbeddingsOutput as EmbeddingsOutput
from ._schema import GenerationInput as GenerationInput
from ._schema import GenerationOutput as GenerationOutput
from ._schema import HfAgentInput as HfAgentInput
from ._schema import MetadataOutput as MetadataOutput
from ._schema import unmarshal_vllm_outputs as unmarshal_vllm_outputs
from ._strategies import AmdGpuResource as AmdGpuResource
from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
from ._strategies import LiteralResourceSpec as LiteralResourceSpec
from ._strategies import NvidiaGpuResource as NvidiaGpuResource
from ._strategies import available_resource_spec as available_resource_spec
from ._strategies import get_resource as get_resource
from .config import CONFIG_MAPPING as CONFIG_MAPPING
from .config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
from .config import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
from .config import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
from .config import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
from .config import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
from .config import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from .config import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from .config import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from .config import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from .config import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from .config import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
from .config import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
from .config import AutoConfig as AutoConfig
from .config import BaichuanConfig as BaichuanConfig
from .config import ChatGLMConfig as ChatGLMConfig
from .config import DollyV2Config as DollyV2Config
from .config import FalconConfig as FalconConfig
from .config import FlanT5Config as FlanT5Config
from .config import GPTNeoXConfig as GPTNeoXConfig
from .config import LlamaConfig as LlamaConfig
from .config import MPTConfig as MPTConfig
from .config import OPTConfig as OPTConfig
from .config import StableLMConfig as StableLMConfig
from .config import StarCoderConfig as StarCoderConfig

@@ -47,24 +47,50 @@ import click_option_group as cog
|
||||
import inflection
|
||||
import orjson
|
||||
|
||||
# NOTE: Using internal API from attr here, since we are actually allowing subclass of openllm_core.LLMConfig to become 'attrs'-ish
|
||||
from attr._compat import set_closure_cell
|
||||
from attr._make import _CountingAttr, _make_init, _transform_attrs
|
||||
from cattr.gen import make_dict_structure_fn, make_dict_unstructure_fn, override
|
||||
from attr._make import _CountingAttr
|
||||
from attr._make import _make_init
|
||||
from attr._make import _transform_attrs
|
||||
from cattr.gen import make_dict_structure_fn
|
||||
from cattr.gen import make_dict_unstructure_fn
|
||||
from cattr.gen import override
|
||||
from deepmerge.merger import Merger
|
||||
|
||||
import openllm_core
|
||||
|
||||
from ._strategies import LiteralResourceSpec, available_resource_spec, resource_spec
|
||||
from ._typing_compat import AdapterType, AnyCallable, At, DictStrAny, ListStr, LiteralRuntime, LiteralString, NotRequired, Required, Self, overload
|
||||
from ._strategies import LiteralResourceSpec
|
||||
from ._strategies import available_resource_spec
|
||||
from ._strategies import resource_spec
|
||||
from ._typing_compat import AdapterType
|
||||
from ._typing_compat import AnyCallable
|
||||
from ._typing_compat import At
|
||||
from ._typing_compat import DictStrAny
|
||||
from ._typing_compat import ListStr
|
||||
from ._typing_compat import LiteralRuntime
|
||||
from ._typing_compat import LiteralString
|
||||
from ._typing_compat import NotRequired
|
||||
from ._typing_compat import Required
|
||||
from ._typing_compat import Self
|
||||
from ._typing_compat import overload
|
||||
from .exceptions import ForbiddenAttributeError
|
||||
from .utils import ENV_VARS_TRUE_VALUES, MYPY, LazyLoader, ReprMixin, bentoml_cattr, codegen, dantic, field_env_key, first_not_none, lenient_issubclass
|
||||
from .utils import ENV_VARS_TRUE_VALUES
|
||||
from .utils import MYPY
|
||||
from .utils import LazyLoader
|
||||
from .utils import ReprMixin
|
||||
from .utils import bentoml_cattr
|
||||
from .utils import codegen
|
||||
from .utils import dantic
|
||||
from .utils import field_env_key
|
||||
from .utils import first_not_none
|
||||
from .utils import lenient_issubclass
|
||||
from .utils.import_utils import BACKENDS_MAPPING
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import click
|
||||
import peft
|
||||
import transformers
|
||||
import vllm
|
||||
|
||||
from transformers.generation.beam_constraints import Constraint
|
||||
else:
|
||||
Constraint = t.Any
|
||||
@@ -781,6 +807,7 @@ class _ConfigAttr:
|
||||
'''Optional tokenizer class for this given LLM. See Llama for example.'''
|
||||
|
||||
# update-config-stubs.py: special stop
|
||||
|
||||
class _ConfigBuilder:
|
||||
"""A modified version of attrs internal _ClassBuilder, and should only be called within __init_subclass__ of LLMConfig.
|
||||
|
||||
|
||||
@@ -6,10 +6,13 @@ import typing as t
|
||||
import attr
|
||||
import inflection
|
||||
|
||||
from openllm_core._configuration import GenerationConfig, LLMConfig
|
||||
from openllm_core._configuration import GenerationConfig
|
||||
from openllm_core._configuration import LLMConfig
|
||||
|
||||
from .utils import bentoml_cattr
|
||||
if t.TYPE_CHECKING: import vllm
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import vllm
|
||||
|
||||
@attr.frozen(slots=True)
|
||||
class GenerationInput:
|
||||
|
||||
@@ -13,11 +13,14 @@ import warnings
|
||||
import psutil
|
||||
|
||||
import bentoml
|
||||
from bentoml._internal.resource import get_resource, system_resources
|
||||
|
||||
from bentoml._internal.resource import get_resource
|
||||
from bentoml._internal.resource import system_resources
|
||||
from bentoml._internal.runner.strategy import THREAD_ENVS
|
||||
|
||||
from ._typing_compat import overload
|
||||
from .utils import DEBUG, ReprMixin
|
||||
from .utils import DEBUG
|
||||
from .utils import ReprMixin
|
||||
|
||||
class DynResource(t.Protocol):
|
||||
resource_id: t.ClassVar[str]
|
||||
@@ -98,10 +101,12 @@ def _from_system(cls: type[DynResource]) -> list[str]:
|
||||
# we don't want to use CLI because parsing is a pain.
|
||||
sys.path.append('/opt/rocm/libexec/rocm_smi')
|
||||
try:
|
||||
from ctypes import byref, c_uint32
|
||||
from ctypes import byref
|
||||
from ctypes import c_uint32
|
||||
|
||||
# refers to https://github.com/RadeonOpenCompute/rocm_smi_lib/blob/master/python_smi_tools/rsmiBindings.py
|
||||
from rsmiBindings import rocmsmi, rsmi_status_t
|
||||
from rsmiBindings import rocmsmi
|
||||
from rsmiBindings import rsmi_status_t
|
||||
|
||||
device_count = c_uint32(0)
|
||||
ret = rocmsmi.rsmi_num_monitor_devices(byref(device_count))
|
||||
@@ -149,7 +154,11 @@ def _from_spec(cls: type[DynResource], spec: t.Any) -> list[str]:
|
||||
raise TypeError(f"'{cls.__name__}.from_spec' only supports parsing spec of type int, str, or list, got '{type(spec)}' instead.")
|
||||
|
||||
def _raw_device_uuid_nvml() -> list[str] | None:
|
||||
from ctypes import CDLL, byref, c_int, c_void_p, create_string_buffer
|
||||
from ctypes import CDLL
|
||||
from ctypes import byref
|
||||
from ctypes import c_int
|
||||
from ctypes import c_void_p
|
||||
from ctypes import create_string_buffer
|
||||
|
||||
try:
|
||||
nvml_h = CDLL('libnvidia-ml.so.1')
|
||||
|
||||
@@ -7,7 +7,9 @@ import typing as t
|
||||
import attr
|
||||
|
||||
import bentoml
|
||||
|
||||
from bentoml._internal.types import ModelSignatureDict as ModelSignatureDict
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import auto_gptq as autogptq
|
||||
import peft
|
||||
@@ -15,11 +17,13 @@ if t.TYPE_CHECKING:
|
||||
import vllm
|
||||
|
||||
import openllm
|
||||
|
||||
from bentoml._internal.runner.runnable import RunnableMethod
|
||||
from bentoml._internal.runner.runner import RunnerMethod
|
||||
from bentoml._internal.runner.strategy import Strategy
|
||||
|
||||
from .utils.lazy import VersionInfo
|
||||
|
||||
M = t.TypeVar(
|
||||
'M',
|
||||
bound='t.Union[transformers.PreTrainedModel, transformers.Pipeline, transformers.TFPreTrainedModel, transformers.FlaxPreTrainedModel, vllm.LLMEngine, peft.PeftModel, autogptq.modeling.BaseGPTQForCausalLM]'
|
||||
@@ -41,14 +45,28 @@ LiteralContainerRegistry = t.Literal['docker', 'gh', 'ecr']
|
||||
LiteralContainerVersionStrategy = t.Literal['release', 'nightly', 'latest', 'custom']
|
||||
|
||||
if sys.version_info[:2] >= (3, 11):
|
||||
from typing import LiteralString as LiteralString, NotRequired as NotRequired, Required as Required, Self as Self, dataclass_transform as dataclass_transform, overload as overload
|
||||
from typing import LiteralString as LiteralString
|
||||
from typing import NotRequired as NotRequired
|
||||
from typing import Required as Required
|
||||
from typing import Self as Self
|
||||
from typing import dataclass_transform as dataclass_transform
|
||||
from typing import overload as overload
|
||||
else:
|
||||
from typing_extensions import LiteralString as LiteralString, NotRequired as NotRequired, Required as Required, Self as Self, dataclass_transform as dataclass_transform, overload as overload
|
||||
from typing_extensions import LiteralString as LiteralString
|
||||
from typing_extensions import NotRequired as NotRequired
|
||||
from typing_extensions import Required as Required
|
||||
from typing_extensions import Self as Self
|
||||
from typing_extensions import dataclass_transform as dataclass_transform
|
||||
from typing_extensions import overload as overload
|
||||
|
||||
if sys.version_info[:2] >= (3, 10):
|
||||
from typing import Concatenate as Concatenate, ParamSpec as ParamSpec, TypeAlias as TypeAlias
|
||||
from typing import Concatenate as Concatenate
|
||||
from typing import ParamSpec as ParamSpec
|
||||
from typing import TypeAlias as TypeAlias
|
||||
else:
|
||||
from typing_extensions import Concatenate as Concatenate, ParamSpec as ParamSpec, TypeAlias as TypeAlias
|
||||
from typing_extensions import Concatenate as Concatenate
|
||||
from typing_extensions import ParamSpec as ParamSpec
|
||||
from typing_extensions import TypeAlias as TypeAlias
|
||||
|
||||
class PeftAdapterOutput(t.TypedDict):
|
||||
success: bool
|
||||
|
||||
@@ -1,14 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
|
||||
from .configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
|
||||
from .configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
|
||||
from .configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
|
||||
from .configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
|
||||
from .configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
|
||||
from .configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
|
||||
from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
|
||||
from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
|
||||
from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
|
||||
from .configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
|
||||
from .configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
|
||||
from .configuration_auto import CONFIG_MAPPING as CONFIG_MAPPING
|
||||
from .configuration_auto import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
|
||||
from .configuration_auto import AutoConfig as AutoConfig
|
||||
from .configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
|
||||
from .configuration_baichuan import BaichuanConfig as BaichuanConfig
|
||||
from .configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
|
||||
from .configuration_chatglm import ChatGLMConfig as ChatGLMConfig
|
||||
from .configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
|
||||
from .configuration_dolly_v2 import DollyV2Config as DollyV2Config
|
||||
from .configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
|
||||
from .configuration_falcon import FalconConfig as FalconConfig
|
||||
from .configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
|
||||
from .configuration_flan_t5 import FlanT5Config as FlanT5Config
|
||||
from .configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
|
||||
from .configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
|
||||
from .configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
|
||||
from .configuration_llama import LlamaConfig as LlamaConfig
|
||||
from .configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
|
||||
from .configuration_mpt import MPTConfig as MPTConfig
|
||||
from .configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
|
||||
from .configuration_opt import OPTConfig as OPTConfig
|
||||
from .configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
|
||||
from .configuration_stablelm import StableLMConfig as StableLMConfig
|
||||
from .configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
|
||||
from .configuration_starcoder import StarCoderConfig as StarCoderConfig
|
||||
|
||||
@@ -2,15 +2,21 @@
|
||||
from __future__ import annotations
|
||||
import importlib
|
||||
import typing as t
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import inflection
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core.utils import ReprMixin
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import types
|
||||
from collections import _odict_items, _odict_keys, _odict_values
|
||||
|
||||
from collections import _odict_items
|
||||
from collections import _odict_keys
|
||||
from collections import _odict_values
|
||||
|
||||
from openllm_core._typing_compat import LiteralString
|
||||
ConfigKeysView = _odict_keys[str, type[openllm_core.LLMConfig]]
|
||||
|
||||
@@ -2,7 +2,9 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
|
||||
START_BAICHUAN_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for Baichuan model.
|
||||
|
||||
|
||||
@@ -2,7 +2,9 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core.utils import dantic
|
||||
|
||||
START_CHATGLM_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for ChatGLM model.
|
||||
|
||||
|
||||
@@ -2,9 +2,12 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
from openllm_core.utils import dantic
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import transformers
|
||||
|
||||
START_DOLLY_V2_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for dolly-v2 model.
|
||||
|
||||
@@ -2,7 +2,9 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
|
||||
START_FALCON_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for FalconLM model.
|
||||
|
||||
|
||||
@@ -2,7 +2,9 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
|
||||
START_FLAN_T5_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for FLAN-T5 model.
|
||||
|
||||
|
||||
@@ -2,8 +2,10 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
from openllm_core.utils import dantic
|
||||
|
||||
START_GPT_NEOX_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for GPTNeoX model.
|
||||
|
||||
|
||||
@@ -2,8 +2,10 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
from openllm_core.utils import dantic
|
||||
|
||||
START_LLAMA_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for Llama model.
|
||||
|
||||
|
||||
@@ -2,8 +2,10 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
from openllm_core.utils import dantic
|
||||
|
||||
MPTPromptType = t.Literal['default', 'instruct', 'chat', 'storywriter']
|
||||
|
||||
START_MPT_COMMAND_DOCSTRING = '''\
|
||||
|
||||
@@ -2,8 +2,10 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
from openllm_core.utils import dantic
|
||||
|
||||
START_OPT_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for OPT model.
|
||||
|
||||
|
||||
@@ -2,7 +2,9 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from openllm_core._prompt import process_prompt
|
||||
|
||||
START_STABLELM_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for StableLM model.
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
START_STARCODER_COMMAND_DOCSTRING = '''\
|
||||
Run a LLMServer for StarCoder model.
|
||||
|
||||
|
||||
@@ -14,33 +14,35 @@ import sys
|
||||
import types
|
||||
import typing as t
|
||||
import uuid
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from circus.exc import ConflictError
|
||||
|
||||
import openllm_core
|
||||
from bentoml._internal.configuration import (
|
||||
DEBUG_ENV_VAR as DEBUG_ENV_VAR,
|
||||
GRPC_DEBUG_ENV_VAR as _GRPC_DEBUG_ENV_VAR,
|
||||
QUIET_ENV_VAR as QUIET_ENV_VAR,
|
||||
get_debug_mode as _get_debug_mode,
|
||||
get_quiet_mode as _get_quiet_mode,
|
||||
set_quiet_mode as set_quiet_mode,
|
||||
)
|
||||
|
||||
from bentoml._internal.configuration import DEBUG_ENV_VAR as DEBUG_ENV_VAR
|
||||
from bentoml._internal.configuration import GRPC_DEBUG_ENV_VAR as _GRPC_DEBUG_ENV_VAR
|
||||
from bentoml._internal.configuration import QUIET_ENV_VAR as QUIET_ENV_VAR
|
||||
from bentoml._internal.configuration import get_debug_mode as _get_debug_mode
|
||||
from bentoml._internal.configuration import get_quiet_mode as _get_quiet_mode
|
||||
from bentoml._internal.configuration import set_quiet_mode as set_quiet_mode
|
||||
from bentoml._internal.models.model import ModelContext as _ModelContext
|
||||
from bentoml._internal.types import LazyType as LazyType
|
||||
from bentoml._internal.utils import (
|
||||
LazyLoader as LazyLoader,
|
||||
bentoml_cattr as bentoml_cattr,
|
||||
calc_dir_size as calc_dir_size,
|
||||
first_not_none as first_not_none,
|
||||
pkg as pkg,
|
||||
reserve_free_port as reserve_free_port,
|
||||
resolve_user_filepath as resolve_user_filepath,
|
||||
)
|
||||
from openllm_core.utils.lazy import (LazyModule as LazyModule, VersionInfo as VersionInfo,)
|
||||
from bentoml._internal.utils import LazyLoader as LazyLoader
|
||||
from bentoml._internal.utils import bentoml_cattr as bentoml_cattr
|
||||
from bentoml._internal.utils import calc_dir_size as calc_dir_size
|
||||
from bentoml._internal.utils import first_not_none as first_not_none
|
||||
from bentoml._internal.utils import pkg as pkg
|
||||
from bentoml._internal.utils import reserve_free_port as reserve_free_port
|
||||
from bentoml._internal.utils import resolve_user_filepath as resolve_user_filepath
|
||||
from openllm_core.utils.import_utils import ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES
|
||||
from openllm_core.utils.lazy import LazyModule as LazyModule
|
||||
from openllm_core.utils.lazy import VersionInfo as VersionInfo
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import AnyCallable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
try:
|
||||
from typing import GenericAlias as _TypingGenericAlias # type: ignore
|
||||
@@ -309,7 +311,6 @@ _import_structure: dict[str, list[str]] = {
|
||||
'lazy': ['LazyModule'],
|
||||
'import_utils': [
|
||||
'OPTIONAL_DEPENDENCIES',
|
||||
'ENV_VARS_TRUE_VALUES',
|
||||
'DummyMetaclass',
|
||||
'EnvVarMixin',
|
||||
'require_backends',
|
||||
@@ -340,37 +341,37 @@ _import_structure: dict[str, list[str]] = {
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
# NOTE: The following exports useful utils from bentoml
|
||||
from . import (analytics as analytics, codegen as codegen, dantic as dantic,)
|
||||
from .import_utils import (
|
||||
ENV_VARS_TRUE_VALUES as ENV_VARS_TRUE_VALUES,
|
||||
OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES,
|
||||
DummyMetaclass as DummyMetaclass,
|
||||
EnvVarMixin as EnvVarMixin,
|
||||
is_autogptq_available as is_autogptq_available,
|
||||
is_bitsandbytes_available as is_bitsandbytes_available,
|
||||
is_cpm_kernels_available as is_cpm_kernels_available,
|
||||
is_datasets_available as is_datasets_available,
|
||||
is_einops_available as is_einops_available,
|
||||
is_fairscale_available as is_fairscale_available,
|
||||
is_flax_available as is_flax_available,
|
||||
is_grpc_available as is_grpc_available,
|
||||
is_grpc_health_available as is_grpc_health_available,
|
||||
is_jupyter_available as is_jupyter_available,
|
||||
is_jupytext_available as is_jupytext_available,
|
||||
is_notebook_available as is_notebook_available,
|
||||
is_peft_available as is_peft_available,
|
||||
is_sentencepiece_available as is_sentencepiece_available,
|
||||
is_tf_available as is_tf_available,
|
||||
is_torch_available as is_torch_available,
|
||||
is_transformers_available as is_transformers_available,
|
||||
is_transformers_supports_agent as is_transformers_supports_agent,
|
||||
is_transformers_supports_kbit as is_transformers_supports_kbit,
|
||||
is_triton_available as is_triton_available,
|
||||
is_vllm_available as is_vllm_available,
|
||||
is_xformers_available as is_xformers_available,
|
||||
require_backends as require_backends,
|
||||
)
|
||||
from . import analytics as analytics
|
||||
from . import codegen as codegen
|
||||
from . import dantic as dantic
|
||||
from .import_utils import OPTIONAL_DEPENDENCIES as OPTIONAL_DEPENDENCIES
|
||||
from .import_utils import DummyMetaclass as DummyMetaclass
|
||||
from .import_utils import EnvVarMixin as EnvVarMixin
|
||||
from .import_utils import is_autogptq_available as is_autogptq_available
|
||||
from .import_utils import is_bitsandbytes_available as is_bitsandbytes_available
|
||||
from .import_utils import is_cpm_kernels_available as is_cpm_kernels_available
|
||||
from .import_utils import is_datasets_available as is_datasets_available
|
||||
from .import_utils import is_einops_available as is_einops_available
|
||||
from .import_utils import is_fairscale_available as is_fairscale_available
|
||||
from .import_utils import is_flax_available as is_flax_available
|
||||
from .import_utils import is_grpc_available as is_grpc_available
|
||||
from .import_utils import is_grpc_health_available as is_grpc_health_available
|
||||
from .import_utils import is_jupyter_available as is_jupyter_available
|
||||
from .import_utils import is_jupytext_available as is_jupytext_available
|
||||
from .import_utils import is_notebook_available as is_notebook_available
|
||||
from .import_utils import is_peft_available as is_peft_available
|
||||
from .import_utils import is_sentencepiece_available as is_sentencepiece_available
|
||||
from .import_utils import is_tf_available as is_tf_available
|
||||
from .import_utils import is_torch_available as is_torch_available
|
||||
from .import_utils import is_transformers_available as is_transformers_available
|
||||
from .import_utils import is_transformers_supports_agent as is_transformers_supports_agent
|
||||
from .import_utils import is_transformers_supports_kbit as is_transformers_supports_kbit
|
||||
from .import_utils import is_triton_available as is_triton_available
|
||||
from .import_utils import is_vllm_available as is_vllm_available
|
||||
from .import_utils import is_xformers_available as is_xformers_available
|
||||
from .import_utils import require_backends as require_backends
|
||||
from .representation import ReprMixin as ReprMixin
|
||||
|
||||
__lazy = LazyModule(__name__, globals()['__file__'], _import_structure, extra_objects=_extras)
|
||||
__all__ = __lazy.__all__
|
||||
__dir__ = __lazy.__dir__
|
||||
|
||||
@@ -14,8 +14,10 @@ import typing as t
|
||||
import attr
|
||||
|
||||
import openllm_core
|
||||
|
||||
from bentoml._internal.utils import analytics as _internal_analytics
|
||||
from openllm_core._typing_compat import ParamSpec
|
||||
|
||||
P = ParamSpec('P')
|
||||
T = t.TypeVar('T')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -5,12 +5,18 @@ import linecache
|
||||
import logging
|
||||
import types
|
||||
import typing as t
|
||||
|
||||
from operator import itemgetter
|
||||
|
||||
import orjson
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import openllm_core
|
||||
from openllm_core._typing_compat import AnyCallable, DictStrAny, ListStr, LiteralString
|
||||
|
||||
from openllm_core._typing_compat import AnyCallable
|
||||
from openllm_core._typing_compat import DictStrAny
|
||||
from openllm_core._typing_compat import ListStr
|
||||
from openllm_core._typing_compat import LiteralString
|
||||
PartialAny = functools.partial[t.Any]
|
||||
|
||||
_T = t.TypeVar('_T', bound=t.Callable[..., t.Any])
|
||||
@@ -110,7 +116,8 @@ def generate_function(
|
||||
def make_env_transformer(
|
||||
cls: type[openllm_core.LLMConfig], model_name: str, suffix: LiteralString | None = None, default_callback: t.Callable[[str, t.Any], t.Any] | None = None, globs: DictStrAny | None = None,
|
||||
) -> AnyCallable:
|
||||
from openllm_core.utils import dantic, field_env_key
|
||||
from openllm_core.utils import dantic
|
||||
from openllm_core.utils import field_env_key
|
||||
|
||||
def identity(_: str, x_value: t.Any) -> t.Any:
|
||||
return x_value
|
||||
|
||||
@@ -5,6 +5,7 @@ import importlib
|
||||
import os
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from enum import Enum
|
||||
|
||||
import attr
|
||||
@@ -12,8 +13,14 @@ import click
|
||||
import click_option_group as cog
|
||||
import inflection
|
||||
import orjson
|
||||
from click import ParamType, shell_completion as sc, types as click_types
|
||||
if t.TYPE_CHECKING: from attr import _ValidatorType
|
||||
|
||||
from click import ParamType
|
||||
from click import shell_completion as sc
|
||||
from click import types as click_types
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from attr import _ValidatorType
|
||||
|
||||
AnyCallable = t.Callable[..., t.Any]
|
||||
FC = t.TypeVar('FC', bound=t.Union[AnyCallable, click.Command])
|
||||
|
||||
|
||||
@@ -7,19 +7,25 @@ import importlib.util
|
||||
import logging
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import inflection
|
||||
import packaging.version
|
||||
|
||||
import openllm_core
|
||||
from bentoml._internal.utils import LazyLoader, pkg
|
||||
from openllm_core._typing_compat import LiteralString, overload
|
||||
|
||||
from bentoml._internal.utils import LazyLoader
|
||||
from bentoml._internal.utils import pkg
|
||||
from openllm_core._typing_compat import LiteralString
|
||||
from openllm_core._typing_compat import overload
|
||||
|
||||
from .representation import ReprMixin
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
BackendOrderedDict = OrderedDict[str, t.Tuple[t.Callable[[], bool], str]]
|
||||
from openllm_core._typing_compat import LiteralRuntime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
OPTIONAL_DEPENDENCIES = {'opt', 'flan-t5', 'vllm', 'fine-tune', 'ggml', 'agents', 'openai', 'playground', 'gptq', 'grpc'}
|
||||
ENV_VARS_TRUE_VALUES = {'1', 'ON', 'YES', 'TRUE'}
|
||||
@@ -406,7 +412,7 @@ class EnvVarMixin(ReprMixin):
|
||||
|
||||
def _framework_value(self) -> LiteralRuntime:
|
||||
from . import first_not_none
|
||||
return t.cast(t.Literal['pt', 'tf', 'flax', 'vllm'], first_not_none(os.environ.get(self['framework']), default=self._implementation))
|
||||
return t.cast(LiteralRuntime, first_not_none(os.environ.get(self['framework']), default=self._implementation))
|
||||
|
||||
def _bettertransformer_value(self) -> bool:
|
||||
from . import first_not_none
|
||||
|
||||
@@ -14,6 +14,7 @@ import warnings
|
||||
import attr
|
||||
|
||||
import openllm_core
|
||||
|
||||
__all__ = ['VersionInfo', 'LazyModule']
|
||||
|
||||
# vendorred from attrs
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
from abc import abstractmethod
|
||||
|
||||
import attr
|
||||
import orjson
|
||||
|
||||
from openllm_core import utils
|
||||
if t.TYPE_CHECKING: from openllm_core._typing_compat import TypeAlias
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import TypeAlias
|
||||
|
||||
ReprArgs: TypeAlias = t.Generator[t.Tuple[t.Optional[str], t.Any], None, None]
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ To start any OpenLLM model:
|
||||
openllm start <model_name> --options ...
|
||||
'''
|
||||
from __future__ import annotations
|
||||
|
||||
if __name__ == '__main__':
|
||||
from openllm.cli.entrypoint import cli
|
||||
cli()
|
||||
|
||||
@@ -3,13 +3,19 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import transformers
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
|
||||
from bentoml._internal.models.model import ModelOptions, ModelSignature
|
||||
if t.TYPE_CHECKING: import torch
|
||||
|
||||
from bentoml._internal.frameworks.transformers import API_VERSION
|
||||
from bentoml._internal.frameworks.transformers import MODULE_NAME
|
||||
from bentoml._internal.models.model import ModelOptions
|
||||
from bentoml._internal.models.model import ModelSignature
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import torch
|
||||
|
||||
_GENERIC_EMBEDDING_ID = 'sentence-transformers/all-MiniLM-L6-v2'
|
||||
_BENTOMODEL_ID = 'sentence-transformers--all-MiniLM-L6-v2'
|
||||
|
||||
@@ -3,7 +3,11 @@ from __future__ import annotations
|
||||
import typing as t
|
||||
|
||||
import transformers
|
||||
if t.TYPE_CHECKING: import torch, openllm
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import torch
|
||||
|
||||
import openllm
|
||||
|
||||
# reexport from transformers
|
||||
LogitsProcessorList = transformers.LogitsProcessorList
|
||||
|
||||
@@ -16,20 +16,62 @@ import attr
|
||||
import fs.path
|
||||
import inflection
|
||||
import orjson
|
||||
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
import openllm_core
|
||||
|
||||
from bentoml._internal.models.model import ModelSignature
|
||||
from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
|
||||
from openllm_core._configuration import FineTuneConfig
|
||||
from openllm_core._configuration import LLMConfig
|
||||
from openllm_core._configuration import _object_getattribute
|
||||
from openllm_core._configuration import _setattr_class
|
||||
from openllm_core._schema import unmarshal_vllm_outputs
|
||||
from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
|
||||
from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
|
||||
from openllm_core._typing_compat import AdaptersMapping
|
||||
from openllm_core._typing_compat import AdaptersTuple
|
||||
from openllm_core._typing_compat import AdapterType
|
||||
from openllm_core._typing_compat import AnyCallable
|
||||
from openllm_core._typing_compat import DictStrAny
|
||||
from openllm_core._typing_compat import ListStr
|
||||
from openllm_core._typing_compat import LiteralRuntime
|
||||
from openllm_core._typing_compat import LiteralString
|
||||
from openllm_core._typing_compat import LLMEmbeddings
|
||||
from openllm_core._typing_compat import LLMRunnable
|
||||
from openllm_core._typing_compat import LLMRunner
|
||||
from openllm_core._typing_compat import M
|
||||
from openllm_core._typing_compat import ModelSignatureDict as _ModelSignatureDict
|
||||
from openllm_core._typing_compat import NotRequired
|
||||
from openllm_core._typing_compat import PeftAdapterOutput
|
||||
from openllm_core._typing_compat import T
|
||||
from openllm_core._typing_compat import TupleAny
|
||||
from openllm_core._typing_compat import overload
|
||||
from openllm_core.utils import DEBUG
|
||||
from openllm_core.utils import ENV_VARS_TRUE_VALUES
|
||||
from openllm_core.utils import MYPY
|
||||
from openllm_core.utils import EnvVarMixin
|
||||
from openllm_core.utils import LazyLoader
|
||||
from openllm_core.utils import ReprMixin
|
||||
from openllm_core.utils import apply
|
||||
from openllm_core.utils import bentoml_cattr
|
||||
from openllm_core.utils import codegen
|
||||
from openllm_core.utils import device_count
|
||||
from openllm_core.utils import first_not_none
|
||||
from openllm_core.utils import generate_hash_from_file
|
||||
from openllm_core.utils import is_peft_available
|
||||
from openllm_core.utils import is_torch_available
|
||||
from openllm_core.utils import non_intrusive_setattr
|
||||
from openllm_core.utils import normalize_attrs_to_model_tokenizer_pair
|
||||
from openllm_core.utils import resolve_filepath
|
||||
from openllm_core.utils import validate_is_path
|
||||
|
||||
from ._quantisation import infer_quantisation_config
|
||||
from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
|
||||
from .exceptions import ForbiddenAttributeError
|
||||
from .exceptions import GpuNotAvailableError
|
||||
from .exceptions import OpenLLMException
|
||||
from .utils import infer_auto_class
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import auto_gptq as autogptq
|
||||
import peft
|
||||
@@ -1077,7 +1119,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
|
||||
**attrs: t.Any
|
||||
) -> t.Iterator[t.Any]:
|
||||
# NOTE: encoder-decoder models will need to implement their own generate_iterator for now
|
||||
from ._generation import get_context_length, is_partial_stop, prepare_logits_processor
|
||||
from ._generation import get_context_length
|
||||
from ._generation import is_partial_stop
|
||||
from ._generation import prepare_logits_processor
|
||||
|
||||
len_prompt = len(prompt)
|
||||
if stop_token_ids is None: stop_token_ids = []
|
||||
|
||||
@@ -4,11 +4,17 @@ import logging
|
||||
import typing as t
|
||||
|
||||
from openllm_core._typing_compat import overload
|
||||
from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
|
||||
from openllm_core.utils import LazyLoader
|
||||
from openllm_core.utils import is_autogptq_available
|
||||
from openllm_core.utils import is_bitsandbytes_available
|
||||
from openllm_core.utils import is_transformers_supports_kbit
|
||||
from openllm_core.utils import pkg
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import DictStrAny
|
||||
|
||||
from ._llm import LLM
|
||||
|
||||
autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -5,6 +5,7 @@ import typing as t
|
||||
import warnings
|
||||
|
||||
import orjson
|
||||
|
||||
from starlette.applications import Starlette
|
||||
from starlette.responses import JSONResponse
|
||||
from starlette.routing import Route
|
||||
@@ -12,17 +13,21 @@ from starlette.routing import Route
|
||||
import bentoml
|
||||
import openllm
|
||||
import openllm_core
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response
|
||||
|
||||
from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
|
||||
from bentoml._internal.runner.runner import AbstractRunner
|
||||
from bentoml._internal.runner.runner import RunnerMethod
|
||||
from openllm_core._typing_compat import TypeAlias
|
||||
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
|
||||
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.EmbeddingsOutput]]
|
||||
|
||||
# The following warnings from bitsandbytes, and probably not that important for users to see
|
||||
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
|
||||
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization')
|
||||
warnings.filterwarnings('ignore', message='The installed version of bitsandbytes was compiled without GPU support.')
|
||||
|
||||
model = os.environ.get('OPENLLM_MODEL', '{__model_name__}') # openllm: model name
|
||||
adapter_map = os.environ.get('OPENLLM_ADAPTER_MAP', '''{__model_adapter_map__}''') # openllm: model adapter map
|
||||
llm_config = openllm.AutoConfig.for_model(model)
|
||||
@@ -37,6 +42,7 @@ generic_embedding_runner = bentoml.Runner(
|
||||
runners: list[AbstractRunner] = [runner]
|
||||
if not runner.supports_embeddings: runners.append(generic_embedding_runner)
|
||||
svc = bentoml.Service(name=f"llm-{llm_config['start_name']}-service", runners=runners)
|
||||
|
||||
_JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config.model_dump(flatten=True), 'adapter_name': None})
|
||||
|
||||
@svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))
|
||||
|
||||
@@ -7,15 +7,26 @@ import os
|
||||
import typing as t
|
||||
|
||||
from openllm_core.utils import LazyModule
|
||||
|
||||
_import_structure: dict[str, list[str]] = {
|
||||
'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
|
||||
'oci': ['CONTAINER_NAMES', 'get_base_container_tag', 'build_container', 'get_base_container_name', 'supported_registries', 'RefResolver']
|
||||
}
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from . import _package as _package, oci as oci
|
||||
from ._package import build_editable as build_editable, construct_docker_options as construct_docker_options, construct_python_options as construct_python_options, create_bento as create_bento
|
||||
from .oci import CONTAINER_NAMES as CONTAINER_NAMES, RefResolver as RefResolver, build_container as build_container, get_base_container_name as get_base_container_name, get_base_container_tag as get_base_container_tag, supported_registries as supported_registries
|
||||
from . import _package as _package
|
||||
from . import oci as oci
|
||||
from ._package import build_editable as build_editable
|
||||
from ._package import construct_docker_options as construct_docker_options
|
||||
from ._package import construct_python_options as construct_python_options
|
||||
from ._package import create_bento as create_bento
|
||||
from .oci import CONTAINER_NAMES as CONTAINER_NAMES
|
||||
from .oci import RefResolver as RefResolver
|
||||
from .oci import build_container as build_container
|
||||
from .oci import get_base_container_name as get_base_container_name
|
||||
from .oci import get_base_container_tag as get_base_container_tag
|
||||
from .oci import supported_registries as supported_registries
|
||||
|
||||
__lazy = LazyModule(__name__, os.path.abspath('__file__'), _import_structure)
|
||||
__all__ = __lazy.__all__
|
||||
__dir__ = __lazy.__dir__
|
||||
|
||||
@@ -6,27 +6,39 @@ import logging
|
||||
import os
|
||||
import string
|
||||
import typing as t
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import fs
|
||||
import fs.copy
|
||||
import fs.errors
|
||||
import orjson
|
||||
from simple_di import Provide, inject
|
||||
|
||||
from simple_di import Provide
|
||||
from simple_di import inject
|
||||
|
||||
import bentoml
|
||||
import openllm_core
|
||||
from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
|
||||
|
||||
from bentoml._internal.bento.build_config import BentoBuildConfig
|
||||
from bentoml._internal.bento.build_config import DockerOptions
|
||||
from bentoml._internal.bento.build_config import ModelSpec
|
||||
from bentoml._internal.bento.build_config import PythonOptions
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
|
||||
from . import oci
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from fs.base import FS
|
||||
|
||||
import openllm
|
||||
|
||||
from bentoml._internal.bento import BentoStore
|
||||
from bentoml._internal.models.model import ModelStore
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry
|
||||
from openllm_core._typing_compat import LiteralContainerVersionStrategy
|
||||
from openllm_core._typing_compat import LiteralString
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'
|
||||
|
||||
@@ -9,7 +9,10 @@ import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing as t
from datetime import datetime, timedelta, timezone

from datetime import datetime
from datetime import timedelta
from datetime import timezone

import attr
import orjson
@@ -17,11 +20,17 @@ import orjson
import bentoml
import openllm
import openllm_core

from openllm_core.utils.lazy import VersionInfo

if t.TYPE_CHECKING:
from ghapi import all

from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import RefTuple

all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all') # noqa: F811

logger = logging.getLogger(__name__)

@@ -9,21 +9,28 @@ import click
import click_option_group as cog
import inflection
import orjson

from bentoml_cli.utils import BentoMLCommandGroup
from click import shell_completion as sc
from click.shell_completion import CompletionItem

import bentoml
import openllm

from bentoml._internal.configuration.containers import BentoMLContainer
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
from openllm_core._typing_compat import Concatenate
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import ParamSpec
from openllm_core.utils import DEBUG

from . import termui

if t.TYPE_CHECKING:
import subprocess

from openllm_core._configuration import LLMConfig

logger = logging.getLogger(__name__)

P = ParamSpec('P')

@@ -7,20 +7,27 @@ import subprocess
import sys
import typing as t

from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject

import bentoml
import openllm
import openllm_core

from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.exceptions import OpenLLMException

from . import termui
from ._factory import start_command_factory

if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString

logger = logging.getLogger(__name__)

def _start(
@@ -81,7 +88,8 @@ def _start(
framework: The framework to use for this LLM. By default, this is set to ``pt``.
additional_args: Additional arguments to pass to ``openllm start``.
"""
from .entrypoint import start_command, start_grpc_command
from .entrypoint import start_command
from .entrypoint import start_grpc_command
llm_config = openllm.AutoConfig.for_model(model_name)
_ModelEnv = openllm_core.utils.EnvVarMixin(
model_name,
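
Editor's note on the hunk above: the docstring documents a framework argument (defaulting to pt) and an additional_args passthrough to the openllm start CLI. A minimal, hypothetical usage sketch follows; the argument names are taken only from the docstring, since the full signature of _start is not shown in this diff.

# Hypothetical sketch -- call shape inferred from the docstring above, not a verified signature.
import openllm

config = openllm.AutoConfig.for_model('dolly-v2')  # same config lookup the helper performs internally
# _start('dolly-v2', framework='pt', additional_args=['--port', '3000'])  # assumed invocation, for illustration only
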
@@ -42,29 +42,80 @@ import fs.copy
import fs.errors
import inflection
import orjson
from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
from simple_di import Provide, inject

from bentoml_cli.utils import BentoMLCommandGroup
from bentoml_cli.utils import opt_callback
from simple_di import Provide
from simple_di import inject

import bentoml
import openllm

from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelStore
from openllm import bundle, serialisation
from openllm import bundle
from openllm import serialisation
from openllm.exceptions import OpenLLMException
from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
from openllm.models.auto import CONFIG_MAPPING
from openllm.models.auto import MODEL_FLAX_MAPPING_NAMES
from openllm.models.auto import MODEL_MAPPING_NAMES
from openllm.models.auto import MODEL_TF_MAPPING_NAMES
from openllm.models.auto import MODEL_VLLM_MAPPING_NAMES
from openllm.models.auto import AutoConfig
from openllm.models.auto import AutoLLM
from openllm.utils import infer_auto_class
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
from openllm_core._typing_compat import Concatenate
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import ParamSpec
from openllm_core._typing_compat import Self
from openllm_core.utils import DEBUG
from openllm_core.utils import DEBUG_ENV_VAR
from openllm_core.utils import OPTIONAL_DEPENDENCIES
from openllm_core.utils import QUIET_ENV_VAR
from openllm_core.utils import EnvVarMixin
from openllm_core.utils import LazyLoader
from openllm_core.utils import analytics
from openllm_core.utils import bentoml_cattr
from openllm_core.utils import compose
from openllm_core.utils import configure_logging
from openllm_core.utils import dantic
from openllm_core.utils import first_not_none
from openllm_core.utils import get_debug_mode
from openllm_core.utils import get_quiet_mode
from openllm_core.utils import is_torch_available
from openllm_core.utils import is_transformers_supports_agent
from openllm_core.utils import resolve_user_filepath
from openllm_core.utils import set_debug_mode
from openllm_core.utils import set_quiet_mode

from . import termui
from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
from ._factory import FC
from ._factory import LiteralOutput
from ._factory import _AnyCallable
from ._factory import bettertransformer_option
from ._factory import container_registry_option
from ._factory import fast_option
from ._factory import machine_option
from ._factory import model_id_option
from ._factory import model_name_argument
from ._factory import model_version_option
from ._factory import output_option
from ._factory import parse_device_callback
from ._factory import quantize_option
from ._factory import serialisation_option
from ._factory import start_command_factory
from ._factory import workers_per_resource_option

if t.TYPE_CHECKING:
import torch

from bentoml._internal.bento import BentoStore
from bentoml._internal.container import DefaultBuilder
from openllm_core._schema import EmbeddingsOutput
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
else:
torch = LazyLoader('torch', globals(), 'torch')

@@ -5,9 +5,14 @@ import click
import orjson

import openllm

from openllm.cli import termui
from openllm.cli._factory import container_registry_option, machine_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
from openllm.cli._factory import container_registry_option
from openllm.cli._factory import machine_option

if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy

@click.command(
'build_base_container',

@@ -5,13 +5,19 @@ import typing as t

import click
import psutil
from simple_di import Provide, inject

from simple_di import Provide
from simple_di import inject

import bentoml

from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar, machine_option
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
from openllm.cli._factory import bento_complete_envvar
from openllm.cli._factory import machine_option

if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore

@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)

@@ -2,9 +2,12 @@ from __future__ import annotations
import typing as t

import click
from simple_di import Provide, inject

from simple_di import Provide
from simple_di import inject

import bentoml

from bentoml._internal.bento.bento import BentoInfo
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.configuration.containers import BentoMLContainer
@@ -12,7 +15,9 @@ from bentoml._internal.container.generate import generate_containerfile
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar
from openllm_core.utils import bentoml_cattr
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore

if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore

@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)

@@ -4,12 +4,17 @@ import typing as t
import click
import inflection
import orjson

from bentoml_cli.utils import opt_callback

import openllm

from openllm.cli import termui
from openllm.cli._factory import machine_option, model_complete_envvar, output_option
from openllm.cli._factory import machine_option
from openllm.cli._factory import model_complete_envvar
from openllm.cli._factory import output_option
from openllm_core._prompt import process_prompt

LiteralOutput = t.Literal['json', 'pretty', 'porcelain']

@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)

@@ -6,9 +6,11 @@ import orjson

import bentoml
import openllm

from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, output_option
from openllm.cli._factory import LiteralOutput
from openllm.cli._factory import output_option

@click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
@output_option(default_value='json')

@@ -7,10 +7,16 @@ import orjson

import bentoml
import openllm

from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
from openllm.cli._factory import LiteralOutput
from openllm.cli._factory import model_complete_envvar
from openllm.cli._factory import model_name_argument
from openllm.cli._factory import output_option

if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny

@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)

@@ -13,12 +13,16 @@ import yaml

from openllm import playground
from openllm.cli import termui
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
from openllm_core.utils import is_jupyter_available
from openllm_core.utils import is_jupytext_available
from openllm_core.utils import is_notebook_available

if t.TYPE_CHECKING:
import jupytext
import nbformat

from openllm_core._typing_compat import DictStrAny

logger = logging.getLogger(__name__)

def load_notebook_metadata() -> DictStrAny:

@@ -6,7 +6,9 @@ import click
import inflection

import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny

if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny

def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None

@@ -14,7 +14,14 @@ from __future__ import annotations
import typing as t

import openllm_client
if t.TYPE_CHECKING: from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient

if t.TYPE_CHECKING:
from openllm_client import AsyncGrpcClient as AsyncGrpcClient
from openllm_client import AsyncHTTPClient as AsyncHTTPClient
from openllm_client import BaseAsyncClient as BaseAsyncClient
from openllm_client import BaseClient as BaseClient
from openllm_client import GrpcClient as GrpcClient
from openllm_client import HTTPClient as HTTPClient

def __dir__() -> t.Sequence[str]:
return sorted(dir(openllm_client))

@@ -1,4 +1,11 @@
'''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
from __future__ import annotations

from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError
from openllm_core.exceptions import Error as Error
from openllm_core.exceptions import FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
from openllm_core.exceptions import ForbiddenAttributeError as ForbiddenAttributeError
from openllm_core.exceptions import GpuNotAvailableError as GpuNotAvailableError
from openllm_core.exceptions import MissingAnnotationAttributeError as MissingAnnotationAttributeError
from openllm_core.exceptions import MissingDependencyError as MissingDependencyError
from openllm_core.exceptions import OpenLLMException as OpenLLMException
from openllm_core.exceptions import ValidationError as ValidationError

@@ -3,8 +3,15 @@ import os
import typing as t

import openllm
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING
from openllm_core.config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
from openllm_core.config import AutoConfig as AutoConfig
from openllm_core.utils import LazyModule
from openllm_core.utils import is_flax_available
from openllm_core.utils import is_tf_available
from openllm_core.utils import is_torch_available
from openllm_core.utils import is_vllm_available

_import_structure: dict[str, list[str]] = {
'modeling_auto': ['MODEL_MAPPING_NAMES'],
'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],

@@ -12,11 +12,14 @@ import openllm
from openllm_core.utils import ReprMixin
if t.TYPE_CHECKING:
import types
from collections import _odict_items, _odict_keys, _odict_values
from collections import _odict_items
from collections import _odict_keys
from collections import _odict_values

from _typeshed import SupportsIter

from openllm_core._typing_compat import LiteralString, LLMRunner
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import LLMRunner
ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]

@@ -4,7 +4,9 @@ from collections import OrderedDict

from openllm_core.config import CONFIG_MAPPING_NAMES

from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping

MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
'opt', 'OPT'
), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])

@@ -4,7 +4,9 @@ from collections import OrderedDict

from openllm_core.config import CONFIG_MAPPING_NAMES

from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping

MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)

@@ -4,7 +4,9 @@ from collections import OrderedDict

from openllm_core.config import CONFIG_MAPPING_NAMES

from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping

MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)

@@ -4,7 +4,9 @@ from collections import OrderedDict

from openllm_core.config import CONFIG_MAPPING_NAMES

from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping

MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
'opt', 'VLLMOPT'
), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])

@@ -3,8 +3,14 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
from openllm.utils import LazyModule
from openllm.utils import is_cpm_kernels_available
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
from openllm_core.config.configuration_baichuan import BaichuanConfig as BaichuanConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError

@@ -3,8 +3,13 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
from openllm.utils import LazyModule
from openllm.utils import is_cpm_kernels_available
from openllm.utils import is_torch_available
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
from openllm_core.config.configuration_chatglm import ChatGLMConfig as ChatGLMConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t

import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers

class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
__openllm_internal__ = True

@@ -3,8 +3,13 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
from openllm_core.config.configuration_dolly_v2 import DollyV2Config as DollyV2Config

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -5,7 +5,10 @@ import typing as t

import openllm
from openllm_core._typing_compat import overload
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_dolly_v2 import END_KEY
from openllm_core.config.configuration_dolly_v2 import RESPONSE_KEY
from openllm_core.config.configuration_dolly_v2 import get_special_token_id
if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
else: torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
logger = logging.getLogger(__name__)

@@ -3,8 +3,13 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
from openllm_core.config.configuration_falcon import FalconConfig as FalconConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -3,8 +3,14 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
from openllm.utils import LazyModule
from openllm.utils import is_flax_available
from openllm.utils import is_tf_available
from openllm.utils import is_torch_available
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from openllm_core.config.configuration_flan_t5 import FlanT5Config as FlanT5Config

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t

import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers

class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
__openllm_internal__ = True

@@ -3,8 +3,13 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from openllm_core.config.configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -3,8 +3,14 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_llama import PROMPT_MAPPING as PROMPT_MAPPING
from openllm_core.config.configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from openllm_core.config.configuration_llama import LlamaConfig as LlamaConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_vllm_available(): raise MissingDependencyError

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t

import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers

class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
__openllm_internal__ = True

@@ -3,8 +3,14 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_mpt import PROMPT_MAPPING as PROMPT_MAPPING
from openllm_core.config.configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from openllm_core.config.configuration_mpt import MPTConfig as MPTConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -4,8 +4,11 @@ import typing as t

import bentoml
import openllm
from openllm.utils import generate_labels, is_triton_available
if t.TYPE_CHECKING: import transformers, torch
from openllm.utils import generate_labels
from openllm.utils import is_triton_available
if t.TYPE_CHECKING:
import torch
import transformers

logger = logging.getLogger(__name__)

@@ -3,8 +3,15 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
from openllm.utils import LazyModule
from openllm.utils import is_flax_available
from openllm.utils import is_tf_available
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from openllm_core.config.configuration_opt import OPTConfig as OPTConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -3,8 +3,13 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
from openllm_core.config.configuration_stablelm import StableLMConfig as StableLMConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t

import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers

class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
__openllm_internal__ = True

@@ -3,8 +3,13 @@ import sys
import typing as t

from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
from openllm_core.config.configuration_starcoder import StarCoderConfig as StarCoderConfig

_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

@@ -5,7 +5,11 @@ import typing as t
import bentoml
import openllm
from openllm.utils import generate_labels
from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
from openllm_core.config.configuration_starcoder import EOD
from openllm_core.config.configuration_starcoder import FIM_MIDDLE
from openllm_core.config.configuration_starcoder import FIM_PAD
from openllm_core.config.configuration_starcoder import FIM_PREFIX
from openllm_core.config.configuration_starcoder import FIM_SUFFIX
if t.TYPE_CHECKING: import transformers

class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.GPT2TokenizerFast']):

@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)

from datasets import load_dataset
from trl import SFTTrainer

DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
DATASET_NAME = "timdettmers/openassistant-guanaco"

@@ -4,6 +4,7 @@ import logging
import typing as t

import openllm

openllm.utils.configure_logging()

logger = logging.getLogger(__name__)

@@ -23,6 +23,7 @@ from datasets import load_dataset

if t.TYPE_CHECKING:
from peft import PeftModel

DEFAULT_MODEL_ID = "facebook/opt-6.7b"

def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):

@@ -30,12 +30,19 @@ import cloudpickle
import fs

import openllm

from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
from openllm_core._typing_compat import M, ParamSpec, T
from openllm_core._typing_compat import M
from openllm_core._typing_compat import ParamSpec
from openllm_core._typing_compat import T

if t.TYPE_CHECKING:
import bentoml

from . import constants as constants, ggml as ggml, transformers as transformers
from . import constants as constants
from . import ggml as ggml
from . import transformers as transformers

P = ParamSpec('P')

def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
@@ -44,7 +51,8 @@ def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
By default, it will try to find the bentomodel whether it is in store..
If model is not found, it will raises a ``bentoml.exceptions.NotFound``.
'''
from .transformers._helpers import infer_tokenizers_from_llm, process_config
from .transformers._helpers import infer_tokenizers_from_llm
from .transformers._helpers import process_config

config, *_ = process_config(llm._bentomodel.path, llm.__llm_trust_remote_code__)
bentomodel_fs = fs.open_fs(llm._bentomodel.path)

@@ -1,4 +1,5 @@
from __future__ import annotations

FRAMEWORK_TO_AUTOCLASS_MAPPING = {
'pt': ('AutoModelForCausalLM', 'AutoModelForSeq2SeqLM'),
'tf': ('TFAutoModelForCausalLM', 'TFAutoModelForSeq2SeqLM'),

@@ -7,7 +7,9 @@ import typing as t

import bentoml
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import M

if t.TYPE_CHECKING:
from openllm_core._typing_compat import M

_conversion_strategy = {'pt': 'ggml'}

@@ -5,15 +5,23 @@ import logging
import typing as t

from huggingface_hub import snapshot_download
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject

import bentoml
import openllm

from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelOptions

from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
from ._helpers import check_unintialised_params
from ._helpers import infer_autoclass_from_llm
from ._helpers import infer_tokenizers_from_llm
from ._helpers import make_model_signatures
from ._helpers import process_config
from ._helpers import update_model
from .weights import HfIgnore

if t.TYPE_CHECKING:
import types

@@ -24,7 +32,9 @@ if t.TYPE_CHECKING:
import vllm

from bentoml._internal.models import ModelStore
from openllm_core._typing_compat import DictStrAny, M, T
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
else:
vllm = openllm.utils.LazyLoader('vllm', globals(), 'vllm')
autogptq = openllm.utils.LazyLoader('autogptq', globals(), 'auto_gptq')

@@ -4,16 +4,24 @@ import typing as t

import openllm
import openllm_core
from bentoml._internal.models.model import ModelInfo, ModelSignature
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS

from bentoml._internal.models.model import ModelInfo
from bentoml._internal.models.model import ModelSignature
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING
from openllm.serialisation.constants import HUB_ATTRS

if t.TYPE_CHECKING:
import torch
import transformers

from transformers.models.auto.auto_factory import _BaseAutoModelClass

import bentoml

from bentoml._internal.models.model import ModelSignaturesType
from openllm_core._typing_compat import DictStrAny, M, T
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
else:
transformers, torch = openllm_core.utils.LazyLoader('transformers', globals(), 'transformers'), openllm_core.utils.LazyLoader('torch', globals(), 'torch')

@@ -2,10 +2,14 @@ from __future__ import annotations
import typing as t

import attr

from huggingface_hub import HfApi

if t.TYPE_CHECKING:
import openllm
from openllm_core._typing_compat import M, T

from openllm_core._typing_compat import M
from openllm_core._typing_compat import T

def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
return any(s.rfilename.endswith('.safetensors') for s in HfApi().model_info(model_id, revision=revision).siblings)
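
A short aside on has_safetensors_weights above: it asks the Hugging Face Hub for the repository's file listing and reports whether any sibling file ends in .safetensors. A self-contained sketch of how it can be exercised (the model id is borrowed from the test fixtures later in this diff and is illustrative only; the call needs network access to the Hub):

# Illustrative sketch; mirrors the helper shown in the hunk above.
from __future__ import annotations

from huggingface_hub import HfApi

def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
  # A repo publishes safetensors weights if any listed file has the .safetensors suffix.
  return any(s.rfilename.endswith('.safetensors') for s in HfApi().model_info(model_id, revision=revision).siblings)

print(has_safetensors_weights('facebook/opt-125m'))  # True only if that repo lists .safetensors files
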
@@ -8,7 +8,9 @@ import typing as t

import bentoml
import openllm
if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime

if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralRuntime

logger = logging.getLogger(__name__)

@@ -8,9 +8,14 @@ import typing as t

import openllm_core

from . import dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects
from . import dummy_flax_objects as dummy_flax_objects
from . import dummy_pt_objects as dummy_pt_objects
from . import dummy_tf_objects as dummy_tf_objects
from . import dummy_vllm_objects as dummy_vllm_objects

if t.TYPE_CHECKING:
import openllm

from openllm_core._typing_compat import LiteralRuntime

def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:

@@ -1,7 +1,9 @@
from __future__ import annotations
import os

from hypothesis import HealthCheck, settings
from hypothesis import HealthCheck
from hypothesis import settings

settings.register_profile('CI', settings(suppress_health_check=[HealthCheck.too_slow]), deadline=None)

if 'CI' in os.environ: settings.load_profile('CI')

@@ -5,7 +5,9 @@ import typing as t
from hypothesis import strategies as st

import openllm

from openllm_core._configuration import ModelSettings

logger = logging.getLogger(__name__)

env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])

@@ -3,17 +3,25 @@ import contextlib
import os
import sys
import typing as t

from unittest import mock

import attr
import pytest
import transformers
from hypothesis import assume, given, strategies as st

from hypothesis import assume
from hypothesis import given
from hypothesis import strategies as st

import openllm
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key

from ._strategies._configuration import make_llm_config, model_settings
from openllm_core._configuration import GenerationConfig
from openllm_core._configuration import ModelSettings
from openllm_core._configuration import field_env_key

from ._strategies._configuration import make_llm_config
from ._strategies._configuration import model_settings

# XXX: @aarnphm fixes TypedDict behaviour in 3.11
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')

@@ -6,7 +6,9 @@ import typing as t
import pytest

import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime

if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralRuntime

_FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}
_PROMPT_MAPPING = {'qa': 'Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?',}

@@ -6,7 +6,9 @@ import logging
import sys
import time
import typing as t
from abc import ABC, abstractmethod

from abc import ABC
from abc import abstractmethod

import attr
import docker
@@ -14,18 +16,25 @@ import docker.errors
import docker.types
import orjson
import pytest

from syrupy.extensions.json import JSONSnapshotExtension

import openllm

from openllm._llm import normalise_model_name
from openllm_core._typing_compat import DictStrAny, ListAny
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import ListAny

logger = logging.getLogger(__name__)

if t.TYPE_CHECKING:
import subprocess

from syrupy.assertion import SnapshotAssertion
from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
from syrupy.types import PropertyFilter
from syrupy.types import PropertyMatcher
from syrupy.types import SerializableData
from syrupy.types import SerializedData

from openllm._configuration import GenerationConfig
from openllm.client import BaseAsyncClient

@@ -4,10 +4,14 @@ import typing as t
import pytest

import openllm

if t.TYPE_CHECKING:
import contextlib

from .conftest import HandleProtocol, ResponseComparator, _Handle
from .conftest import HandleProtocol
from .conftest import ResponseComparator
from .conftest import _Handle

model = 'flan_t5'
model_id = 'google/flan-t5-small'

@@ -4,10 +4,14 @@ import typing as t
import pytest

import openllm

if t.TYPE_CHECKING:
import contextlib

from .conftest import HandleProtocol, ResponseComparator, _Handle
from .conftest import HandleProtocol
from .conftest import ResponseComparator
from .conftest import _Handle

model = 'opt'
model_id = 'facebook/opt-125m'

@@ -3,7 +3,9 @@ import os
import typing as t

import pytest
if t.TYPE_CHECKING: import openllm

if t.TYPE_CHECKING:
import openllm

@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):

@@ -6,8 +6,11 @@ import typing as t
import pytest

import openllm

from bentoml._internal.configuration.containers import BentoMLContainer
if t.TYPE_CHECKING: from pathlib import Path

if t.TYPE_CHECKING:
from pathlib import Path

HF_INTERNAL_T5_TESTING = 'hf-internal-testing/tiny-random-t5'

Some files were not shown because too many files have changed in this diff.