fix: persistent styling between ruff and yapf (#279)

This commit is contained in:
Aaron Pham
2023-08-30 11:37:41 -04:00
committed by GitHub
parent f678f71e18
commit c9cef1d773
145 changed files with 1051 additions and 395 deletions

View File

@@ -7,6 +7,7 @@ To start any OpenLLM model:
openllm start <model_name> --options ...
'''
from __future__ import annotations
if __name__ == '__main__':
from openllm.cli.entrypoint import cli
cli()

View File

@@ -3,13 +3,19 @@ from __future__ import annotations
import typing as t
import transformers
from huggingface_hub import snapshot_download
import bentoml
import openllm
from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
from bentoml._internal.models.model import ModelOptions, ModelSignature
if t.TYPE_CHECKING: import torch
from bentoml._internal.frameworks.transformers import API_VERSION
from bentoml._internal.frameworks.transformers import MODULE_NAME
from bentoml._internal.models.model import ModelOptions
from bentoml._internal.models.model import ModelSignature
if t.TYPE_CHECKING:
import torch
_GENERIC_EMBEDDING_ID = 'sentence-transformers/all-MiniLM-L6-v2'
_BENTOMODEL_ID = 'sentence-transformers--all-MiniLM-L6-v2'

View File

@@ -3,7 +3,11 @@ from __future__ import annotations
import typing as t
import transformers
if t.TYPE_CHECKING: import torch, openllm
if t.TYPE_CHECKING:
import torch
import openllm
# reexport from transformers
LogitsProcessorList = transformers.LogitsProcessorList

View File

@@ -16,20 +16,62 @@ import attr
import fs.path
import inflection
import orjson
from huggingface_hub import hf_hub_download
import bentoml
import openllm
import openllm_core
from bentoml._internal.models.model import ModelSignature
from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
from openllm_core._configuration import FineTuneConfig
from openllm_core._configuration import LLMConfig
from openllm_core._configuration import _object_getattribute
from openllm_core._configuration import _setattr_class
from openllm_core._schema import unmarshal_vllm_outputs
from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
from openllm_core._typing_compat import AdaptersMapping
from openllm_core._typing_compat import AdaptersTuple
from openllm_core._typing_compat import AdapterType
from openllm_core._typing_compat import AnyCallable
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import ListStr
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import LLMEmbeddings
from openllm_core._typing_compat import LLMRunnable
from openllm_core._typing_compat import LLMRunner
from openllm_core._typing_compat import M
from openllm_core._typing_compat import ModelSignatureDict as _ModelSignatureDict
from openllm_core._typing_compat import NotRequired
from openllm_core._typing_compat import PeftAdapterOutput
from openllm_core._typing_compat import T
from openllm_core._typing_compat import TupleAny
from openllm_core._typing_compat import overload
from openllm_core.utils import DEBUG
from openllm_core.utils import ENV_VARS_TRUE_VALUES
from openllm_core.utils import MYPY
from openllm_core.utils import EnvVarMixin
from openllm_core.utils import LazyLoader
from openllm_core.utils import ReprMixin
from openllm_core.utils import apply
from openllm_core.utils import bentoml_cattr
from openllm_core.utils import codegen
from openllm_core.utils import device_count
from openllm_core.utils import first_not_none
from openllm_core.utils import generate_hash_from_file
from openllm_core.utils import is_peft_available
from openllm_core.utils import is_torch_available
from openllm_core.utils import non_intrusive_setattr
from openllm_core.utils import normalize_attrs_to_model_tokenizer_pair
from openllm_core.utils import resolve_filepath
from openllm_core.utils import validate_is_path
from ._quantisation import infer_quantisation_config
from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
from .exceptions import ForbiddenAttributeError
from .exceptions import GpuNotAvailableError
from .exceptions import OpenLLMException
from .utils import infer_auto_class
if t.TYPE_CHECKING:
import auto_gptq as autogptq
import peft
@@ -1077,7 +1119,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
**attrs: t.Any
) -> t.Iterator[t.Any]:
# NOTE: encoder-decoder models will need to implement their own generate_iterator for now
from ._generation import get_context_length, is_partial_stop, prepare_logits_processor
from ._generation import get_context_length
from ._generation import is_partial_stop
from ._generation import prepare_logits_processor
len_prompt = len(prompt)
if stop_token_ids is None: stop_token_ids = []

View File

@@ -4,11 +4,17 @@ import logging
import typing as t
from openllm_core._typing_compat import overload
from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
from openllm_core.utils import LazyLoader
from openllm_core.utils import is_autogptq_available
from openllm_core.utils import is_bitsandbytes_available
from openllm_core.utils import is_transformers_supports_kbit
from openllm_core.utils import pkg
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
from ._llm import LLM
autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
logger = logging.getLogger(__name__)

View File

@@ -5,6 +5,7 @@ import typing as t
import warnings
import orjson
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route
@@ -12,17 +13,21 @@ from starlette.routing import Route
import bentoml
import openllm
import openllm_core
if t.TYPE_CHECKING:
from starlette.requests import Request
from starlette.responses import Response
from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
from bentoml._internal.runner.runner import AbstractRunner
from bentoml._internal.runner.runner import RunnerMethod
from openllm_core._typing_compat import TypeAlias
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.EmbeddingsOutput]]
# The following warnings come from bitsandbytes and are probably not important for users to see
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization')
warnings.filterwarnings('ignore', message='The installed version of bitsandbytes was compiled without GPU support.')
model = os.environ.get('OPENLLM_MODEL', '{__model_name__}') # openllm: model name
adapter_map = os.environ.get('OPENLLM_ADAPTER_MAP', '''{__model_adapter_map__}''') # openllm: model adapter map
llm_config = openllm.AutoConfig.for_model(model)
@@ -37,6 +42,7 @@ generic_embedding_runner = bentoml.Runner(
runners: list[AbstractRunner] = [runner]
if not runner.supports_embeddings: runners.append(generic_embedding_runner)
svc = bentoml.Service(name=f"llm-{llm_config['start_name']}-service", runners=runners)
_JsonInput = bentoml.io.JSON.from_sample({'prompt': '', 'llm_config': llm_config.model_dump(flatten=True), 'adapter_name': None})
@svc.api(route='/v1/generate', input=_JsonInput, output=bentoml.io.JSON.from_sample({'responses': [], 'configuration': llm_config.model_dump(flatten=True)}))

View File

@@ -7,15 +7,26 @@ import os
import typing as t
from openllm_core.utils import LazyModule
_import_structure: dict[str, list[str]] = {
'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
'oci': ['CONTAINER_NAMES', 'get_base_container_tag', 'build_container', 'get_base_container_name', 'supported_registries', 'RefResolver']
}
if t.TYPE_CHECKING:
from . import _package as _package, oci as oci
from ._package import build_editable as build_editable, construct_docker_options as construct_docker_options, construct_python_options as construct_python_options, create_bento as create_bento
from .oci import CONTAINER_NAMES as CONTAINER_NAMES, RefResolver as RefResolver, build_container as build_container, get_base_container_name as get_base_container_name, get_base_container_tag as get_base_container_tag, supported_registries as supported_registries
from . import _package as _package
from . import oci as oci
from ._package import build_editable as build_editable
from ._package import construct_docker_options as construct_docker_options
from ._package import construct_python_options as construct_python_options
from ._package import create_bento as create_bento
from .oci import CONTAINER_NAMES as CONTAINER_NAMES
from .oci import RefResolver as RefResolver
from .oci import build_container as build_container
from .oci import get_base_container_name as get_base_container_name
from .oci import get_base_container_tag as get_base_container_tag
from .oci import supported_registries as supported_registries
__lazy = LazyModule(__name__, os.path.abspath('__file__'), _import_structure)
__all__ = __lazy.__all__
__dir__ = __lazy.__dir__

View File

@@ -6,27 +6,39 @@ import logging
import os
import string
import typing as t
from pathlib import Path
import fs
import fs.copy
import fs.errors
import orjson
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm_core
from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
from bentoml._internal.bento.build_config import BentoBuildConfig
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.bento.build_config import ModelSpec
from bentoml._internal.bento.build_config import PythonOptions
from bentoml._internal.configuration.containers import BentoMLContainer
from . import oci
if t.TYPE_CHECKING:
from fs.base import FS
import openllm
from bentoml._internal.bento import BentoStore
from bentoml._internal.models.model import ModelStore
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralString
logger = logging.getLogger(__name__)
OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'

View File

@@ -9,7 +9,10 @@ import pathlib
import shutil
import subprocess
import typing as t
from datetime import datetime, timedelta, timezone
from datetime import datetime
from datetime import timedelta
from datetime import timezone
import attr
import orjson
@@ -17,11 +20,17 @@ import orjson
import bentoml
import openllm
import openllm_core
from openllm_core.utils.lazy import VersionInfo
if t.TYPE_CHECKING:
from ghapi import all
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import RefTuple
all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all') # noqa: F811
logger = logging.getLogger(__name__)

View File

@@ -9,21 +9,28 @@ import click
import click_option_group as cog
import inflection
import orjson
from bentoml_cli.utils import BentoMLCommandGroup
from click import shell_completion as sc
from click.shell_completion import CompletionItem
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
from openllm_core._typing_compat import Concatenate
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import ParamSpec
from openllm_core.utils import DEBUG
from . import termui
if t.TYPE_CHECKING:
import subprocess
from openllm_core._configuration import LLMConfig
logger = logging.getLogger(__name__)
P = ParamSpec('P')

View File

@@ -7,20 +7,27 @@ import subprocess
import sys
import typing as t
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm
import openllm_core
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.exceptions import OpenLLMException
from . import termui
from ._factory import start_command_factory
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString
logger = logging.getLogger(__name__)
def _start(
@@ -81,7 +88,8 @@ def _start(
framework: The framework to use for this LLM. By default, this is set to ``pt``.
additional_args: Additional arguments to pass to ``openllm start``.
"""
from .entrypoint import start_command, start_grpc_command
from .entrypoint import start_command
from .entrypoint import start_grpc_command
llm_config = openllm.AutoConfig.for_model(model_name)
_ModelEnv = openllm_core.utils.EnvVarMixin(
model_name,

View File

@@ -42,29 +42,80 @@ import fs.copy
import fs.errors
import inflection
import orjson
from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
from simple_di import Provide, inject
from bentoml_cli.utils import BentoMLCommandGroup
from bentoml_cli.utils import opt_callback
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelStore
from openllm import bundle, serialisation
from openllm import bundle
from openllm import serialisation
from openllm.exceptions import OpenLLMException
from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
from openllm.models.auto import CONFIG_MAPPING
from openllm.models.auto import MODEL_FLAX_MAPPING_NAMES
from openllm.models.auto import MODEL_MAPPING_NAMES
from openllm.models.auto import MODEL_TF_MAPPING_NAMES
from openllm.models.auto import MODEL_VLLM_MAPPING_NAMES
from openllm.models.auto import AutoConfig
from openllm.models.auto import AutoLLM
from openllm.utils import infer_auto_class
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
from openllm_core._typing_compat import Concatenate
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import LiteralRuntime
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import ParamSpec
from openllm_core._typing_compat import Self
from openllm_core.utils import DEBUG
from openllm_core.utils import DEBUG_ENV_VAR
from openllm_core.utils import OPTIONAL_DEPENDENCIES
from openllm_core.utils import QUIET_ENV_VAR
from openllm_core.utils import EnvVarMixin
from openllm_core.utils import LazyLoader
from openllm_core.utils import analytics
from openllm_core.utils import bentoml_cattr
from openllm_core.utils import compose
from openllm_core.utils import configure_logging
from openllm_core.utils import dantic
from openllm_core.utils import first_not_none
from openllm_core.utils import get_debug_mode
from openllm_core.utils import get_quiet_mode
from openllm_core.utils import is_torch_available
from openllm_core.utils import is_transformers_supports_agent
from openllm_core.utils import resolve_user_filepath
from openllm_core.utils import set_debug_mode
from openllm_core.utils import set_quiet_mode
from . import termui
from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
from ._factory import FC
from ._factory import LiteralOutput
from ._factory import _AnyCallable
from ._factory import bettertransformer_option
from ._factory import container_registry_option
from ._factory import fast_option
from ._factory import machine_option
from ._factory import model_id_option
from ._factory import model_name_argument
from ._factory import model_version_option
from ._factory import output_option
from ._factory import parse_device_callback
from ._factory import quantize_option
from ._factory import serialisation_option
from ._factory import start_command_factory
from ._factory import workers_per_resource_option
if t.TYPE_CHECKING:
import torch
from bentoml._internal.bento import BentoStore
from bentoml._internal.container import DefaultBuilder
from openllm_core._schema import EmbeddingsOutput
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
else:
torch = LazyLoader('torch', globals(), 'torch')

View File

@@ -5,9 +5,14 @@ import click
import orjson
import openllm
from openllm.cli import termui
from openllm.cli._factory import container_registry_option, machine_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
from openllm.cli._factory import container_registry_option
from openllm.cli._factory import machine_option
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralContainerRegistry
from openllm_core._typing_compat import LiteralContainerVersionStrategy
@click.command(
'build_base_container',

View File

@@ -5,13 +5,19 @@ import typing as t
import click
import psutil
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar, machine_option
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
from openllm.cli._factory import bento_complete_envvar
from openllm.cli._factory import machine_option
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)

View File

@@ -2,9 +2,12 @@ from __future__ import annotations
import typing as t
import click
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
from bentoml._internal.bento.bento import BentoInfo
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.configuration.containers import BentoMLContainer
@@ -12,7 +15,9 @@ from bentoml._internal.container.generate import generate_containerfile
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar
from openllm_core.utils import bentoml_cattr
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
if t.TYPE_CHECKING:
from bentoml._internal.bento import BentoStore
@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)

View File

@@ -4,12 +4,17 @@ import typing as t
import click
import inflection
import orjson
from bentoml_cli.utils import opt_callback
import openllm
from openllm.cli import termui
from openllm.cli._factory import machine_option, model_complete_envvar, output_option
from openllm.cli._factory import machine_option
from openllm.cli._factory import model_complete_envvar
from openllm.cli._factory import output_option
from openllm_core._prompt import process_prompt
LiteralOutput = t.Literal['json', 'pretty', 'porcelain']
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)

View File

@@ -6,9 +6,11 @@ import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, output_option
from openllm.cli._factory import LiteralOutput
from openllm.cli._factory import output_option
@click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
@output_option(default_value='json')

View File

@@ -7,10 +7,16 @@ import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
from openllm.cli._factory import LiteralOutput
from openllm.cli._factory import model_complete_envvar
from openllm.cli._factory import model_name_argument
from openllm.cli._factory import output_option
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)

View File

@@ -13,12 +13,16 @@ import yaml
from openllm import playground
from openllm.cli import termui
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
from openllm_core.utils import is_jupyter_available
from openllm_core.utils import is_jupytext_available
from openllm_core.utils import is_notebook_available
if t.TYPE_CHECKING:
import jupytext
import nbformat
from openllm_core._typing_compat import DictStrAny
logger = logging.getLogger(__name__)
def load_notebook_metadata() -> DictStrAny:

View File

@@ -6,7 +6,9 @@ import click
import inflection
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
if t.TYPE_CHECKING:
from openllm_core._typing_compat import DictStrAny
def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None

View File

@@ -14,7 +14,14 @@ from __future__ import annotations
import typing as t
import openllm_client
if t.TYPE_CHECKING: from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
if t.TYPE_CHECKING:
from openllm_client import AsyncGrpcClient as AsyncGrpcClient
from openllm_client import AsyncHTTPClient as AsyncHTTPClient
from openllm_client import BaseAsyncClient as BaseAsyncClient
from openllm_client import BaseClient as BaseClient
from openllm_client import GrpcClient as GrpcClient
from openllm_client import HTTPClient as HTTPClient
def __dir__() -> t.Sequence[str]:
return sorted(dir(openllm_client))

View File

@@ -1,4 +1,11 @@
'''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
from __future__ import annotations
from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError
from openllm_core.exceptions import Error as Error
from openllm_core.exceptions import FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
from openllm_core.exceptions import ForbiddenAttributeError as ForbiddenAttributeError
from openllm_core.exceptions import GpuNotAvailableError as GpuNotAvailableError
from openllm_core.exceptions import MissingAnnotationAttributeError as MissingAnnotationAttributeError
from openllm_core.exceptions import MissingDependencyError as MissingDependencyError
from openllm_core.exceptions import OpenLLMException as OpenLLMException
from openllm_core.exceptions import ValidationError as ValidationError

View File

@@ -3,8 +3,15 @@ import os
import typing as t
import openllm
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING
from openllm_core.config import CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
from openllm_core.config import AutoConfig as AutoConfig
from openllm_core.utils import LazyModule
from openllm_core.utils import is_flax_available
from openllm_core.utils import is_tf_available
from openllm_core.utils import is_torch_available
from openllm_core.utils import is_vllm_available
_import_structure: dict[str, list[str]] = {
'modeling_auto': ['MODEL_MAPPING_NAMES'],
'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],

View File

@@ -12,11 +12,14 @@ import openllm
from openllm_core.utils import ReprMixin
if t.TYPE_CHECKING:
import types
from collections import _odict_items, _odict_keys, _odict_values
from collections import _odict_items
from collections import _odict_keys
from collections import _odict_values
from _typeshed import SupportsIter
from openllm_core._typing_compat import LiteralString, LLMRunner
from openllm_core._typing_compat import LiteralString
from openllm_core._typing_compat import LLMRunner
ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]

View File

@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
'opt', 'OPT'
), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])

View File

@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)

View File

@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)

View File

@@ -4,7 +4,9 @@ from collections import OrderedDict
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from .factory import BaseAutoLLMClass
from .factory import _LazyAutoMapping
MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
'opt', 'VLLMOPT'
), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])

View File

@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
from openllm.utils import LazyModule
from openllm.utils import is_cpm_kernels_available
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_baichuan import START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING
from openllm_core.config.configuration_baichuan import BaichuanConfig as BaichuanConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError

View File

@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
from openllm.utils import LazyModule
from openllm.utils import is_cpm_kernels_available
from openllm.utils import is_torch_available
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_chatglm import START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING
from openllm_core.config.configuration_chatglm import ChatGLMConfig as ChatGLMConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available() or not is_cpm_kernels_available(): raise MissingDependencyError

View File

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
__openllm_internal__ = True

View File

@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_dolly_v2 import START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING
from openllm_core.config.configuration_dolly_v2 import DollyV2Config as DollyV2Config
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -5,7 +5,10 @@ import typing as t
import openllm
from openllm_core._typing_compat import overload
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_dolly_v2 import END_KEY
from openllm_core.config.configuration_dolly_v2 import RESPONSE_KEY
from openllm_core.config.configuration_dolly_v2 import get_special_token_id
# Static type checkers import the real torch / transformers / tensorflow modules, while at
# runtime the same three names are bound to openllm.utils.LazyLoader objects instead.
# NOTE(review): presumably LazyLoader defers the actual import until first use -- confirm against openllm.utils.
if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
else: torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

View File

@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_falcon import START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING
from openllm_core.config.configuration_falcon import FalconConfig as FalconConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
from openllm.utils import LazyModule
from openllm.utils import is_flax_available
from openllm.utils import is_tf_available
from openllm.utils import is_torch_available
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_flan_t5 import START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING
from openllm_core.config.configuration_flan_t5 import FlanT5Config as FlanT5Config
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
__openllm_internal__ = True

View File

@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_gpt_neox import START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING
from openllm_core.config.configuration_gpt_neox import GPTNeoXConfig as GPTNeoXConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_llama import PROMPT_MAPPING as PROMPT_MAPPING
from openllm_core.config.configuration_llama import START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING
from openllm_core.config.configuration_llama import LlamaConfig as LlamaConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_vllm_available(): raise MissingDependencyError

View File

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
__openllm_internal__ = True

View File

@@ -3,8 +3,14 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_mpt import PROMPT_MAPPING as PROMPT_MAPPING
from openllm_core.config.configuration_mpt import START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING
from openllm_core.config.configuration_mpt import MPTConfig as MPTConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -4,8 +4,11 @@ import typing as t
import bentoml
import openllm
from openllm.utils import generate_labels, is_triton_available
if t.TYPE_CHECKING: import transformers, torch
from openllm.utils import generate_labels
from openllm.utils import is_triton_available
if t.TYPE_CHECKING:
import torch
import transformers
logger = logging.getLogger(__name__)

View File

@@ -3,8 +3,15 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
from openllm.utils import LazyModule
from openllm.utils import is_flax_available
from openllm.utils import is_tf_available
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_opt import START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING
from openllm_core.config.configuration_opt import OPTConfig as OPTConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_stablelm import START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING
from openllm_core.config.configuration_stablelm import StableLMConfig as StableLMConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -2,7 +2,8 @@ from __future__ import annotations
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
if t.TYPE_CHECKING:
import transformers
class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
__openllm_internal__ = True

View File

@@ -3,8 +3,13 @@ import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
from openllm.utils import LazyModule
from openllm.utils import is_torch_available
from openllm.utils import is_vllm_available
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE
from openllm_core.config.configuration_starcoder import START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING
from openllm_core.config.configuration_starcoder import StarCoderConfig as StarCoderConfig
_import_structure: dict[str, list[str]] = {}
try:
if not is_torch_available(): raise MissingDependencyError

View File

@@ -5,7 +5,11 @@ import typing as t
import bentoml
import openllm
from openllm.utils import generate_labels
from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
from openllm_core.config.configuration_starcoder import EOD
from openllm_core.config.configuration_starcoder import FIM_MIDDLE
from openllm_core.config.configuration_starcoder import FIM_PAD
from openllm_core.config.configuration_starcoder import FIM_PREFIX
from openllm_core.config.configuration_starcoder import FIM_SUFFIX
if t.TYPE_CHECKING: import transformers
class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.GPT2TokenizerFast']):

View File

@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
from datasets import load_dataset
from trl import SFTTrainer
DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
DATASET_NAME = "timdettmers/openassistant-guanaco"

View File

@@ -4,6 +4,7 @@ import logging
import typing as t
import openllm
openllm.utils.configure_logging()
logger = logging.getLogger(__name__)

View File

@@ -23,6 +23,7 @@ from datasets import load_dataset
if t.TYPE_CHECKING:
from peft import PeftModel
DEFAULT_MODEL_ID = "facebook/opt-6.7b"
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):

View File

@@ -30,12 +30,19 @@ import cloudpickle
import fs
import openllm
from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
from openllm_core._typing_compat import M, ParamSpec, T
from openllm_core._typing_compat import M
from openllm_core._typing_compat import ParamSpec
from openllm_core._typing_compat import T
if t.TYPE_CHECKING:
import bentoml
from . import constants as constants, ggml as ggml, transformers as transformers
from . import constants as constants
from . import ggml as ggml
from . import transformers as transformers
P = ParamSpec('P')
def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
@@ -44,7 +51,8 @@ def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
By default, it will check whether the bentomodel is in the store.
If the model is not found, it raises a ``bentoml.exceptions.NotFound``.
'''
from .transformers._helpers import infer_tokenizers_from_llm, process_config
from .transformers._helpers import infer_tokenizers_from_llm
from .transformers._helpers import process_config
config, *_ = process_config(llm._bentomodel.path, llm.__llm_trust_remote_code__)
bentomodel_fs = fs.open_fs(llm._bentomodel.path)

View File

@@ -1,4 +1,5 @@
from __future__ import annotations
FRAMEWORK_TO_AUTOCLASS_MAPPING = {
'pt': ('AutoModelForCausalLM', 'AutoModelForSeq2SeqLM'),
'tf': ('TFAutoModelForCausalLM', 'TFAutoModelForSeq2SeqLM'),

View File

@@ -7,7 +7,9 @@ import typing as t
import bentoml
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import M
if t.TYPE_CHECKING:
from openllm_core._typing_compat import M
_conversion_strategy = {'pt': 'ggml'}

View File

@@ -5,15 +5,23 @@ import logging
import typing as t
from huggingface_hub import snapshot_download
from simple_di import Provide, inject
from simple_di import Provide
from simple_di import inject
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelOptions
from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
from ._helpers import check_unintialised_params
from ._helpers import infer_autoclass_from_llm
from ._helpers import infer_tokenizers_from_llm
from ._helpers import make_model_signatures
from ._helpers import process_config
from ._helpers import update_model
from .weights import HfIgnore
if t.TYPE_CHECKING:
import types
@@ -24,7 +32,9 @@ if t.TYPE_CHECKING:
import vllm
from bentoml._internal.models import ModelStore
from openllm_core._typing_compat import DictStrAny, M, T
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
else:
vllm = openllm.utils.LazyLoader('vllm', globals(), 'vllm')
autogptq = openllm.utils.LazyLoader('autogptq', globals(), 'auto_gptq')

View File

@@ -4,16 +4,24 @@ import typing as t
import openllm
import openllm_core
from bentoml._internal.models.model import ModelInfo, ModelSignature
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS
from bentoml._internal.models.model import ModelInfo
from bentoml._internal.models.model import ModelSignature
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING
from openllm.serialisation.constants import HUB_ATTRS
if t.TYPE_CHECKING:
import torch
import transformers
from transformers.models.auto.auto_factory import _BaseAutoModelClass
import bentoml
from bentoml._internal.models.model import ModelSignaturesType
from openllm_core._typing_compat import DictStrAny, M, T
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
else:
transformers, torch = openllm_core.utils.LazyLoader('transformers', globals(), 'transformers'), openllm_core.utils.LazyLoader('torch', globals(), 'torch')

View File

@@ -2,10 +2,14 @@ from __future__ import annotations
import typing as t
import attr
from huggingface_hub import HfApi
if t.TYPE_CHECKING:
import openllm
from openllm_core._typing_compat import M, T
from openllm_core._typing_compat import M
from openllm_core._typing_compat import T
def has_safetensors_weights(model_id: str, revision: str | None = None) -> bool:
    '''Report whether the Hub listing for ``model_id`` contains a ``.safetensors`` file.

    The model info is requested through ``HfApi().model_info`` (optionally pinned to
    ``revision``) and each entry of its ``siblings`` is inspected by ``rfilename``.
    Returns ``True`` on the first entry whose name ends with ``.safetensors``, otherwise ``False``.
    '''
    repo_entries = HfApi().model_info(model_id, revision=revision).siblings
    for entry in repo_entries:
        if entry.rfilename.endswith('.safetensors'):
            return True
    return False

View File

@@ -8,7 +8,9 @@ import typing as t
import bentoml
import openllm
if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralRuntime
logger = logging.getLogger(__name__)

View File

@@ -8,9 +8,14 @@ import typing as t
import openllm_core
from . import dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects
from . import dummy_flax_objects as dummy_flax_objects
from . import dummy_pt_objects as dummy_pt_objects
from . import dummy_tf_objects as dummy_tf_objects
from . import dummy_vllm_objects as dummy_vllm_objects
if t.TYPE_CHECKING:
import openllm
from openllm_core._typing_compat import LiteralRuntime
def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:

View File

@@ -1,7 +1,9 @@
from __future__ import annotations
import os
from hypothesis import HealthCheck, settings
from hypothesis import HealthCheck
from hypothesis import settings
# Register a hypothesis profile named 'CI' that suppresses the too_slow health check
# and is created with deadline=None.
settings.register_profile('CI', settings(suppress_health_check=[HealthCheck.too_slow]), deadline=None)
# Load that profile only when a 'CI' variable is present in the environment.
if 'CI' in os.environ: settings.load_profile('CI')

View File

@@ -5,7 +5,9 @@ import typing as t
from hypothesis import strategies as st
import openllm
from openllm_core._configuration import ModelSettings
logger = logging.getLogger(__name__)
env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])

View File

@@ -3,17 +3,25 @@ import contextlib
import os
import sys
import typing as t
from unittest import mock
import attr
import pytest
import transformers
from hypothesis import assume, given, strategies as st
from hypothesis import assume
from hypothesis import given
from hypothesis import strategies as st
import openllm
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
from ._strategies._configuration import make_llm_config, model_settings
from openllm_core._configuration import GenerationConfig
from openllm_core._configuration import ModelSettings
from openllm_core._configuration import field_env_key
from ._strategies._configuration import make_llm_config
from ._strategies._configuration import model_settings
# XXX: @aarnphm fixes TypedDict behaviour in 3.11
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')

View File

@@ -6,7 +6,9 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralRuntime
_FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}
_PROMPT_MAPPING = {'qa': 'Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?',}

View File

@@ -6,7 +6,9 @@ import logging
import sys
import time
import typing as t
from abc import ABC, abstractmethod
from abc import ABC
from abc import abstractmethod
import attr
import docker
@@ -14,18 +16,25 @@ import docker.errors
import docker.types
import orjson
import pytest
from syrupy.extensions.json import JSONSnapshotExtension
import openllm
from openllm._llm import normalise_model_name
from openllm_core._typing_compat import DictStrAny, ListAny
from openllm_core._typing_compat import DictStrAny
from openllm_core._typing_compat import ListAny
logger = logging.getLogger(__name__)
if t.TYPE_CHECKING:
import subprocess
from syrupy.assertion import SnapshotAssertion
from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
from syrupy.types import PropertyFilter
from syrupy.types import PropertyMatcher
from syrupy.types import SerializableData
from syrupy.types import SerializedData
from openllm._configuration import GenerationConfig
from openllm.client import BaseAsyncClient

View File

@@ -4,10 +4,14 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib
from .conftest import HandleProtocol, ResponseComparator, _Handle
from .conftest import HandleProtocol
from .conftest import ResponseComparator
from .conftest import _Handle
model = 'flan_t5'
model_id = 'google/flan-t5-small'

View File

@@ -4,10 +4,14 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib
from .conftest import HandleProtocol, ResponseComparator, _Handle
from .conftest import HandleProtocol
from .conftest import ResponseComparator
from .conftest import _Handle
model = 'opt'
model_id = 'facebook/opt-125m'

View File

@@ -3,7 +3,9 @@ import os
import typing as t
import pytest
if t.TYPE_CHECKING: import openllm
if t.TYPE_CHECKING:
import openllm
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):

View File

@@ -6,8 +6,11 @@ import typing as t
import pytest
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
if t.TYPE_CHECKING: from pathlib import Path
if t.TYPE_CHECKING:
from pathlib import Path
HF_INTERNAL_T5_TESTING = 'hf-internal-testing/tiny-random-t5'

View File

@@ -5,9 +5,14 @@ import typing as t
import pytest
import bentoml
from openllm_core import _strategies as strategy
from openllm_core._strategies import CascadingResourceStrategy, NvidiaGpuResource, get_resource
if t.TYPE_CHECKING: from _pytest.monkeypatch import MonkeyPatch
from openllm_core._strategies import CascadingResourceStrategy
from openllm_core._strategies import NvidiaGpuResource
from openllm_core._strategies import get_resource
if t.TYPE_CHECKING:
from _pytest.monkeypatch import MonkeyPatch
def test_nvidia_gpu_resource_from_env(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as mcls: