cron(style): run formatter [generated] [skip ci] (#257)

This commit is contained in:
Aaron Pham
2023-08-25 06:38:59 -04:00
committed by GitHub
parent f5dd9be122
commit 46c8904806
150 changed files with 913 additions and 379 deletions

View File

@@ -7,7 +7,6 @@ To start any OpenLLM model:
openllm start <model_name> --options ...
'''
from __future__ import annotations
if __name__ == '__main__':
from openllm.cli.entrypoint import cli
cli()

View File

@@ -1,8 +1,13 @@
# See https://github.com/bentoml/sentence-embedding-bento for more information.
from __future__ import annotations
import bentoml, openllm, transformers, typing as t
import typing as t
import transformers
from huggingface_hub import snapshot_download
from bentoml._internal.frameworks.transformers import MODULE_NAME, API_VERSION
import bentoml
import openllm
from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
from bentoml._internal.models.model import ModelOptions, ModelSignature
if t.TYPE_CHECKING: import torch
@@ -44,7 +49,8 @@ class GenericEmbeddingRunnable(bentoml.Runnable):
@bentoml.Runnable.method(batchable=True, batch_dim=0)
def encode(self, sentences: list[str]) -> t.Sequence[openllm.LLMEmbeddings]:
import torch, torch.nn.functional as F
import torch
import torch.nn.functional as F
encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(self.device)
attention_mask = encoded_input['attention_mask']
# Compute token embeddings

View File

@@ -1,6 +1,8 @@
# mypy: disable-error-code="misc"
from __future__ import annotations
import typing as t, transformers
import typing as t
import transformers
if t.TYPE_CHECKING: import torch, openllm
# reexport from transformers

View File

@@ -1,18 +1,43 @@
# mypy: disable-error-code="name-defined,attr-defined"
from __future__ import annotations
import functools, inspect, logging, os, re, traceback, types, typing as t, uuid, attr, fs.path, inflection, orjson, bentoml, openllm, openllm_core, gc, pathlib, abc
import abc
import functools
import gc
import inspect
import logging
import os
import pathlib
import re
import traceback
import types
import typing as t
import uuid
import attr
import fs.path
import inflection
import orjson
from huggingface_hub import hf_hub_download
import bentoml
import openllm
import openllm_core
from bentoml._internal.models.model import ModelSignature
from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
from openllm_core._schema import unmarshal_vllm_outputs
from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
from ._quantisation import infer_quantisation_config
from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
from .utils import infer_auto_class
from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AnyCallable, AdapterType, LiteralRuntime, DictStrAny, ListStr, LLMEmbeddings, LLMRunnable, LLMRunner, ModelSignatureDict as _ModelSignatureDict, PeftAdapterOutput, TupleAny, NotRequired, overload, M, T, LiteralString
if t.TYPE_CHECKING:
import auto_gptq as autogptq, peft, torch, transformers, vllm
import auto_gptq as autogptq
import peft
import torch
import transformers
import vllm
from openllm_core._configuration import PeftType
from openllm_core.utils.representation import ReprArgs
else:
@@ -1001,7 +1026,7 @@ class LLM(LLMInterface[M, T], ReprMixin):
) -> t.Iterator[t.Any]:
# NOTE: encoder-decoder models will need to implement their own generate_iterator for now
# inspired by fastchat's generate_stream_func
from ._generation import prepare_logits_processor, get_context_length, is_partial_stop
from ._generation import get_context_length, is_partial_stop, prepare_logits_processor
len_prompt = len(prompt)
if stop_token_ids is None: stop_token_ids = []

View File

@@ -1,11 +1,14 @@
# mypy: disable-error-code="name-defined,no-redef"
from __future__ import annotations
import logging, typing as t
from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
import logging
import typing as t
from openllm_core._typing_compat import overload
from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
if t.TYPE_CHECKING:
from ._llm import LLM
from openllm_core._typing_compat import DictStrAny
from ._llm import LLM
autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
logger = logging.getLogger(__name__)

View File

@@ -1,14 +1,23 @@
# mypy: disable-error-code="call-arg,misc,attr-defined,type-abstract,type-arg,valid-type,arg-type"
from __future__ import annotations
import os, warnings, orjson, bentoml, openllm, openllm_core, typing as t
import os
import typing as t
import warnings
import orjson
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route
import bentoml
import openllm
import openllm_core
if t.TYPE_CHECKING:
from openllm_core._typing_compat import TypeAlias
from starlette.requests import Request
from starlette.responses import Response
from bentoml._internal.runner.runner import RunnerMethod, AbstractRunner
from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
from openllm_core._typing_compat import TypeAlias
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
# The following warnings come from bitsandbytes and are probably not important for users to see
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')

View File

@@ -3,7 +3,9 @@
These utilities will stay internal, and their API can be changed or updated without backward-compatibility.
"""
from __future__ import annotations
import os, typing as t
import os
import typing as t
from openllm_core.utils import LazyModule
_import_structure: dict[str, list[str]] = {
'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],

View File

@@ -1,18 +1,32 @@
# mypy: disable-error-code="misc"
from __future__ import annotations
import fs, fs.copy, fs.errors, orjson, bentoml, openllm_core, importlib.metadata, inspect, logging, os, typing as t, string
import importlib.metadata
import inspect
import logging
import os
import string
import typing as t
from pathlib import Path
import fs
import fs.copy
import fs.errors
import orjson
from simple_di import Provide, inject
import bentoml
import openllm_core
from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
from bentoml._internal.configuration.containers import BentoMLContainer
from . import oci
from . import oci
if t.TYPE_CHECKING:
import openllm
from fs.base import FS
from openllm_core._typing_compat import LiteralString, LiteralContainerRegistry, LiteralContainerVersionStrategy
import openllm
from bentoml._internal.bento import BentoStore
from bentoml._internal.models.model import ModelStore
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
logger = logging.getLogger(__name__)
OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'

View File

@@ -1,15 +1,27 @@
# mypy: disable-error-code="misc"
'''OCI-related utilities for OpenLLM. This module is considered internal and its API is subject to change.'''
from __future__ import annotations
import functools, importlib, logging, os, pathlib, shutil, subprocess, typing as t, openllm_core
import functools
import importlib
import logging
import os
import pathlib
import shutil
import subprocess
import typing as t
from datetime import datetime, timedelta, timezone
import attr, orjson, bentoml, openllm
from openllm_core.utils.lazy import VersionInfo
import attr
import orjson
import bentoml
import openllm
import openllm_core
from openllm_core.utils.lazy import VersionInfo
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
from ghapi import all
from openllm_core._typing_compat import RefTuple, LiteralString
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all') # noqa: F811
logger = logging.getLogger(__name__)

View File

@@ -1,15 +1,28 @@
from __future__ import annotations
import functools, importlib.util, os, typing as t, logging, click, click_option_group as cog, inflection, orjson, bentoml, openllm
from click import shell_completion as sc
from bentoml_cli.utils import BentoMLCommandGroup
from click.shell_completion import CompletionItem
from openllm_core.utils import DEBUG
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm_core._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
from . import termui
import functools
import importlib.util
import logging
import os
import typing as t
import click
import click_option_group as cog
import inflection
import orjson
from bentoml_cli.utils import BentoMLCommandGroup
from click import shell_completion as sc
from click.shell_completion import CompletionItem
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
from openllm_core.utils import DEBUG
from . import termui
if t.TYPE_CHECKING:
import subprocess
from openllm_core._configuration import LLMConfig
logger = logging.getLogger(__name__)

View File

@@ -1,15 +1,26 @@
from __future__ import annotations
import itertools, logging, os, re, subprocess, sys, typing as t, bentoml, openllm, openllm_core
import itertools
import logging
import os
import re
import subprocess
import sys
import typing as t
from simple_di import Provide, inject
import bentoml
import openllm
import openllm_core
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.exceptions import OpenLLMException
from . import termui
from ._factory import start_command_factory
if t.TYPE_CHECKING:
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import LiteralString, LiteralRuntime, LiteralContainerRegistry, LiteralContainerVersionStrategy
from bentoml._internal.bento import BentoStore
from openllm_core._configuration import LLMConfig
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
logger = logging.getLogger(__name__)
def _start(
model_name: str,

View File

@@ -20,22 +20,47 @@ bentomodel = openllm.import_model("falcon", model_id='tiiuae/falcon-7b-instruct'
```
"""
from __future__ import annotations
import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t, attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
from simple_di import Provide, inject
import functools
import http.client
import inspect
import itertools
import logging
import os
import platform
import re
import subprocess
import sys
import time
import traceback
import typing as t
import attr
import click
import click_option_group as cog
import fs
import fs.copy
import fs.errors
import inflection
import orjson
from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
from simple_di import Provide, inject
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelStore
from . import termui
from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
from openllm import bundle, serialisation
from openllm.exceptions import OpenLLMException
from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
from openllm_core._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
from openllm.utils import infer_auto_class
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
from . import termui
from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
if t.TYPE_CHECKING:
import torch
from bentoml._internal.bento import BentoStore
from bentoml._internal.container import DefaultBuilder
from openllm_core._schema import EmbeddingsOutput

View File

@@ -1,7 +1,12 @@
from __future__ import annotations
import typing as t, click, orjson, openllm
import typing as t
import click
import orjson
import openllm
from openllm.cli import termui
from openllm.cli._factory import machine_option, container_registry_option
from openllm.cli._factory import container_registry_option, machine_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
@click.command(
'build_base_container',

View File

@@ -1,11 +1,16 @@
from __future__ import annotations
import shutil, subprocess, typing as t, click, psutil, bentoml
from simple_di import Provide, inject
from bentoml._internal.configuration.containers import BentoMLContainer
import shutil
import subprocess
import typing as t
import click
import psutil
from simple_di import Provide, inject
import bentoml
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar, machine_option
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)

View File

@@ -1,6 +1,10 @@
from __future__ import annotations
import typing as t, click, bentoml
import typing as t
import click
from simple_di import Provide, inject
import bentoml
from bentoml._internal.bento.bento import BentoInfo
from bentoml._internal.bento.build_config import DockerOptions
from bentoml._internal.configuration.containers import BentoMLContainer
@@ -8,7 +12,6 @@ from bentoml._internal.container.generate import generate_containerfile
from openllm.cli import termui
from openllm.cli._factory import bento_complete_envvar
from openllm_core.utils import bentoml_cattr
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)

View File

@@ -1,8 +1,14 @@
from __future__ import annotations
import typing as t, click, inflection, orjson, openllm
import typing as t
import click
import inflection
import orjson
from bentoml_cli.utils import opt_callback
import openllm
from openllm.cli import termui
from openllm.cli._factory import model_complete_envvar, output_option, machine_option
from openllm.cli._factory import machine_option, model_complete_envvar, output_option
from openllm_core._prompt import process_prompt
LiteralOutput = t.Literal['json', 'pretty', 'porcelain']
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)

View File

@@ -1,5 +1,11 @@
from __future__ import annotations
import click, inflection, orjson, bentoml, openllm
import click
import inflection
import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, output_option

View File

@@ -1,9 +1,15 @@
from __future__ import annotations
import typing as t, bentoml, openllm, orjson, inflection, click
from openllm.cli import termui
from bentoml._internal.utils import human_readable_size
from openllm.cli._factory import LiteralOutput, model_name_argument, output_option, model_complete_envvar
import typing as t
import click
import inflection
import orjson
import bentoml
import openllm
from bentoml._internal.utils import human_readable_size
from openllm.cli import termui
from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)

View File

@@ -1,11 +1,23 @@
from __future__ import annotations
import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t, click, yaml
from openllm.cli import termui
from openllm import playground
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
import importlib.machinery
import logging
import os
import pkgutil
import subprocess
import sys
import tempfile
import typing as t
import click
import yaml
from openllm import playground
from openllm.cli import termui
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
if t.TYPE_CHECKING:
import jupytext, nbformat
import jupytext
import nbformat
from openllm_core._typing_compat import DictStrAny
logger = logging.getLogger(__name__)
def load_notebook_metadata() -> DictStrAny:

View File

@@ -1,5 +1,11 @@
from __future__ import annotations
import os, typing as t, click, inflection, openllm
import os
import typing as t
import click
import inflection
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None

View File

@@ -11,7 +11,9 @@ client.embed("What is the difference between gather and scatter?")
```
'''
from __future__ import annotations
import openllm_client, typing as t
import typing as t
import openllm_client
if t.TYPE_CHECKING: from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
def __dir__() -> t.Sequence[str]:
return sorted(dir(openllm_client))

View File

@@ -1,3 +1,4 @@
'''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
from __future__ import annotations
from openllm_core.exceptions import OpenLLMException as OpenLLMException, GpuNotAvailableError as GpuNotAvailableError, ValidationError as ValidationError, ForbiddenAttributeError as ForbiddenAttributeError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError

View File

@@ -1,8 +1,10 @@
from __future__ import annotations
import typing as t, os
import os
import typing as t
import openllm
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
_import_structure: dict[str, list[str]] = {
'modeling_auto': ['MODEL_MAPPING_NAMES'],
'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],

View File

@@ -1,16 +1,22 @@
# mypy: disable-error-code="type-arg"
from __future__ import annotations
import importlib, inspect, logging, typing as t
import importlib
import inspect
import logging
import typing as t
from collections import OrderedDict
import inflection, openllm
from openllm_core.utils import ReprMixin
import inflection
import openllm
from openllm_core.utils import ReprMixin
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralString, LLMRunner
import types
from collections import _odict_items, _odict_keys, _odict_values
from _typeshed import SupportsIter
from openllm_core._typing_compat import LiteralString, LLMRunner
ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]

View File

@@ -1,8 +1,10 @@
from __future__ import annotations
import typing as t
from collections import OrderedDict
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
'opt', 'OPT'
), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])

View File

@@ -1,8 +1,10 @@
from __future__ import annotations
import typing as t
from collections import OrderedDict
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)
class AutoFlaxLLM(BaseAutoLLMClass):

View File

@@ -1,8 +1,10 @@
from __future__ import annotations
import typing as t
from collections import OrderedDict
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)
class AutoTFLLM(BaseAutoLLMClass):

View File

@@ -1,8 +1,10 @@
from __future__ import annotations
import typing as t
from collections import OrderedDict
from .factory import BaseAutoLLMClass, _LazyAutoMapping
from openllm_core.config import CONFIG_MAPPING_NAMES
from .factory import BaseAutoLLMClass, _LazyAutoMapping
MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
'opt', 'VLLMOPT'
), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
class Baichuan(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):
__openllm_internal__ = True

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import vllm, transformers
class VLLMBaichuan(openllm.LLM['vllm.LLMEngine', 'transformers.PreTrainedTokenizerBase']):
__openllm_internal__ = True

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
__openllm_internal__ = True
@@ -13,7 +15,8 @@ class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrain
return self.model.chat(self.tokenizer, prompt, generation_config=self.config.model_construct_env(**attrs).to_generation_config())
def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
import torch, torch.nn.functional as F
import torch
import torch.nn.functional as F
embeddings: list[list[float]] = []
num_tokens = 0
for prompt in prompts:

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config

View File

@@ -1,8 +1,11 @@
from __future__ import annotations
import logging, re, typing as t, openllm
import logging
import re
import typing as t
import openllm
from openllm_core._typing_compat import overload
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
else: torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
logger = logging.getLogger(__name__)

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import logging, typing as t, openllm
import logging
import typing as t
import openllm
if t.TYPE_CHECKING: import vllm, transformers
logger = logging.getLogger(__name__)

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import torch, transformers
else: torch, transformers = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers')
class Falcon(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import logging, typing as t, openllm
import logging
import typing as t
import openllm
if t.TYPE_CHECKING: import vllm, transformers
logger = logging.getLogger(__name__)

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
__openllm_internal__ = True
@@ -13,7 +15,8 @@ class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformer
)
def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
import torch, torch.nn.functional as F
import torch
import torch.nn.functional as F
embeddings: list[list[float]] = []
num_tokens = 0
for prompt in prompts:

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
from openllm_core._prompt import process_prompt
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE
if t.TYPE_CHECKING: import transformers

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
class TFFlanT5(openllm.LLM['transformers.TFT5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
__openllm_internal__ = True

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import logging, typing as t, openllm
import logging
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
logger = logging.getLogger(__name__)

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import vllm, transformers
class VLLMGPTNeoX(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
__openllm_internal__ = True

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
__openllm_internal__ = True
@@ -10,7 +12,8 @@ class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaToke
return {'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}
def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
import torch, torch.nn.functional as F
import torch
import torch.nn.functional as F
encoding = self.tokenizer(prompts, padding=True, return_tensors='pt').to(self.device)
input_ids, attention_mask = encoding['input_ids'], encoding['attention_mask']
with torch.inference_mode():

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import vllm, transformers
class VLLMLlama(openllm.LLM['vllm.LLMEngine', 'transformers.LlamaTokenizerFast']):
__openllm_internal__ = True

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig

View File

@@ -1,5 +1,9 @@
from __future__ import annotations
import logging, typing as t, bentoml, openllm
import logging
import typing as t
import bentoml
import openllm
from openllm.utils import generate_labels, is_triton_available
if t.TYPE_CHECKING: import transformers, torch
@@ -31,7 +35,8 @@ class MPT(openllm.LLM['transformers.PreTrainedModel', 'transformers.GPTNeoXToken
return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.bfloat16 if torch.cuda.is_available() else torch.float32}, {}
def import_model(self, *args: t.Any, trust_remote_code: bool = True, **attrs: t.Any) -> bentoml.Model:
import torch, transformers
import torch
import transformers
_, tokenizer_attrs = self.llm_parameters
torch_dtype = attrs.pop('torch_dtype', self.dtype)
device_map = attrs.pop('device_map', None)

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers, vllm
class VLLMMPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
__openllm_internal__ = True

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig

View File

@@ -1,5 +1,9 @@
from __future__ import annotations
import logging, typing as t, bentoml, openllm
import logging
import typing as t
import bentoml
import openllm
from openllm._prompt import process_prompt
from openllm.utils import generate_labels
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import logging, typing as t, openllm
import logging
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
logger = logging.getLogger(__name__)

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import typing as t, bentoml, openllm
import typing as t
import bentoml
import openllm
from openllm_core.utils import generate_labels
if t.TYPE_CHECKING: import transformers
class TFOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tokenizer']):

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
from openllm_core._prompt import process_prompt
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE
if t.TYPE_CHECKING: import vllm, transformers

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, openllm
import typing as t
import openllm
if t.TYPE_CHECKING: import transformers
class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
__openllm_internal__ = True

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import logging, typing as t, openllm
import logging
import typing as t
import openllm
if t.TYPE_CHECKING: import vllm, transformers
class VLLMStableLM(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
__openllm_internal__ = True

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import sys, typing as t
import sys
import typing as t
from openllm.exceptions import MissingDependencyError
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig

View File

@@ -1,5 +1,9 @@
from __future__ import annotations
import logging, typing as t, bentoml, openllm
import logging
import typing as t
import bentoml
import openllm
from openllm.utils import generate_labels
from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
if t.TYPE_CHECKING: import transformers
@@ -12,7 +16,8 @@ class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.
return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}
def import_model(self, *args: t.Any, trust_remote_code: bool = False, **attrs: t.Any) -> bentoml.Model:
import torch, transformers
import torch
import transformers
torch_dtype, device_map = attrs.pop('torch_dtype', torch.float16), attrs.pop('device_map', 'auto')
tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_id, **self.llm_parameters[-1])
tokenizer.add_special_tokens({'additional_special_tokens': [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD], 'pad_token': EOD})

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import logging, typing as t, openllm
import logging
import typing as t
import openllm
if t.TYPE_CHECKING: import vllm, transformers
class VLLMStarCoder(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2TokenizerFast']):
__openllm_internal__ = True

View File

@@ -22,10 +22,8 @@ logger = logging.getLogger(__name__)
from datasets import load_dataset
from trl import SFTTrainer
DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
DATASET_NAME = "timdettmers/openassistant-guanaco"
@dataclasses.dataclass
class TrainingArguments:
per_device_train_batch_size: int = dataclasses.field(default=4)
@@ -42,12 +40,10 @@ class TrainingArguments:
group_by_length: bool = dataclasses.field(default=True)
lr_scheduler_type: str = dataclasses.field(default="constant")
output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "falcon"))
@dataclasses.dataclass
class ModelArguments:
model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
max_sequence_length: int = dataclasses.field(default=512)
parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
@@ -56,13 +52,20 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
else:
model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True,).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h",],)
model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"])
model.config.use_cache = False
tokenizer.pad_token = tokenizer.eos_token
dataset = load_dataset(DATASET_NAME, split="train")
trainer = SFTTrainer(model=model, train_dataset=dataset, dataset_text_field="text", max_seq_length=model_args.max_sequence_length, tokenizer=tokenizer, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),),)
trainer = SFTTrainer(
model=model,
train_dataset=dataset,
dataset_text_field="text",
max_seq_length=model_args.max_sequence_length,
tokenizer=tokenizer,
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
)
# upcast layernorm in float32 for more stable training
for name, module in trainer.model.named_modules():

View File

@@ -4,7 +4,6 @@ import logging
import typing as t
import openllm
openllm.utils.configure_logging()
logger = logging.getLogger(__name__)
@@ -13,7 +12,6 @@ MAX_NEW_TOKENS = 384
Q = "Answer the following question, step by step:\n{q}\nA:"
question = "What is the meaning of life?"
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument("question", default=question)
@@ -44,11 +42,9 @@ def main() -> int:
logger.info("=" * 10, "Response:", r.llm.postprocess_generate(prompt, res))
return 0
def _mp_fn(index: t.Any): # noqa # type: ignore
# For xla_spawn (TPUs)
main()
if openllm.utils.in_notebook():
main()
else:

View File

@@ -29,7 +29,6 @@ from random import randint, randrange
import bitsandbytes as bnb
from datasets import load_dataset
# COPIED FROM https://github.com/artidoro/qlora/blob/main/qlora.py
def find_all_linear_names(model):
lora_module_names = set()
@@ -41,13 +40,11 @@ def find_all_linear_names(model):
if "lm_head" in lora_module_names: # needed for 16-bit
lora_module_names.remove("lm_head")
return list(lora_module_names)
# Change this to the local converted path if you don't have access to the meta-llama model
DEFAULT_MODEL_ID = "meta-llama/Llama-2-7b-hf"
# change this to 'main' if you want to use the latest llama
DEFAULT_MODEL_VERSION = "335a02887eb6684d487240bbc28b5699298c3135"
DATASET_NAME = "databricks/databricks-dolly-15k"
def format_dolly(sample):
instruction = f"### Instruction\n{sample['instruction']}"
context = f"### Context\n{sample['context']}" if len(sample["context"]) > 0 else None
@@ -55,15 +52,12 @@ def format_dolly(sample):
# join all the parts together
prompt = "\n\n".join([i for i in [instruction, context, response] if i is not None])
return prompt
# template dataset to add prompt to each sample
def template_dataset(sample, tokenizer):
sample["text"] = f"{format_dolly(sample)}{tokenizer.eos_token}"
return sample
# empty list to save remainder from batches to use in next batch
remainder = {"input_ids": [], "attention_mask": [], "token_type_ids": []}
def chunk(sample, chunk_length=2048):
# define global remainder variable to save remainder from batches to use in next batch
global remainder
@@ -84,7 +78,6 @@ def chunk(sample, chunk_length=2048):
# prepare labels
result["labels"] = result["input_ids"].copy()
return result
def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
# Load dataset from the hub
dataset = load_dataset(dataset_name, split="train")
@@ -103,11 +96,20 @@ def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
# Print total number of samples
print(f"Total number of samples: {len(lm_dataset)}")
return lm_dataset
def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,
) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
from peft.tuners.lora import LoraLayer
llm = openllm.AutoLLM.for_model("llama", model_id=model_id, model_version=model_version, ensure_available=True, quantize="int4", bnb_4bit_compute_dtype=torch.bfloat16, use_cache=not gradient_checkpointing, device_map="auto",)
llm = openllm.AutoLLM.for_model(
"llama",
model_id=model_id,
model_version=model_version,
ensure_available=True,
quantize="int4",
bnb_4bit_compute_dtype=torch.bfloat16,
use_cache=not gradient_checkpointing,
device_map="auto",
)
print("Model summary:", llm.model)
# get lora target modules
@@ -128,7 +130,6 @@ def prepare_for_int4_training(model_id: str, model_version: str | None = None, g
if bf16 and module.weight.dtype == torch.float32:
module = module.to(torch.bfloat16)
return model, tokenizer
@dataclasses.dataclass
class TrainingArguments:
per_device_train_batch_size: int = dataclasses.field(default=1)
@@ -140,14 +141,12 @@ class TrainingArguments:
report_to: str = dataclasses.field(default="none")
output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "llama"))
save_strategy: str = dataclasses.field(default="no")
@dataclasses.dataclass
class ModelArguments:
model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
model_version: str = dataclasses.field(default=DEFAULT_MODEL_VERSION)
seed: int = dataclasses.field(default=42)
merge_weights: bool = dataclasses.field(default=False)
if openllm.utils.in_notebook():
model_args, training_rags = ModelArguments(), TrainingArguments()
else:
@@ -161,7 +160,6 @@ else:
# import the model first hand
openllm.import_model("llama", model_id=model_args.model_id, model_version=model_args.model_version)
def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
import peft
@@ -170,7 +168,12 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
model, tokenizer = prepare_for_int4_training(model_args.model_id, gradient_checkpointing=training_args.gradient_checkpointing, bf16=training_args.bf16,)
datasets = prepare_datasets(tokenizer)
trainer = transformers.Trainer(model=model, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)), train_dataset=datasets, data_collator=transformers.default_data_collator,)
trainer = transformers.Trainer(
model=model,
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
train_dataset=datasets,
data_collator=transformers.default_data_collator,
)
trainer.train()
@@ -191,5 +194,4 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"), safe_serialization=True, max_shard_size="2GB")
else:
trainer.model.save_pretrained(os.path.join(training_args.output_dir, "lora"))
train_loop(model_args, training_args)

View File

@@ -23,12 +23,14 @@ from datasets import load_dataset
if t.TYPE_CHECKING:
from peft import PeftModel
DEFAULT_MODEL_ID = "facebook/opt-6.7b"
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments,):
return transformers.Trainer(model=model, train_dataset=dataset_dict["train"], args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),), data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),)
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):
return transformers.Trainer(
model=model,
train_dataset=dataset_dict["train"],
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
@dataclasses.dataclass
class TrainingArguments:
per_device_train_batch_size: int = dataclasses.field(default=4)
@@ -39,11 +41,9 @@ class TrainingArguments:
fp16: bool = dataclasses.field(default=True)
logging_steps: int = dataclasses.field(default=1)
output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "opt"))
@dataclasses.dataclass
class ModelArguments:
model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
@@ -52,7 +52,7 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
else:
model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True,).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none",)
model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
# ft on english_quotes
data = load_dataset("Abirate/english_quotes")

View File

@@ -23,13 +23,18 @@ llm.save_pretrained("./path/to/local-dolly")
```
"""
from __future__ import annotations
import importlib, typing as t
import cloudpickle, fs, openllm
from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
from openllm_core._typing_compat import M, T, ParamSpec
import importlib
import typing as t
import cloudpickle
import fs
import openllm
from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
from openllm_core._typing_compat import M, ParamSpec, T
if t.TYPE_CHECKING:
import bentoml
from . import constants as constants, ggml as ggml, transformers as transformers
P = ParamSpec('P')
def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:

View File

@@ -4,8 +4,9 @@ This requires ctransformers to be installed.
'''
from __future__ import annotations
import typing as t
import bentoml, openllm
import bentoml
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import M
_conversion_strategy = {'pt': 'ggml'}

View File

@@ -1,19 +1,27 @@
'''Serialisation related implementation for Transformers-based implementation.'''
from __future__ import annotations
import importlib, logging, typing as t
import bentoml, openllm
import importlib
import logging
import typing as t
from huggingface_hub import snapshot_download
from simple_di import Provide, inject
import bentoml
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.models.model import ModelOptions
from .weights import HfIgnore
from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
from .weights import HfIgnore
if t.TYPE_CHECKING:
import types
import vllm, auto_gptq as autogptq, transformers, torch
import auto_gptq as autogptq
import torch
import torch.nn
import transformers
import vllm
from bentoml._internal.models import ModelStore
from openllm_core._typing_compat import DictStrAny, M, T

View File

@@ -1,11 +1,17 @@
from __future__ import annotations
import copy, typing as t, openllm_core, openllm
import copy
import typing as t
import openllm
import openllm_core
from bentoml._internal.models.model import ModelInfo, ModelSignature
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS
if t.TYPE_CHECKING:
import torch, transformers, bentoml
import torch
import transformers
from transformers.models.auto.auto_factory import _BaseAutoModelClass
import bentoml
from bentoml._internal.models.model import ModelSignaturesType
from openllm_core._typing_compat import DictStrAny, M, T
else:

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import typing as t, attr
import typing as t
import attr
from huggingface_hub import HfApi
if t.TYPE_CHECKING:
import openllm

View File

@@ -1,6 +1,13 @@
'''Tests utilities for OpenLLM.'''
from __future__ import annotations
import contextlib, logging, shutil, subprocess, typing as t, bentoml, openllm
import contextlib
import logging
import shutil
import subprocess
import typing as t
import bentoml
import openllm
if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime
logger = logging.getLogger(__name__)

View File

@@ -4,12 +4,19 @@ User can import these function for convenience, but
we won't ensure backward compatibility for these functions. So use with caution.
"""
from __future__ import annotations
import typing as t, openllm_core
from . import (dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects,)
import typing as t
import openllm_core
from . import (
dummy_flax_objects as dummy_flax_objects,
dummy_pt_objects as dummy_pt_objects,
dummy_tf_objects as dummy_tf_objects,
dummy_vllm_objects as dummy_vllm_objects,
)
if t.TYPE_CHECKING:
from openllm_core._typing_compat import LiteralRuntime
import openllm
from openllm_core._typing_compat import LiteralRuntime
def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
return {'runtime': llm.runtime, 'framework': 'openllm', 'model_name': llm.config['model_name'], 'architecture': llm.config['architecture'], 'serialisation_format': llm._serialisation_format}
def infer_auto_class(implementation: LiteralRuntime) -> type[openllm.AutoLLM | openllm.AutoTFLLM | openllm.AutoFlaxLLM | openllm.AutoVLLM]:

View File

@@ -1,7 +1,11 @@
from __future__ import annotations
import logging, typing as t, openllm
from openllm_core._configuration import ModelSettings
import logging
import typing as t
from hypothesis import strategies as st
import openllm
from openllm_core._configuration import ModelSettings
logger = logging.getLogger(__name__)
env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])

View File

@@ -1,8 +1,18 @@
from __future__ import annotations
import contextlib, os, sys, typing as t, attr, pytest, transformers, openllm
import contextlib
import os
import sys
import typing as t
from unittest import mock
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
import attr
import pytest
import transformers
from hypothesis import assume, given, strategies as st
import openllm
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
from ._strategies._configuration import make_llm_config, model_settings
# XXX: @aarnphm fixes TypedDict behaviour in 3.11
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')

View File

@@ -1,5 +1,11 @@
from __future__ import annotations
import itertools, os, typing as t, pytest, openllm
import itertools
import os
import typing as t
import pytest
import openllm
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime
_FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}

View File

@@ -1,16 +1,32 @@
from __future__ import annotations
import asyncio, contextlib, functools, logging, sys, time, typing as t
import asyncio
import contextlib
import functools
import logging
import sys
import time
import typing as t
from abc import ABC, abstractmethod
import attr, docker, docker.errors, docker.types, orjson, pytest, openllm
import attr
import docker
import docker.errors
import docker.types
import orjson
import pytest
from syrupy.extensions.json import JSONSnapshotExtension
import openllm
from openllm._llm import normalise_model_name
from openllm_core._typing_compat import DictStrAny, ListAny
logger = logging.getLogger(__name__)
if t.TYPE_CHECKING:
import subprocess
from syrupy.assertion import SnapshotAssertion
from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
from openllm._configuration import GenerationConfig
from openllm.client import BaseAsyncClient
class ResponseComparator(JSONSnapshotExtension):

View File

@@ -4,7 +4,6 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib

View File

@@ -4,7 +4,6 @@ import typing as t
import pytest
import openllm
if t.TYPE_CHECKING:
import contextlib

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import os, typing as t, pytest
import os
import typing as t
import pytest
if t.TYPE_CHECKING: import openllm
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):

View File

@@ -1,5 +1,11 @@
from __future__ import annotations
import functools, os, typing as t, pytest, openllm
import functools
import os
import typing as t
import pytest
import openllm
from bentoml._internal.configuration.containers import BentoMLContainer
if t.TYPE_CHECKING: from pathlib import Path

View File

@@ -1,5 +1,10 @@
from __future__ import annotations
import os, typing as t, pytest, bentoml
import os
import typing as t
import pytest
import bentoml
from openllm_core import _strategies as strategy
from openllm_core._strategies import CascadingResourceStrategy, NvidiaGpuResource, get_resource
if t.TYPE_CHECKING: from _pytest.monkeypatch import MonkeyPatch