mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-05-19 14:16:22 -04:00
cron(style): run formatter [generated] [skip ci] (#257)
This commit is contained in:
@@ -7,7 +7,6 @@ To start any OpenLLM model:
|
||||
openllm start <model_name> --options ...
|
||||
'''
|
||||
from __future__ import annotations
|
||||
|
||||
# Script entry guard: when this module is executed directly (rather than
# imported), load the OpenLLM CLI entrypoint and invoke it.
if __name__ == '__main__':
|
||||
from openllm.cli.entrypoint import cli
|
||||
cli()
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
# See https://github.com/bentoml/sentence-embedding-bento for more information.
|
||||
from __future__ import annotations
|
||||
import bentoml, openllm, transformers, typing as t
|
||||
import typing as t
|
||||
|
||||
import transformers
|
||||
from huggingface_hub import snapshot_download
|
||||
from bentoml._internal.frameworks.transformers import MODULE_NAME, API_VERSION
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from bentoml._internal.frameworks.transformers import API_VERSION, MODULE_NAME
|
||||
from bentoml._internal.models.model import ModelOptions, ModelSignature
|
||||
if t.TYPE_CHECKING: import torch
|
||||
|
||||
@@ -44,7 +49,8 @@ class GenericEmbeddingRunnable(bentoml.Runnable):
|
||||
|
||||
@bentoml.Runnable.method(batchable=True, batch_dim=0)
|
||||
def encode(self, sentences: list[str]) -> t.Sequence[openllm.LLMEmbeddings]:
|
||||
import torch, torch.nn.functional as F
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(self.device)
|
||||
attention_mask = encoded_input['attention_mask']
|
||||
# Compute token embeddings
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# mypy: disable-error-code="misc"
|
||||
from __future__ import annotations
|
||||
import typing as t, transformers
|
||||
import typing as t
|
||||
|
||||
import transformers
|
||||
if t.TYPE_CHECKING: import torch, openllm
|
||||
|
||||
# reexport from transformers
|
||||
|
||||
@@ -1,18 +1,43 @@
|
||||
# mypy: disable-error-code="name-defined,attr-defined"
|
||||
from __future__ import annotations
|
||||
import functools, inspect, logging, os, re, traceback, types, typing as t, uuid, attr, fs.path, inflection, orjson, bentoml, openllm, openllm_core, gc, pathlib, abc
|
||||
import abc
|
||||
import functools
|
||||
import gc
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import traceback
|
||||
import types
|
||||
import typing as t
|
||||
import uuid
|
||||
|
||||
import attr
|
||||
import fs.path
|
||||
import inflection
|
||||
import orjson
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
import openllm_core
|
||||
from bentoml._internal.models.model import ModelSignature
|
||||
from openllm_core._configuration import FineTuneConfig, LLMConfig, _object_getattribute, _setattr_class
|
||||
from openllm_core._schema import unmarshal_vllm_outputs
|
||||
from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AdapterType, AnyCallable, DictStrAny, ListStr, LiteralRuntime, LiteralString, LLMEmbeddings, LLMRunnable, LLMRunner, M, ModelSignatureDict as _ModelSignatureDict, NotRequired, PeftAdapterOutput, T, TupleAny, overload
|
||||
from openllm_core.utils import DEBUG, ENV_VARS_TRUE_VALUES, MYPY, EnvVarMixin, LazyLoader, ReprMixin, apply, bentoml_cattr, codegen, device_count, first_not_none, generate_hash_from_file, is_peft_available, is_torch_available, non_intrusive_setattr, normalize_attrs_to_model_tokenizer_pair, resolve_filepath, validate_is_path
|
||||
|
||||
from ._quantisation import infer_quantisation_config
|
||||
from .exceptions import ForbiddenAttributeError, GpuNotAvailableError, OpenLLMException
|
||||
from .utils import infer_auto_class
|
||||
from openllm_core._typing_compat import AdaptersMapping, AdaptersTuple, AnyCallable, AdapterType, LiteralRuntime, DictStrAny, ListStr, LLMEmbeddings, LLMRunnable, LLMRunner, ModelSignatureDict as _ModelSignatureDict, PeftAdapterOutput, TupleAny, NotRequired, overload, M, T, LiteralString
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import auto_gptq as autogptq, peft, torch, transformers, vllm
|
||||
import auto_gptq as autogptq
|
||||
import peft
|
||||
import torch
|
||||
import transformers
|
||||
import vllm
|
||||
|
||||
from openllm_core._configuration import PeftType
|
||||
from openllm_core.utils.representation import ReprArgs
|
||||
else:
|
||||
@@ -1001,7 +1026,7 @@ class LLM(LLMInterface[M, T], ReprMixin):
|
||||
) -> t.Iterator[t.Any]:
|
||||
# NOTE: encoder-decoder models will need to implement their own generate_iterator for now
|
||||
# inspired from fastchat's generate_stream_func
|
||||
from ._generation import prepare_logits_processor, get_context_length, is_partial_stop
|
||||
from ._generation import get_context_length, is_partial_stop, prepare_logits_processor
|
||||
|
||||
len_prompt = len(prompt)
|
||||
if stop_token_ids is None: stop_token_ids = []
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
# mypy: disable-error-code="name-defined,no-redef"
|
||||
from __future__ import annotations
|
||||
import logging, typing as t
|
||||
from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
from openllm_core._typing_compat import overload
|
||||
from openllm_core.utils import LazyLoader, is_autogptq_available, is_bitsandbytes_available, is_transformers_supports_kbit, pkg
|
||||
if t.TYPE_CHECKING:
|
||||
from ._llm import LLM
|
||||
from openllm_core._typing_compat import DictStrAny
|
||||
|
||||
from ._llm import LLM
|
||||
autogptq, torch, transformers = LazyLoader('autogptq', globals(), 'auto_gptq'), LazyLoader('torch', globals(), 'torch'), LazyLoader('transformers', globals(), 'transformers')
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,14 +1,23 @@
|
||||
# mypy: disable-error-code="call-arg,misc,attr-defined,type-abstract,type-arg,valid-type,arg-type"
|
||||
from __future__ import annotations
|
||||
import os, warnings, orjson, bentoml, openllm, openllm_core, typing as t
|
||||
import os
|
||||
import typing as t
|
||||
import warnings
|
||||
|
||||
import orjson
|
||||
from starlette.applications import Starlette
|
||||
from starlette.responses import JSONResponse
|
||||
from starlette.routing import Route
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
import openllm_core
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import TypeAlias
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response
|
||||
from bentoml._internal.runner.runner import RunnerMethod, AbstractRunner
|
||||
|
||||
from bentoml._internal.runner.runner import AbstractRunner, RunnerMethod
|
||||
from openllm_core._typing_compat import TypeAlias
|
||||
_EmbeddingMethod: TypeAlias = RunnerMethod[t.Union[bentoml.Runnable, openllm.LLMRunnable[t.Any, t.Any]], [t.List[str]], t.Sequence[openllm.LLMEmbeddings]]
|
||||
# The following warnings from bitsandbytes, and probably not that important for users to see
|
||||
warnings.filterwarnings('ignore', message='MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization')
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
These utilities will stay internal, and its API can be changed or updated without backward-compatibility.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import os, typing as t
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
from openllm_core.utils import LazyModule
|
||||
_import_structure: dict[str, list[str]] = {
|
||||
'_package': ['create_bento', 'build_editable', 'construct_python_options', 'construct_docker_options'],
|
||||
|
||||
@@ -1,18 +1,32 @@
|
||||
# mypy: disable-error-code="misc"
|
||||
from __future__ import annotations
|
||||
import fs, fs.copy, fs.errors, orjson, bentoml, openllm_core, importlib.metadata, inspect, logging, os, typing as t, string
|
||||
import importlib.metadata
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
import string
|
||||
import typing as t
|
||||
from pathlib import Path
|
||||
|
||||
import fs
|
||||
import fs.copy
|
||||
import fs.errors
|
||||
import orjson
|
||||
from simple_di import Provide, inject
|
||||
|
||||
import bentoml
|
||||
import openllm_core
|
||||
from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from . import oci
|
||||
|
||||
from . import oci
|
||||
if t.TYPE_CHECKING:
|
||||
import openllm
|
||||
from fs.base import FS
|
||||
from openllm_core._typing_compat import LiteralString, LiteralContainerRegistry, LiteralContainerVersionStrategy
|
||||
|
||||
import openllm
|
||||
from bentoml._internal.bento import BentoStore
|
||||
from bentoml._internal.models.model import ModelStore
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'
|
||||
|
||||
@@ -1,15 +1,27 @@
|
||||
# mypy: disable-error-code="misc"
|
||||
'''OCI-related utilities for OpenLLM. This module is considered to be internal and API are subjected to change.'''
|
||||
from __future__ import annotations
|
||||
import functools, importlib, logging, os, pathlib, shutil, subprocess, typing as t, openllm_core
|
||||
import functools
|
||||
import importlib
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing as t
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import attr, orjson, bentoml, openllm
|
||||
from openllm_core.utils.lazy import VersionInfo
|
||||
|
||||
import attr
|
||||
import orjson
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
import openllm_core
|
||||
from openllm_core.utils.lazy import VersionInfo
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
|
||||
from ghapi import all
|
||||
from openllm_core._typing_compat import RefTuple, LiteralString
|
||||
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralString, RefTuple
|
||||
all = openllm_core.utils.LazyLoader('all', globals(), 'ghapi.all') # noqa: F811
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,15 +1,28 @@
|
||||
from __future__ import annotations
|
||||
import functools, importlib.util, os, typing as t, logging, click, click_option_group as cog, inflection, orjson, bentoml, openllm
|
||||
from click import shell_completion as sc
|
||||
from bentoml_cli.utils import BentoMLCommandGroup
|
||||
from click.shell_completion import CompletionItem
|
||||
from openllm_core.utils import DEBUG
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from openllm_core._typing_compat import LiteralString, DictStrAny, ParamSpec, Concatenate
|
||||
from . import termui
|
||||
import functools
|
||||
import importlib.util
|
||||
import logging
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import click_option_group as cog
|
||||
import inflection
|
||||
import orjson
|
||||
from bentoml_cli.utils import BentoMLCommandGroup
|
||||
from click import shell_completion as sc
|
||||
from click.shell_completion import CompletionItem
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralString, ParamSpec
|
||||
from openllm_core.utils import DEBUG
|
||||
|
||||
from . import termui
|
||||
if t.TYPE_CHECKING:
|
||||
import subprocess
|
||||
|
||||
from openllm_core._configuration import LLMConfig
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -1,15 +1,26 @@
|
||||
from __future__ import annotations
|
||||
import itertools, logging, os, re, subprocess, sys, typing as t, bentoml, openllm, openllm_core
|
||||
import itertools
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from simple_di import Provide, inject
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
import openllm_core
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from openllm.exceptions import OpenLLMException
|
||||
|
||||
from . import termui
|
||||
from ._factory import start_command_factory
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._configuration import LLMConfig
|
||||
from openllm_core._typing_compat import LiteralString, LiteralRuntime, LiteralContainerRegistry, LiteralContainerVersionStrategy
|
||||
from bentoml._internal.bento import BentoStore
|
||||
from openllm_core._configuration import LLMConfig
|
||||
from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy, LiteralRuntime, LiteralString
|
||||
logger = logging.getLogger(__name__)
|
||||
def _start(
|
||||
model_name: str,
|
||||
|
||||
@@ -20,22 +20,47 @@ bentomodel = openllm.import_model("falcon", model_id='tiiuae/falcon-7b-instruct'
|
||||
```
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import functools, http.client, inspect, itertools, logging, os, platform, re, subprocess, sys, time, traceback, typing as t, attr, click, click_option_group as cog, fs, fs.copy, fs.errors, inflection, orjson, bentoml, openllm
|
||||
from simple_di import Provide, inject
|
||||
import functools
|
||||
import http.client
|
||||
import inspect
|
||||
import itertools
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import typing as t
|
||||
|
||||
import attr
|
||||
import click
|
||||
import click_option_group as cog
|
||||
import fs
|
||||
import fs.copy
|
||||
import fs.errors
|
||||
import inflection
|
||||
import orjson
|
||||
from bentoml_cli.utils import BentoMLCommandGroup, opt_callback
|
||||
from simple_di import Provide, inject
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from bentoml._internal.models.model import ModelStore
|
||||
from . import termui
|
||||
from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
|
||||
from openllm import bundle, serialisation
|
||||
from openllm.exceptions import OpenLLMException
|
||||
from openllm.models.auto import CONFIG_MAPPING, MODEL_FLAX_MAPPING_NAMES, MODEL_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES, MODEL_VLLM_MAPPING_NAMES, AutoConfig, AutoLLM
|
||||
from openllm_core._typing_compat import DictStrAny, ParamSpec, Concatenate, LiteralString, Self, LiteralRuntime
|
||||
from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
|
||||
from openllm.utils import infer_auto_class
|
||||
from openllm_core._typing_compat import Concatenate, DictStrAny, LiteralRuntime, LiteralString, ParamSpec, Self
|
||||
from openllm_core.utils import DEBUG, DEBUG_ENV_VAR, OPTIONAL_DEPENDENCIES, QUIET_ENV_VAR, EnvVarMixin, LazyLoader, analytics, bentoml_cattr, compose, configure_logging, dantic, first_not_none, get_debug_mode, get_quiet_mode, is_torch_available, is_transformers_supports_agent, resolve_user_filepath, set_debug_mode, set_quiet_mode
|
||||
|
||||
from . import termui
|
||||
from ._factory import FC, LiteralOutput, _AnyCallable, bettertransformer_option, container_registry_option, fast_option, machine_option, model_id_option, model_name_argument, model_version_option, output_option, parse_device_callback, quantize_option, serialisation_option, start_command_factory, workers_per_resource_option
|
||||
if t.TYPE_CHECKING:
|
||||
import torch
|
||||
|
||||
from bentoml._internal.bento import BentoStore
|
||||
from bentoml._internal.container import DefaultBuilder
|
||||
from openllm_core._schema import EmbeddingsOutput
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, click, orjson, openllm
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import orjson
|
||||
|
||||
import openllm
|
||||
from openllm.cli import termui
|
||||
from openllm.cli._factory import machine_option, container_registry_option
|
||||
from openllm.cli._factory import container_registry_option, machine_option
|
||||
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
|
||||
@click.command(
|
||||
'build_base_container',
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
from __future__ import annotations
|
||||
import shutil, subprocess, typing as t, click, psutil, bentoml
|
||||
from simple_di import Provide, inject
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import psutil
|
||||
from simple_di import Provide, inject
|
||||
|
||||
import bentoml
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from openllm.cli import termui
|
||||
from openllm.cli._factory import bento_complete_envvar, machine_option
|
||||
|
||||
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
|
||||
@click.command('dive_bentos', context_settings=termui.CONTEXT_SETTINGS)
|
||||
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, click, bentoml
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
from simple_di import Provide, inject
|
||||
|
||||
import bentoml
|
||||
from bentoml._internal.bento.bento import BentoInfo
|
||||
from bentoml._internal.bento.build_config import DockerOptions
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
@@ -8,7 +12,6 @@ from bentoml._internal.container.generate import generate_containerfile
|
||||
from openllm.cli import termui
|
||||
from openllm.cli._factory import bento_complete_envvar
|
||||
from openllm_core.utils import bentoml_cattr
|
||||
|
||||
if t.TYPE_CHECKING: from bentoml._internal.bento import BentoStore
|
||||
@click.command('get_containerfile', context_settings=termui.CONTEXT_SETTINGS, help='Return Containerfile of any given Bento.')
|
||||
@click.argument('bento', type=str, shell_complete=bento_complete_envvar)
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, click, inflection, orjson, openllm
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import inflection
|
||||
import orjson
|
||||
from bentoml_cli.utils import opt_callback
|
||||
|
||||
import openllm
|
||||
from openllm.cli import termui
|
||||
from openllm.cli._factory import model_complete_envvar, output_option, machine_option
|
||||
from openllm.cli._factory import machine_option, model_complete_envvar, output_option
|
||||
from openllm_core._prompt import process_prompt
|
||||
LiteralOutput = t.Literal['json', 'pretty', 'porcelain']
|
||||
@click.command('get_prompt', context_settings=termui.CONTEXT_SETTINGS)
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import click, inflection, orjson, bentoml, openllm
|
||||
|
||||
import click
|
||||
import inflection
|
||||
import orjson
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from bentoml._internal.utils import human_readable_size
|
||||
from openllm.cli import termui
|
||||
from openllm.cli._factory import LiteralOutput, output_option
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, bentoml, openllm, orjson, inflection, click
|
||||
from openllm.cli import termui
|
||||
from bentoml._internal.utils import human_readable_size
|
||||
from openllm.cli._factory import LiteralOutput, model_name_argument, output_option, model_complete_envvar
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import inflection
|
||||
import orjson
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from bentoml._internal.utils import human_readable_size
|
||||
from openllm.cli import termui
|
||||
from openllm.cli._factory import LiteralOutput, model_complete_envvar, model_name_argument, output_option
|
||||
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
|
||||
@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
|
||||
@model_name_argument(required=False, shell_complete=model_complete_envvar)
|
||||
|
||||
@@ -1,11 +1,23 @@
|
||||
from __future__ import annotations
|
||||
import importlib.machinery, logging, os, pkgutil, subprocess, sys, tempfile, typing as t, click, yaml
|
||||
from openllm.cli import termui
|
||||
from openllm import playground
|
||||
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
|
||||
import importlib.machinery
|
||||
import logging
|
||||
import os
|
||||
import pkgutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import yaml
|
||||
|
||||
from openllm import playground
|
||||
from openllm.cli import termui
|
||||
from openllm_core.utils import is_jupyter_available, is_jupytext_available, is_notebook_available
|
||||
if t.TYPE_CHECKING:
|
||||
import jupytext, nbformat
|
||||
import jupytext
|
||||
import nbformat
|
||||
|
||||
from openllm_core._typing_compat import DictStrAny
|
||||
logger = logging.getLogger(__name__)
|
||||
def load_notebook_metadata() -> DictStrAny:
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import os, typing as t, click, inflection, openllm
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import click
|
||||
import inflection
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: from openllm_core._typing_compat import DictStrAny
|
||||
def echo(text: t.Any, fg: str = 'green', _with_style: bool = True, **attrs: t.Any) -> None:
|
||||
attrs['fg'] = fg if not openllm.utils.get_debug_mode() else None
|
||||
|
||||
@@ -11,7 +11,9 @@ client.embed("What is the difference between gather and scatter?")
|
||||
```
|
||||
'''
|
||||
from __future__ import annotations
|
||||
import openllm_client, typing as t
|
||||
import typing as t
|
||||
|
||||
import openllm_client
|
||||
if t.TYPE_CHECKING: from openllm_client import AsyncHTTPClient as AsyncHTTPClient, BaseAsyncClient as BaseAsyncClient, BaseClient as BaseClient, HTTPClient as HTTPClient, GrpcClient as GrpcClient, AsyncGrpcClient as AsyncGrpcClient
|
||||
# Returns the attribute names of the `openllm_client` package, sorted.
# NOTE(review): appears to be a module-level `__dir__` hook (PEP 562) so that
# dir() on this module mirrors `openllm_client` — confirm against the full file.
def __dir__() -> t.Sequence[str]:
|
||||
return sorted(dir(openllm_client))
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
'''Base exceptions for OpenLLM. This extends BentoML exceptions.'''
|
||||
from __future__ import annotations
|
||||
from openllm_core.exceptions import OpenLLMException as OpenLLMException, GpuNotAvailableError as GpuNotAvailableError, ValidationError as ValidationError, ForbiddenAttributeError as ForbiddenAttributeError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError
|
||||
|
||||
from openllm_core.exceptions import Error as Error, FineTuneStrategyNotSupportedError as FineTuneStrategyNotSupportedError, ForbiddenAttributeError as ForbiddenAttributeError, GpuNotAvailableError as GpuNotAvailableError, MissingAnnotationAttributeError as MissingAnnotationAttributeError, MissingDependencyError as MissingDependencyError, OpenLLMException as OpenLLMException, ValidationError as ValidationError
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, os
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
from openllm_core.config import CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES, AutoConfig as AutoConfig
|
||||
from openllm_core.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
|
||||
from openllm_core.config import AutoConfig as AutoConfig, CONFIG_MAPPING as CONFIG_MAPPING, CONFIG_MAPPING_NAMES as CONFIG_MAPPING_NAMES
|
||||
_import_structure: dict[str, list[str]] = {
|
||||
'modeling_auto': ['MODEL_MAPPING_NAMES'],
|
||||
'modeling_flax_auto': ['MODEL_FLAX_MAPPING_NAMES'],
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
# mypy: disable-error-code="type-arg"
|
||||
from __future__ import annotations
|
||||
import importlib, inspect, logging, typing as t
|
||||
import importlib
|
||||
import inspect
|
||||
import logging
|
||||
import typing as t
|
||||
from collections import OrderedDict
|
||||
import inflection, openllm
|
||||
from openllm_core.utils import ReprMixin
|
||||
|
||||
import inflection
|
||||
|
||||
import openllm
|
||||
from openllm_core.utils import ReprMixin
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import LiteralString, LLMRunner
|
||||
import types
|
||||
from collections import _odict_items, _odict_keys, _odict_values
|
||||
|
||||
from _typeshed import SupportsIter
|
||||
|
||||
from openllm_core._typing_compat import LiteralString, LLMRunner
|
||||
ConfigModelKeysView = _odict_keys[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
|
||||
ConfigModelValuesView = _odict_values[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
|
||||
ConfigModelItemsView = _odict_items[type[openllm.LLMConfig], type[openllm.LLM[t.Any, t.Any]]]
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
from collections import OrderedDict
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
|
||||
from openllm_core.config import CONFIG_MAPPING_NAMES
|
||||
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
MODEL_MAPPING_NAMES = OrderedDict([('chatglm', 'ChatGLM'), ('dolly_v2', 'DollyV2'), ('falcon', 'Falcon'), ('flan_t5', 'FlanT5'), ('gpt_neox', 'GPTNeoX'), ('llama', 'Llama'), ('mpt', 'MPT'), (
|
||||
'opt', 'OPT'
|
||||
), ('stablelm', 'StableLM'), ('starcoder', 'StarCoder'), ('baichuan', 'Baichuan')])
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
from collections import OrderedDict
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
|
||||
from openllm_core.config import CONFIG_MAPPING_NAMES
|
||||
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
MODEL_FLAX_MAPPING_NAMES = OrderedDict([('flan_t5', 'FlaxFlanT5'), ('opt', 'FlaxOPT')])
|
||||
MODEL_FLAX_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FLAX_MAPPING_NAMES)
|
||||
class AutoFlaxLLM(BaseAutoLLMClass):
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
from collections import OrderedDict
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
|
||||
from openllm_core.config import CONFIG_MAPPING_NAMES
|
||||
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
MODEL_TF_MAPPING_NAMES = OrderedDict([('flan_t5', 'TFFlanT5'), ('opt', 'TFOPT')])
|
||||
MODEL_TF_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_TF_MAPPING_NAMES)
|
||||
class AutoTFLLM(BaseAutoLLMClass):
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
from collections import OrderedDict
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
|
||||
from openllm_core.config import CONFIG_MAPPING_NAMES
|
||||
|
||||
from .factory import BaseAutoLLMClass, _LazyAutoMapping
|
||||
MODEL_VLLM_MAPPING_NAMES = OrderedDict([('baichuan', 'VLLMBaichuan'), ('dolly_v2', 'VLLMDollyV2'), ('falcon', 'VLLMFalcon'), ('gpt_neox', 'VLLMGPTNeoX'), ('mpt', 'VLLMMPT'), (
|
||||
'opt', 'VLLMOPT'
|
||||
), ('stablelm', 'VLLMStableLM'), ('starcoder', 'VLLMStarCoder'), ('llama', 'VLLMLlama')])
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_baichuan import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_BAICHUAN_COMMAND_DOCSTRING as START_BAICHUAN_COMMAND_DOCSTRING, BaichuanConfig as BaichuanConfig
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
class Baichuan(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
class VLLMBaichuan(openllm.LLM['vllm.LLMEngine', 'transformers.PreTrainedTokenizerBase']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_cpm_kernels_available, is_torch_available
|
||||
from openllm_core.config.configuration_chatglm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_CHATGLM_COMMAND_DOCSTRING as START_CHATGLM_COMMAND_DOCSTRING, ChatGLMConfig as ChatGLMConfig
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
@@ -13,7 +15,8 @@ class ChatGLM(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrain
|
||||
return self.model.chat(self.tokenizer, prompt, generation_config=self.config.model_construct_env(**attrs).to_generation_config())
|
||||
|
||||
def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
|
||||
import torch, torch.nn.functional as F
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
embeddings: list[list[float]] = []
|
||||
num_tokens = 0
|
||||
for prompt in prompts:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_DOLLY_V2_COMMAND_DOCSTRING as START_DOLLY_V2_COMMAND_DOCSTRING, DollyV2Config as DollyV2Config
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import logging, re, typing as t, openllm
|
||||
import logging
|
||||
import re
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
from openllm_core._typing_compat import overload
|
||||
from openllm_core.config.configuration_dolly_v2 import DEFAULT_PROMPT_TEMPLATE, END_KEY, RESPONSE_KEY, get_special_token_id
|
||||
|
||||
if t.TYPE_CHECKING: import torch, transformers, tensorflow as tf
|
||||
else: torch, transformers, tf = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers'), openllm.utils.LazyLoader('tf', globals(), 'tensorflow')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_falcon import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FALCON_COMMAND_DOCSTRING as START_FALCON_COMMAND_DOCSTRING, FalconConfig as FalconConfig
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import torch, transformers
|
||||
else: torch, transformers = openllm.utils.LazyLoader('torch', globals(), 'torch'), openllm.utils.LazyLoader('transformers', globals(), 'transformers')
|
||||
class Falcon(openllm.LLM['transformers.PreTrainedModel', 'transformers.PreTrainedTokenizerBase']):
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available
|
||||
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_FLAN_T5_COMMAND_DOCSTRING as START_FLAN_T5_COMMAND_DOCSTRING, FlanT5Config as FlanT5Config
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
@@ -13,7 +15,8 @@ class FlanT5(openllm.LLM['transformers.T5ForConditionalGeneration', 'transformer
|
||||
)
|
||||
|
||||
def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
|
||||
import torch, torch.nn.functional as F
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
embeddings: list[list[float]] = []
|
||||
num_tokens = 0
|
||||
for prompt in prompts:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
from openllm_core._prompt import process_prompt
|
||||
from openllm_core.config.configuration_flan_t5 import DEFAULT_PROMPT_TEMPLATE
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
class TFFlanT5(openllm.LLM['transformers.TFT5ForConditionalGeneration', 'transformers.T5TokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_gpt_neox import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_GPT_NEOX_COMMAND_DOCSTRING as START_GPT_NEOX_COMMAND_DOCSTRING, GPTNeoXConfig as GPTNeoXConfig
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
class VLLMGPTNeoX(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_llama import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_LLAMA_COMMAND_DOCSTRING as START_LLAMA_COMMAND_DOCSTRING, LlamaConfig as LlamaConfig
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaTokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
@@ -10,7 +12,8 @@ class Llama(openllm.LLM['transformers.LlamaForCausalLM', 'transformers.LlamaToke
|
||||
return {'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}
|
||||
|
||||
def embeddings(self, prompts: list[str]) -> openllm.LLMEmbeddings:
|
||||
import torch, torch.nn.functional as F
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
encoding = self.tokenizer(prompts, padding=True, return_tensors='pt').to(self.device)
|
||||
input_ids, attention_mask = encoding['input_ids'], encoding['attention_mask']
|
||||
with torch.inference_mode():
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
class VLLMLlama(openllm.LLM['vllm.LLMEngine', 'transformers.LlamaTokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_mpt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, PROMPT_MAPPING as PROMPT_MAPPING, START_MPT_COMMAND_DOCSTRING as START_MPT_COMMAND_DOCSTRING, MPTConfig as MPTConfig
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, bentoml, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from openllm.utils import generate_labels, is_triton_available
|
||||
if t.TYPE_CHECKING: import transformers, torch
|
||||
|
||||
@@ -31,7 +35,8 @@ class MPT(openllm.LLM['transformers.PreTrainedModel', 'transformers.GPTNeoXToken
|
||||
return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.bfloat16 if torch.cuda.is_available() else torch.float32}, {}
|
||||
|
||||
def import_model(self, *args: t.Any, trust_remote_code: bool = True, **attrs: t.Any) -> bentoml.Model:
|
||||
import torch, transformers
|
||||
import torch
|
||||
import transformers
|
||||
_, tokenizer_attrs = self.llm_parameters
|
||||
torch_dtype = attrs.pop('torch_dtype', self.dtype)
|
||||
device_map = attrs.pop('device_map', None)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers, vllm
|
||||
class VLLMMPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_flax_available, is_tf_available, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_OPT_COMMAND_DOCSTRING as START_OPT_COMMAND_DOCSTRING, OPTConfig as OPTConfig
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, bentoml, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from openllm._prompt import process_prompt
|
||||
from openllm.utils import generate_labels
|
||||
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, bentoml, openllm
|
||||
import typing as t
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from openllm_core.utils import generate_labels
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
class TFOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tokenizer']):
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
from openllm_core._prompt import process_prompt
|
||||
from openllm_core.config.configuration_opt import DEFAULT_PROMPT_TEMPLATE
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_stablelm import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STABLELM_COMMAND_DOCSTRING as START_STABLELM_COMMAND_DOCSTRING, StableLMConfig as StableLMConfig
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
class StableLM(openllm.LLM['transformers.GPTNeoXForCausalLM', 'transformers.GPTNeoXTokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
class VLLMStableLM(openllm.LLM['vllm.LLMEngine', 'transformers.GPTNeoXTokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import sys, typing as t
|
||||
import sys
|
||||
import typing as t
|
||||
|
||||
from openllm.exceptions import MissingDependencyError
|
||||
from openllm.utils import LazyModule, is_torch_available, is_vllm_available
|
||||
from openllm_core.config.configuration_starcoder import DEFAULT_PROMPT_TEMPLATE as DEFAULT_PROMPT_TEMPLATE, START_STARCODER_COMMAND_DOCSTRING as START_STARCODER_COMMAND_DOCSTRING, StarCoderConfig as StarCoderConfig
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, bentoml, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from openllm.utils import generate_labels
|
||||
from openllm_core.config.configuration_starcoder import EOD, FIM_MIDDLE, FIM_PAD, FIM_PREFIX, FIM_SUFFIX
|
||||
if t.TYPE_CHECKING: import transformers
|
||||
@@ -12,7 +16,8 @@ class StarCoder(openllm.LLM['transformers.GPTBigCodeForCausalLM', 'transformers.
|
||||
return {'device_map': 'auto' if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None, 'torch_dtype': torch.float16 if torch.cuda.is_available() else torch.float32}, {}
|
||||
|
||||
def import_model(self, *args: t.Any, trust_remote_code: bool = False, **attrs: t.Any) -> bentoml.Model:
|
||||
import torch, transformers
|
||||
import torch
|
||||
import transformers
|
||||
torch_dtype, device_map = attrs.pop('torch_dtype', torch.float16), attrs.pop('device_map', 'auto')
|
||||
tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_id, **self.llm_parameters[-1])
|
||||
tokenizer.add_special_tokens({'additional_special_tokens': [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD], 'pad_token': EOD})
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, openllm
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: import vllm, transformers
|
||||
class VLLMStarCoder(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2TokenizerFast']):
|
||||
__openllm_internal__ = True
|
||||
|
||||
@@ -22,10 +22,8 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
from datasets import load_dataset
|
||||
from trl import SFTTrainer
|
||||
|
||||
DEFAULT_MODEL_ID = "ybelkada/falcon-7b-sharded-bf16"
|
||||
DATASET_NAME = "timdettmers/openassistant-guanaco"
|
||||
|
||||
@dataclasses.dataclass
|
||||
class TrainingArguments:
|
||||
per_device_train_batch_size: int = dataclasses.field(default=4)
|
||||
@@ -42,12 +40,10 @@ class TrainingArguments:
|
||||
group_by_length: bool = dataclasses.field(default=True)
|
||||
lr_scheduler_type: str = dataclasses.field(default="constant")
|
||||
output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "falcon"))
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ModelArguments:
|
||||
model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
|
||||
max_sequence_length: int = dataclasses.field(default=512)
|
||||
|
||||
parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
|
||||
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
||||
# If we pass only one argument to the script and it's the path to a json file,
|
||||
@@ -56,13 +52,20 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
||||
else:
|
||||
model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
|
||||
|
||||
model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True,).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h",],)
|
||||
model, tokenizer = openllm.AutoLLM.for_model("falcon", model_id=model_args.model_id, quantize="int4", bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, ensure_available=True).prepare_for_training(adapter_type="lora", lora_alpha=16, lora_dropout=0.1, r=16, bias="none", target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"])
|
||||
model.config.use_cache = False
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
dataset = load_dataset(DATASET_NAME, split="train")
|
||||
|
||||
trainer = SFTTrainer(model=model, train_dataset=dataset, dataset_text_field="text", max_seq_length=model_args.max_sequence_length, tokenizer=tokenizer, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),),)
|
||||
trainer = SFTTrainer(
|
||||
model=model,
|
||||
train_dataset=dataset,
|
||||
dataset_text_field="text",
|
||||
max_seq_length=model_args.max_sequence_length,
|
||||
tokenizer=tokenizer,
|
||||
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
|
||||
)
|
||||
|
||||
# upcast layernorm in float32 for more stable training
|
||||
for name, module in trainer.model.named_modules():
|
||||
|
||||
@@ -4,7 +4,6 @@ import logging
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
|
||||
openllm.utils.configure_logging()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -13,7 +12,6 @@ MAX_NEW_TOKENS = 384
|
||||
|
||||
Q = "Answer the following question, step by step:\n{q}\nA:"
|
||||
question = "What is the meaning of life?"
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("question", default=question)
|
||||
@@ -44,11 +42,9 @@ def main() -> int:
|
||||
logger.info("=" * 10, "Response:", r.llm.postprocess_generate(prompt, res))
|
||||
|
||||
return 0
|
||||
|
||||
def _mp_fn(index: t.Any): # noqa # type: ignore
|
||||
# For xla_spawn (TPUs)
|
||||
main()
|
||||
|
||||
if openllm.utils.in_notebook():
|
||||
main()
|
||||
else:
|
||||
|
||||
@@ -29,7 +29,6 @@ from random import randint, randrange
|
||||
|
||||
import bitsandbytes as bnb
|
||||
from datasets import load_dataset
|
||||
|
||||
# COPIED FROM https://github.com/artidoro/qlora/blob/main/qlora.py
|
||||
def find_all_linear_names(model):
|
||||
lora_module_names = set()
|
||||
@@ -41,13 +40,11 @@ def find_all_linear_names(model):
|
||||
if "lm_head" in lora_module_names: # needed for 16-bit
|
||||
lora_module_names.remove("lm_head")
|
||||
return list(lora_module_names)
|
||||
|
||||
# Change this to the local converted path if you don't have access to the meta-llama model
|
||||
DEFAULT_MODEL_ID = "meta-llama/Llama-2-7b-hf"
|
||||
# change this to 'main' if you want to use the latest llama
|
||||
DEFAULT_MODEL_VERSION = "335a02887eb6684d487240bbc28b5699298c3135"
|
||||
DATASET_NAME = "databricks/databricks-dolly-15k"
|
||||
|
||||
def format_dolly(sample):
|
||||
instruction = f"### Instruction\n{sample['instruction']}"
|
||||
context = f"### Context\n{sample['context']}" if len(sample["context"]) > 0 else None
|
||||
@@ -55,15 +52,12 @@ def format_dolly(sample):
|
||||
# join all the parts together
|
||||
prompt = "\n\n".join([i for i in [instruction, context, response] if i is not None])
|
||||
return prompt
|
||||
|
||||
# template dataset to add prompt to each sample
|
||||
def template_dataset(sample, tokenizer):
|
||||
sample["text"] = f"{format_dolly(sample)}{tokenizer.eos_token}"
|
||||
return sample
|
||||
|
||||
# empty list to save remainder from batches to use in next batch
|
||||
remainder = {"input_ids": [], "attention_mask": [], "token_type_ids": []}
|
||||
|
||||
def chunk(sample, chunk_length=2048):
|
||||
# define global remainder variable to save remainder from batches to use in next batch
|
||||
global remainder
|
||||
@@ -84,7 +78,6 @@ def chunk(sample, chunk_length=2048):
|
||||
# prepare labels
|
||||
result["labels"] = result["input_ids"].copy()
|
||||
return result
|
||||
|
||||
def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
|
||||
# Load dataset from the hub
|
||||
dataset = load_dataset(dataset_name, split="train")
|
||||
@@ -103,11 +96,20 @@ def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
|
||||
# Print total number of samples
|
||||
print(f"Total number of samples: {len(lm_dataset)}")
|
||||
return lm_dataset
|
||||
|
||||
def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
|
||||
def prepare_for_int4_training(model_id: str, model_version: str | None = None, gradient_checkpointing: bool = True, bf16: bool = True,
|
||||
) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
|
||||
from peft.tuners.lora import LoraLayer
|
||||
|
||||
llm = openllm.AutoLLM.for_model("llama", model_id=model_id, model_version=model_version, ensure_available=True, quantize="int4", bnb_4bit_compute_dtype=torch.bfloat16, use_cache=not gradient_checkpointing, device_map="auto",)
|
||||
llm = openllm.AutoLLM.for_model(
|
||||
"llama",
|
||||
model_id=model_id,
|
||||
model_version=model_version,
|
||||
ensure_available=True,
|
||||
quantize="int4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
use_cache=not gradient_checkpointing,
|
||||
device_map="auto",
|
||||
)
|
||||
print("Model summary:", llm.model)
|
||||
|
||||
# get lora target modules
|
||||
@@ -128,7 +130,6 @@ def prepare_for_int4_training(model_id: str, model_version: str | None = None, g
|
||||
if bf16 and module.weight.dtype == torch.float32:
|
||||
module = module.to(torch.bfloat16)
|
||||
return model, tokenizer
|
||||
|
||||
@dataclasses.dataclass
|
||||
class TrainingArguments:
|
||||
per_device_train_batch_size: int = dataclasses.field(default=1)
|
||||
@@ -140,14 +141,12 @@ class TrainingArguments:
|
||||
report_to: str = dataclasses.field(default="none")
|
||||
output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "llama"))
|
||||
save_strategy: str = dataclasses.field(default="no")
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ModelArguments:
|
||||
model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
|
||||
model_version: str = dataclasses.field(default=DEFAULT_MODEL_VERSION)
|
||||
seed: int = dataclasses.field(default=42)
|
||||
merge_weights: bool = dataclasses.field(default=False)
|
||||
|
||||
if openllm.utils.in_notebook():
|
||||
model_args, training_rags = ModelArguments(), TrainingArguments()
|
||||
else:
|
||||
@@ -161,7 +160,6 @@ else:
|
||||
|
||||
# import the model first hand
|
||||
openllm.import_model("llama", model_id=model_args.model_id, model_version=model_args.model_version)
|
||||
|
||||
def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
|
||||
import peft
|
||||
|
||||
@@ -170,7 +168,12 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
|
||||
model, tokenizer = prepare_for_int4_training(model_args.model_id, gradient_checkpointing=training_args.gradient_checkpointing, bf16=training_args.bf16,)
|
||||
datasets = prepare_datasets(tokenizer)
|
||||
|
||||
trainer = transformers.Trainer(model=model, args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)), train_dataset=datasets, data_collator=transformers.default_data_collator,)
|
||||
trainer = transformers.Trainer(
|
||||
model=model,
|
||||
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
|
||||
train_dataset=datasets,
|
||||
data_collator=transformers.default_data_collator,
|
||||
)
|
||||
|
||||
trainer.train()
|
||||
|
||||
@@ -191,5 +194,4 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
|
||||
model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"), safe_serialization=True, max_shard_size="2GB")
|
||||
else:
|
||||
trainer.model.save_pretrained(os.path.join(training_args.output_dir, "lora"))
|
||||
|
||||
train_loop(model_args, training_args)
|
||||
|
||||
@@ -23,12 +23,14 @@ from datasets import load_dataset
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from peft import PeftModel
|
||||
|
||||
DEFAULT_MODEL_ID = "facebook/opt-6.7b"
|
||||
|
||||
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments,):
|
||||
return transformers.Trainer(model=model, train_dataset=dataset_dict["train"], args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args),), data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),)
|
||||
|
||||
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):
|
||||
return transformers.Trainer(
|
||||
model=model,
|
||||
train_dataset=dataset_dict["train"],
|
||||
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
|
||||
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
|
||||
)
|
||||
@dataclasses.dataclass
|
||||
class TrainingArguments:
|
||||
per_device_train_batch_size: int = dataclasses.field(default=4)
|
||||
@@ -39,11 +41,9 @@ class TrainingArguments:
|
||||
fp16: bool = dataclasses.field(default=True)
|
||||
logging_steps: int = dataclasses.field(default=1)
|
||||
output_dir: str = dataclasses.field(default=os.path.join(os.getcwd(), "outputs", "opt"))
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ModelArguments:
|
||||
model_id: str = dataclasses.field(default=DEFAULT_MODEL_ID)
|
||||
|
||||
parser = transformers.HfArgumentParser((ModelArguments, TrainingArguments))
|
||||
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
||||
# If we pass only one argument to the script and it's the path to a json file,
|
||||
@@ -52,7 +52,7 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
||||
else:
|
||||
model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
|
||||
|
||||
model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True,).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none",)
|
||||
model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8", ensure_available=True).prepare_for_training(adapter_type="lora", r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
|
||||
|
||||
# ft on english_quotes
|
||||
data = load_dataset("Abirate/english_quotes")
|
||||
|
||||
@@ -23,13 +23,18 @@ llm.save_pretrained("./path/to/local-dolly")
|
||||
```
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import importlib, typing as t
|
||||
import cloudpickle, fs, openllm
|
||||
from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
|
||||
from openllm_core._typing_compat import M, T, ParamSpec
|
||||
import importlib
|
||||
import typing as t
|
||||
|
||||
import cloudpickle
|
||||
import fs
|
||||
|
||||
import openllm
|
||||
from bentoml._internal.models.model import CUSTOM_OBJECTS_FILENAME
|
||||
from openllm_core._typing_compat import M, ParamSpec, T
|
||||
if t.TYPE_CHECKING:
|
||||
import bentoml
|
||||
|
||||
from . import constants as constants, ggml as ggml, transformers as transformers
|
||||
P = ParamSpec('P')
|
||||
def load_tokenizer(llm: openllm.LLM[t.Any, T], **tokenizer_attrs: t.Any) -> T:
|
||||
|
||||
@@ -4,8 +4,9 @@ This requires ctransformers to be installed.
|
||||
'''
|
||||
from __future__ import annotations
|
||||
import typing as t
|
||||
import bentoml, openllm
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: from openllm_core._typing_compat import M
|
||||
|
||||
_conversion_strategy = {'pt': 'ggml'}
|
||||
|
||||
@@ -1,19 +1,27 @@
|
||||
'''Serialisation related implementation for Transformers-based implementation.'''
|
||||
from __future__ import annotations
|
||||
import importlib, logging, typing as t
|
||||
import bentoml, openllm
|
||||
import importlib
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
from simple_di import Provide, inject
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
from bentoml._internal.models.model import ModelOptions
|
||||
from .weights import HfIgnore
|
||||
from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
|
||||
|
||||
from ._helpers import check_unintialised_params, infer_autoclass_from_llm, infer_tokenizers_from_llm, make_model_signatures, process_config, update_model
|
||||
from .weights import HfIgnore
|
||||
if t.TYPE_CHECKING:
|
||||
import types
|
||||
|
||||
import vllm, auto_gptq as autogptq, transformers, torch
|
||||
import auto_gptq as autogptq
|
||||
import torch
|
||||
import torch.nn
|
||||
import transformers
|
||||
import vllm
|
||||
|
||||
from bentoml._internal.models import ModelStore
|
||||
from openllm_core._typing_compat import DictStrAny, M, T
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
from __future__ import annotations
|
||||
import copy, typing as t, openllm_core, openllm
|
||||
import copy
|
||||
import typing as t
|
||||
|
||||
import openllm
|
||||
import openllm_core
|
||||
from bentoml._internal.models.model import ModelInfo, ModelSignature
|
||||
from openllm.serialisation.constants import FRAMEWORK_TO_AUTOCLASS_MAPPING, HUB_ATTRS
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import torch, transformers, bentoml
|
||||
import torch
|
||||
import transformers
|
||||
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
||||
|
||||
import bentoml
|
||||
from bentoml._internal.models.model import ModelSignaturesType
|
||||
from openllm_core._typing_compat import DictStrAny, M, T
|
||||
else:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
import typing as t, attr
|
||||
import typing as t
|
||||
|
||||
import attr
|
||||
from huggingface_hub import HfApi
|
||||
if t.TYPE_CHECKING:
|
||||
import openllm
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
'''Tests utilities for OpenLLM.'''
|
||||
from __future__ import annotations
|
||||
import contextlib, logging, shutil, subprocess, typing as t, bentoml, openllm
|
||||
import contextlib
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import typing as t
|
||||
|
||||
import bentoml
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: from ._typing_compat import LiteralRuntime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -4,12 +4,19 @@ User can import these function for convenience, but
|
||||
we won't ensure backward compatibility for these functions. So use with caution.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import typing as t, openllm_core
|
||||
from . import (dummy_flax_objects as dummy_flax_objects, dummy_pt_objects as dummy_pt_objects, dummy_tf_objects as dummy_tf_objects, dummy_vllm_objects as dummy_vllm_objects,)
|
||||
import typing as t
|
||||
|
||||
import openllm_core
|
||||
|
||||
from . import (
|
||||
dummy_flax_objects as dummy_flax_objects,
|
||||
dummy_pt_objects as dummy_pt_objects,
|
||||
dummy_tf_objects as dummy_tf_objects,
|
||||
dummy_vllm_objects as dummy_vllm_objects,
|
||||
)
|
||||
if t.TYPE_CHECKING:
|
||||
from openllm_core._typing_compat import LiteralRuntime
|
||||
import openllm
|
||||
from openllm_core._typing_compat import LiteralRuntime
|
||||
def generate_labels(llm: openllm.LLM[t.Any, t.Any]) -> dict[str, t.Any]:
|
||||
return {'runtime': llm.runtime, 'framework': 'openllm', 'model_name': llm.config['model_name'], 'architecture': llm.config['architecture'], 'serialisation_format': llm._serialisation_format}
|
||||
def infer_auto_class(implementation: LiteralRuntime) -> type[openllm.AutoLLM | openllm.AutoTFLLM | openllm.AutoFlaxLLM | openllm.AutoVLLM]:
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import logging, typing as t, openllm
|
||||
from openllm_core._configuration import ModelSettings
|
||||
import logging
|
||||
import typing as t
|
||||
|
||||
from hypothesis import strategies as st
|
||||
|
||||
import openllm
|
||||
from openllm_core._configuration import ModelSettings
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
env_strats = st.sampled_from([openllm.utils.EnvVarMixin(model_name) for model_name in openllm.CONFIG_MAPPING.keys()])
|
||||
|
||||
@@ -1,8 +1,18 @@
|
||||
from __future__ import annotations
|
||||
import contextlib, os, sys, typing as t, attr, pytest, transformers, openllm
|
||||
import contextlib
|
||||
import os
|
||||
import sys
|
||||
import typing as t
|
||||
from unittest import mock
|
||||
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
|
||||
|
||||
import attr
|
||||
import pytest
|
||||
import transformers
|
||||
from hypothesis import assume, given, strategies as st
|
||||
|
||||
import openllm
|
||||
from openllm_core._configuration import GenerationConfig, ModelSettings, field_env_key
|
||||
|
||||
from ._strategies._configuration import make_llm_config, model_settings
|
||||
# XXX: @aarnphm fixes TypedDict behaviour in 3.11
|
||||
@pytest.mark.skipif(sys.version_info[:2] == (3, 11), reason='TypedDict in 3.11 behaves differently, so we need to fix this')
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import itertools, os, typing as t, pytest, openllm
|
||||
import itertools
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import pytest
|
||||
|
||||
import openllm
|
||||
if t.TYPE_CHECKING: from openllm_core._typing_compat import LiteralRuntime
|
||||
|
||||
_FRAMEWORK_MAPPING = {'flan_t5': 'google/flan-t5-small', 'opt': 'facebook/opt-125m', 'baichuan': 'baichuan-inc/Baichuan-7B',}
|
||||
|
||||
@@ -1,16 +1,32 @@
|
||||
from __future__ import annotations
|
||||
import asyncio, contextlib, functools, logging, sys, time, typing as t
|
||||
import asyncio
|
||||
import contextlib
|
||||
import functools
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import typing as t
|
||||
from abc import ABC, abstractmethod
|
||||
import attr, docker, docker.errors, docker.types, orjson, pytest, openllm
|
||||
|
||||
import attr
|
||||
import docker
|
||||
import docker.errors
|
||||
import docker.types
|
||||
import orjson
|
||||
import pytest
|
||||
from syrupy.extensions.json import JSONSnapshotExtension
|
||||
|
||||
import openllm
|
||||
from openllm._llm import normalise_model_name
|
||||
from openllm_core._typing_compat import DictStrAny, ListAny
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import subprocess
|
||||
|
||||
from syrupy.assertion import SnapshotAssertion
|
||||
from syrupy.types import PropertyFilter, PropertyMatcher, SerializableData, SerializedData
|
||||
|
||||
from openllm._configuration import GenerationConfig
|
||||
from openllm.client import BaseAsyncClient
|
||||
class ResponseComparator(JSONSnapshotExtension):
|
||||
|
||||
@@ -4,7 +4,6 @@ import typing as t
|
||||
import pytest
|
||||
|
||||
import openllm
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import contextlib
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import typing as t
|
||||
import pytest
|
||||
|
||||
import openllm
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
import contextlib
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import os, typing as t, pytest
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import pytest
|
||||
if t.TYPE_CHECKING: import openllm
|
||||
@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') is not None, reason='Model is too large for CI')
|
||||
def test_flan_t5_implementation(prompt: str, llm: openllm.LLM[t.Any, t.Any]):
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import functools, os, typing as t, pytest, openllm
|
||||
import functools
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import pytest
|
||||
|
||||
import openllm
|
||||
from bentoml._internal.configuration.containers import BentoMLContainer
|
||||
if t.TYPE_CHECKING: from pathlib import Path
|
||||
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
from __future__ import annotations
|
||||
import os, typing as t, pytest, bentoml
|
||||
import os
|
||||
import typing as t
|
||||
|
||||
import pytest
|
||||
|
||||
import bentoml
|
||||
from openllm_core import _strategies as strategy
|
||||
from openllm_core._strategies import CascadingResourceStrategy, NvidiaGpuResource, get_resource
|
||||
if t.TYPE_CHECKING: from _pytest.monkeypatch import MonkeyPatch
|
||||
|
||||
Reference in New Issue
Block a user