feat: Yi models (#651)

Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-11-14 21:55:24 -05:00
committed by GitHub
parent b4b70e2f20
commit 6a6d689a77
8 changed files with 115 additions and 5 deletions

View File

@@ -36,9 +36,9 @@ from openllm_core.exceptions import MissingDependencyError
from openllm_core.prompts import PromptTemplate
from openllm_core.utils import (
  DEBUG,
+ ENV_VARS_TRUE_VALUES,
  ReprMixin,
  apply,
- check_bool_env,
  codegen,
  converter,
  first_not_none,
@@ -326,7 +326,10 @@ class LLM(t.Generic[M, T], ReprMixin):
@property
def trust_remote_code(self) -> bool:
- return first_not_none(check_bool_env('TRUST_REMOTE_CODE', False), default=self.__llm_trust_remote_code__)
+ env = os.getenv('TRUST_REMOTE_CODE')
+ if env is not None:
+ return str(env).upper() in ENV_VARS_TRUE_VALUES
+ return self.__llm_trust_remote_code__
@property
def runner_name(self) -> str:

View File

@@ -134,6 +134,7 @@ def construct_docker_options(
'BENTOML_DEBUG': str(True),
'BENTOML_QUIET': str(False),
'BENTOML_CONFIG_OPTIONS': f"'{environ['BENTOML_CONFIG_OPTIONS']}'",
+ 'TRUST_REMOTE_CODE': str(llm.trust_remote_code),
}
if adapter_map:
env_dict['BITSANDBYTES_NOWELCOME'] = os.environ.get('BITSANDBYTES_NOWELCOME', '1')

View File

@@ -277,7 +277,7 @@ class OpenLLMCommandGroup(BentoMLCommandGroup):
def list_commands(self, ctx: click.Context) -> list[str]:
return super().list_commands(ctx) + t.cast('Extensions', extension_command).list_commands(ctx)
- def command(self, *args: t.Any, **kwargs: t.Any) -> t.Callable[[t.Callable[..., t.Any]], click.Command]: # type: ignore[override] # XXX: fix decorator on BentoMLCommandGroup
+ def command(self, *args: t.Any, **kwargs: t.Any) -> t.Callable[[t.Callable[..., t.Any]], click.Command]:
"""Override the default 'cli.command' with supports for aliases for given command, and it wraps the implementation with common parameters."""
if 'context_settings' not in kwargs:
kwargs['context_settings'] = {}
@@ -457,7 +457,6 @@ def start_command(
quantize=quantize,
serialisation=serialisation,
torch_dtype=dtype,
- trust_remote_code=check_bool_env('TRUST_REMOTE_CODE'),
)
backend_warning(llm.__llm_backend__)
@@ -635,6 +634,7 @@ def process_environ(
'OPENLLM_BACKEND': llm.__llm_backend__,
'OPENLLM_CONFIG': config.model_dump_json(flatten=True).decode(),
'TORCH_DTYPE': str(llm._torch_dtype).split('.')[-1],
+ 'TRUST_REMOTE_CODE': str(llm.trust_remote_code),
}
)
if llm.quantise: