fix(style): remove weird break on split item

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron
2023-10-07 02:21:31 -04:00
parent c6e95de9b4
commit 625b82a0fc
38 changed files with 243 additions and 474 deletions

View File

@@ -34,7 +34,8 @@ def get_or_download(ids: str = _BENTOMODEL_ID) -> bentoml.Model:
options=ModelOptions(),
context=openllm.utils.generate_context(framework_name='transformers'),
labels={
'runtime': 'pt', 'framework': 'openllm'
'runtime': 'pt',
'framework': 'openllm'
},
signatures=model_signatures) as bentomodel:
snapshot_download(_GENERIC_EMBEDDING_ID,

View File

@@ -281,22 +281,10 @@ class LLM(LLMInterface[M, T], ReprMixin):
if t.TYPE_CHECKING: __name__: str
if t.TYPE_CHECKING and not MYPY:
def __attrs_init__(self,
config: LLMConfig,
quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig, transformers.GPTQConfig]],
quantize: t.Optional[LiteralQuantise],
model_id: str,
model_decls: TupleAny,
model_attrs: DictStrAny,
tokenizer_attrs: DictStrAny,
tag: bentoml.Tag,
adapters_mapping: t.Optional[AdaptersMapping],
model_version: t.Optional[str],
serialisation: LiteralSerialisation,
_local: bool,
prompt_template: PromptTemplate | None,
system_message: str | None,
**attrs: t.Any) -> None:
def __attrs_init__(self, config: LLMConfig, quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig,
transformers.GPTQConfig]], quantize: t.Optional[LiteralQuantise], model_id: str, model_decls: TupleAny,
model_attrs: DictStrAny, tokenizer_attrs: DictStrAny, tag: bentoml.Tag, adapters_mapping: t.Optional[AdaptersMapping], model_version: t.Optional[str],
serialisation: LiteralSerialisation, _local: bool, prompt_template: PromptTemplate | None, system_message: str | None, **attrs: t.Any) -> None:
'''Generated __attrs_init__ for openllm.LLM.'''
config: LLMConfig
@@ -540,20 +528,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
def generate_tag(cls, *param_decls: t.Any, **attrs: t.Any) -> bentoml.Tag:
return bentoml.Tag.from_taglike(cls._generate_tag_str(*param_decls, **attrs))
def __init__(self,
*args: t.Any,
model_id: str,
llm_config: LLMConfig,
quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
_quantize: LiteralQuantise | None,
_model_version: str,
_tag: bentoml.Tag,
_serialisation: LiteralSerialisation,
_local: bool,
_prompt_template: PromptTemplate | None,
_system_message: str | None,
_adapters_mapping: AdaptersMapping | None,
**attrs: t.Any,
def __init__(self, *args: t.Any, model_id: str, llm_config: LLMConfig, quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
_quantize: LiteralQuantise | None, _model_version: str, _tag: bentoml.Tag, _serialisation: LiteralSerialisation, _local: bool, _prompt_template: PromptTemplate | None,
_system_message: str | None, _adapters_mapping: AdaptersMapping | None, **attrs: t.Any,
):
'''Initialize the LLM with given pretrained model.
@@ -651,22 +628,13 @@ class LLM(LLMInterface[M, T], ReprMixin):
# parsing tokenizer and model kwargs, as the hierarchy is param pass > default
normalized_model_kwds, normalized_tokenizer_kwds = normalize_attrs_to_model_tokenizer_pair(**attrs)
# NOTE: Save the args and kwargs for later load
self.__attrs_init__(llm_config,
quantization_config,
_quantize,
model_id,
args, {
**model_kwds, **normalized_model_kwds
}, {
**tokenizer_kwds, **normalized_tokenizer_kwds
},
_tag,
_adapters_mapping,
_model_version,
_serialisation,
_local,
_prompt_template,
_system_message)
self.__attrs_init__(llm_config, quantization_config, _quantize, model_id, args, {
**model_kwds,
**normalized_model_kwds
}, {
**tokenizer_kwds,
**normalized_tokenizer_kwds
}, _tag, _adapters_mapping, _model_version, _serialisation, _local, _prompt_template, _system_message)
self.llm_post_init()
@@ -1306,10 +1274,11 @@ def llm_runnable_class(self: LLM[M, T], embeddings_sig: ModelSignature, generate
pre = now
yield ' '.join(output_text[pre:]) + ' '
return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {},
lambda ns: ns.update({
'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'), '__module__': self.__module__, '__doc__': self.config['env'].start_docstring
}))
return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {}, lambda ns: ns.update({
'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'),
'__module__': self.__module__,
'__doc__': self.config['env'].start_docstring
}))
def llm_runner_class(self: LLM[M, T]) -> type[LLMRunner[M, T]]:
def available_adapters(_: LLMRunner[M, T]) -> PeftAdapterOutput:

View File

@@ -120,8 +120,13 @@ async def completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> s
@svc.api(route='/v1/chat/completions',
input=bentoml.io.JSON.from_sample(
openllm.utils.bentoml_cattr.unstructure(
openllm.openai.ChatCompletionRequest(messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}],
model=runner.llm_type))),
openllm.openai.ChatCompletionRequest(messages=[{
'role': 'system',
'content': 'You are a helpful assistant.'
}, {
'role': 'user',
'content': 'Hello!'
}], model=runner.llm_type))),
output=bentoml.io.Text())
async def chat_completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> str | t.AsyncGenerator[str, None]:
prompt = openllm.openai.messages_to_prompt(input_dict['messages'])
@@ -194,32 +199,10 @@ def metadata_v1(_: str) -> openllm.MetadataOutput:
input=bentoml.io.JSON.from_sample(['Hey Jude, welcome to the jungle!', 'What is the meaning of life?']),
output=bentoml.io.JSON.from_sample({
'embeddings': [
0.007917795330286026,
-0.014421648345887661,
0.00481307040899992,
0.007331526838243008,
-0.0066398633643984795,
0.00945580005645752,
0.0087016262114048,
-0.010709521360695362,
0.012635177001357079,
0.010541186667978764,
-0.00730888033285737,
-0.001783102168701589,
0.02339819073677063,
-0.010825827717781067,
-0.015888236463069916,
0.01876218430697918,
0.0076906150206923485,
0.0009032754460349679,
-0.010024012066423893,
0.01090280432254076,
-0.008668390102684498,
0.02070549875497818,
0.0014594447566196322,
-0.018775740638375282,
-0.014814382418990135,
0.01796768605709076
0.007917795330286026, -0.014421648345887661, 0.00481307040899992, 0.007331526838243008, -0.0066398633643984795, 0.00945580005645752, 0.0087016262114048, -0.010709521360695362,
0.012635177001357079, 0.010541186667978764, -0.00730888033285737, -0.001783102168701589, 0.02339819073677063, -0.010825827717781067, -0.015888236463069916,
0.01876218430697918, 0.0076906150206923485, 0.0009032754460349679, -0.010024012066423893, 0.01090280432254076, -0.008668390102684498, 0.02070549875497818,
0.0014594447566196322, -0.018775740638375282, -0.014814382418990135, 0.01796768605709076
],
'num_tokens': 20
}))

View File

@@ -87,15 +87,7 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
elif backend_envvar == 'tf':
if not openllm_core.utils.is_tf_available():
raise ValueError(f"TensorFlow is not available, while {env.backend} is set to 'tf'")
candidates = ('tensorflow',
'tensorflow-cpu',
'tensorflow-gpu',
'tf-nightly',
'tf-nightly-cpu',
'tf-nightly-gpu',
'intel-tensorflow',
'intel-tensorflow-avx512',
'tensorflow-rocm',
candidates = ('tensorflow', 'tensorflow-cpu', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-cpu', 'tf-nightly-gpu', 'intel-tensorflow', 'intel-tensorflow-avx512', 'tensorflow-rocm',
'tensorflow-macos',
)
# For the metadata, we have to look for both tensorflow and tensorflow-cpu
@@ -123,14 +115,8 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
lock_packages=False,
extra_index_url=['https://download.pytorch.org/whl/cu118', 'https://huggingface.github.io/autogptq-index/whl/cu118/'])
def construct_docker_options(llm: openllm.LLM[t.Any, t.Any],
_: FS,
workers_per_resource: float,
quantize: LiteralString | None,
adapter_map: dict[str, str | None] | None,
dockerfile_template: str | None,
serialisation: LiteralSerialisation,
container_registry: LiteralContainerRegistry,
def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_resource: float, quantize: LiteralString | None, adapter_map: dict[str, str | None] | None,
dockerfile_template: str | None, serialisation: LiteralSerialisation, container_registry: LiteralContainerRegistry,
container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions:
from openllm.cli._factory import parse_config_options
environ = parse_config_options(llm.config, llm.config['timeout'], workers_per_resource, None, True, os.environ.copy())
@@ -217,7 +203,11 @@ def create_bento(bento_tag: bentoml.Tag,
_serialisation: LiteralSerialisation = openllm_core.utils.first_not_none(serialisation, default=llm.config['serialisation'])
labels = dict(llm.identifying_params)
labels.update({
'_type': llm.llm_type, '_framework': llm.config['env']['backend_value'], 'start_name': llm.config['start_name'], 'base_name_or_path': llm.model_id, 'bundler': 'openllm.bundle'
'_type': llm.llm_type,
'_framework': llm.config['env']['backend_value'],
'start_name': llm.config['start_name'],
'base_name_or_path': llm.model_id,
'bundler': 'openllm.bundle'
})
if adapter_map: labels.update(adapter_map)
if isinstance(workers_per_resource, str):
@@ -244,14 +234,7 @@ def create_bento(bento_tag: bentoml.Tag,
exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
models=[llm_spec],
docker=construct_docker_options(llm,
llm_fs,
workers_per_resource,
quantize,
adapter_map,
dockerfile_template,
_serialisation,
container_registry,
docker=construct_docker_options(llm, llm_fs, workers_per_resource, quantize, adapter_map, dockerfile_template, _serialisation, container_registry,
container_version_strategy))
bento = bentoml.Bento.create(build_config=build_config, version=bento_tag.version, build_ctx=llm_fs.getsyspath('/'))

View File

@@ -54,9 +54,7 @@ def parse_config_options(config: LLMConfig, server_timeout: int, workers_per_res
# TODO: Support amd.com/gpu on k8s
_bentoml_config_options_env = environ.pop('BENTOML_CONFIG_OPTIONS', '')
_bentoml_config_options_opts = [
'tracing.sample_rate=1.0',
f'api_server.traffic.timeout={server_timeout}',
f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
'tracing.sample_rate=1.0', f'api_server.traffic.timeout={server_timeout}', f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
f'runners."llm-{config["start_name"]}-runner".workers_per_resource={workers_per_resource}'
]
if device:
@@ -118,22 +116,9 @@ Available official model_id(s): [default: {llm_config['default_id']}]
@group.command(**command_attrs)
@start_decorator(llm_config, serve_grpc=_serve_grpc)
@click.pass_context
def start_cmd(ctx: click.Context,
/,
server_timeout: int,
model_id: str | None,
model_version: str | None,
system_message: str | None,
prompt_template_file: t.IO[t.Any] | None,
workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString,
device: t.Tuple[str, ...],
quantize: LiteralQuantise | None,
backend: LiteralBackend,
serialisation: LiteralSerialisation | None,
cors: bool,
adapter_id: str | None,
return_process: bool,
**attrs: t.Any,
def start_cmd(ctx: click.Context, /, server_timeout: int, model_id: str | None, model_version: str | None, system_message: str | None, prompt_template_file: t.IO[t.Any] | None,
workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString, device: t.Tuple[str, ...], quantize: LiteralQuantise | None, backend: LiteralBackend,
serialisation: LiteralSerialisation | None, cors: bool, adapter_id: str | None, return_process: bool, **attrs: t.Any,
) -> LLMConfig | subprocess.Popen[bytes]:
_serialisation = openllm_core.utils.first_not_none(serialisation, default=llm_config['serialisation'])
if _serialisation == 'safetensors' and quantize is not None and openllm_core.utils.check_bool_env('OPENLLM_SERIALIZATION_WARNING'):
@@ -235,16 +220,10 @@ Available official model_id(s): [default: {llm_config['default_id']}]
def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callable[[FC], t.Callable[[FC], FC]]:
def wrapper(fn: FC) -> t.Callable[[FC], FC]:
composed = openllm.utils.compose(
llm_config.to_click_options,
_http_server_args if not serve_grpc else _grpc_server_args,
cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."),
model_id_option(factory=cog.optgroup),
model_version_option(factory=cog.optgroup),
system_message_option(factory=cog.optgroup),
prompt_template_file_option(factory=cog.optgroup),
cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'),
workers_per_resource_option(factory=cog.optgroup),
cors_option(factory=cog.optgroup),
llm_config.to_click_options, _http_server_args if not serve_grpc else _grpc_server_args,
cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."), model_id_option(factory=cog.optgroup),
model_version_option(factory=cog.optgroup), system_message_option(factory=cog.optgroup), prompt_template_file_option(factory=cog.optgroup),
cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'), workers_per_resource_option(factory=cog.optgroup), cors_option(factory=cog.optgroup),
backend_option(factory=cog.optgroup),
cog.optgroup.group('LLM Optimization Options',
help='''Optimization related options.
@@ -255,9 +234,7 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab
- DeepSpeed Inference: [link](https://www.deepspeed.ai/inference/)
- GGML: Fast inference on [bare metal](https://github.com/ggerganov/ggml)
'''),
quantize_option(factory=cog.optgroup),
serialisation_option(factory=cog.optgroup),
'''), quantize_option(factory=cog.optgroup), serialisation_option(factory=cog.optgroup),
cog.optgroup.option('--device',
type=openllm.utils.dantic.CUDA,
multiple=True,
@@ -286,8 +263,8 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab
help='Optional name or path for given LoRA adapter' + f" to wrap '{llm_config['model_name']}'",
multiple=True,
callback=_id_callback,
metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'),
click.option('--return-process', is_flag=True, default=False, help='Internal use only.', hidden=True),
metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'), click.option('--return-process', is_flag=True, default=False, help='Internal use only.',
hidden=True),
)
return composed(fn)

View File

@@ -22,7 +22,8 @@ def cli(ctx: click.Context, output: LiteralOutput) -> None:
'tag': str(b.tag),
'size': human_readable_size(openllm.utils.calc_dir_size(b.path)),
'models': [{
'tag': str(m.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
'tag': str(m.tag),
'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
} for m in (bentoml.models.get(_.tag) for _ in b.info.models)]
} for b in tuple(i for i in bentoml.list() if all(
k in i.info.labels for k in {'start_name', 'bundler'})) if b.info.labels['start_name'] == k] for k in tuple(inflection.dasherize(key) for key in openllm.CONFIG_MAPPING.keys())

View File

@@ -57,7 +57,9 @@ __lazy = LazyModule(__name__,
os.path.abspath('__file__'),
_import_structure,
extra_objects={
'CONFIG_MAPPING': CONFIG_MAPPING, 'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES, 'AutoConfig': AutoConfig,
'CONFIG_MAPPING': CONFIG_MAPPING,
'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES,
'AutoConfig': AutoConfig,
})
__all__ = __lazy.__all__
__dir__ = __lazy.__dir__

View File

@@ -160,10 +160,9 @@ class _LazyAutoMapping(OrderedDict, ReprMixin):
[self._load_attr_from_module(key, name) for key, name in self._model_mapping.items() if key in self._config_mapping.keys()] + list(self._extra_content.values()))
def items(self) -> ConfigModelItemsView:
return t.cast('ConfigModelItemsView',
[(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
for key in self._model_mapping.keys()
if key in self._config_mapping.keys()] + list(self._extra_content.items()))
return t.cast('ConfigModelItemsView', [(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
for key in self._model_mapping.keys()
if key in self._config_mapping.keys()] + list(self._extra_content.items()))
def __iter__(self) -> t.Iterator[type[openllm.LLMConfig]]:
return iter(t.cast('SupportsIter[t.Iterator[type[openllm.LLMConfig]]]', self.keys()))

View File

@@ -23,5 +23,7 @@ sys.modules[__name__] = LazyModule(__name__,
globals()['__file__'],
_import_structure,
extra_objects={
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING, 'ChatGLMConfig': ChatGLMConfig
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING,
'ChatGLMConfig': ChatGLMConfig
})

View File

@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
globals()['__file__'],
_import_structure,
extra_objects={
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING, 'DollyV2Config': DollyV2Config
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING,
'DollyV2Config': DollyV2Config
})

View File

@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
globals()['__file__'],
_import_structure,
extra_objects={
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING, 'FalconConfig': FalconConfig
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING,
'FalconConfig': FalconConfig
})

View File

@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
globals()['__file__'],
_import_structure,
extra_objects={
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING, 'GPTNeoXConfig': GPTNeoXConfig
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING,
'GPTNeoXConfig': GPTNeoXConfig
})

View File

@@ -46,5 +46,7 @@ sys.modules[__name__] = LazyModule(__name__,
globals()['__file__'],
_import_structure,
extra_objects={
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING, 'OPTConfig': OPTConfig,
'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING,
'OPTConfig': OPTConfig,
})

View File

@@ -33,7 +33,11 @@ class FlaxOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tok
use_default_prompt_template: bool = False,
**attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences, 'repetition_penalty': repetition_penalty
'max_new_tokens': max_new_tokens,
'temperature': temperature,
'top_k': top_k,
'num_return_sequences': num_return_sequences,
'repetition_penalty': repetition_penalty
}, {}
def generate(self, prompt: str, **attrs: t.Any) -> list[str]:

View File

@@ -19,5 +19,8 @@ class VLLMOPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2Tokenizer']):
use_default_prompt_template: bool = True,
**attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences
'max_new_tokens': max_new_tokens,
'temperature': temperature,
'top_k': top_k,
'num_return_sequences': num_return_sequences
}, {}

View File

@@ -34,11 +34,13 @@ def test_missing_default():
make_llm_config('MissingArchitecture', {'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing'], 'requirements': ['bentoml'],},)
def test_forbidden_access():
cl_ = make_llm_config(
'ForbiddenAccess', {
'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'], 'architecture': 'PreTrainedModel', 'requirements': ['bentoml'],
},
)
cl_ = make_llm_config('ForbiddenAccess', {
'default_id': 'huggingface/t5-tiny-testing',
'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'],
'architecture': 'PreTrainedModel',
'requirements': ['bentoml'],
},
)
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), '__config__',)
assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'GenerationConfig',)
@@ -128,7 +130,9 @@ def test_struct_envvar_with_overwrite_provided_env(monkeypatch: pytest.MonkeyPat
mk.setenv(field_env_key('field1'), str(4.0))
mk.setenv(field_env_key('temperature', suffix='generation'), str(0.2))
sent = make_llm_config('OverwriteWithEnvAvailable', {
'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'
'default_id': 'asdfasdf',
'model_ids': ['asdf', 'asdfasdfads'],
'architecture': 'PreTrainedModel'
},
fields=(('field1', 'float', 3.0),),
).model_construct_env(field1=20.0, temperature=0.4)

View File

@@ -196,7 +196,8 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
detach=True,
device_requests=devs,
ports={
'3000/tcp': port, '3001/tcp': prom_port
'3000/tcp': port,
'3001/tcp': prom_port
},
)