fix(build): only load model when eager is True

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron
2023-11-20 17:06:25 -05:00
parent 5b92e848e2
commit f753662ae6
2 changed files with 17 additions and 6 deletions

View File

@@ -1047,10 +1047,17 @@ def build_command(
serialisation=first_not_none(
serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
),
_eager=False,
)
if llm.__llm_backend__ not in llm.config['backend']:
raise click.ClickException(f"'{backend}' is not supported with {model_id}")
backend_warning(llm.__llm_backend__, build=True)
try:
model = bentoml.models.get(llm.tag)
except bentoml.exceptions.NotFound:
model = openllm.serialisation.import_model(llm, trust_remote_code=llm.trust_remote_code)
llm._tag = model.tag
os.environ.update(
**process_environ(
llm.config,