# Copyright 2023 BentoML Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
|
|
Any build-related utilities. This is used for CI.
|
|
"""
|
|
from __future__ import annotations

import importlib.metadata
import logging
import os
import typing as t
from pathlib import Path

import bentoml
import fs
import inflection
from bentoml._internal.bento.build_config import DockerOptions, PythonOptions
from bentoml._internal.configuration import get_debug_mode

import openllm
import openllm.utils as utils
from openllm.utils import pkg

if t.TYPE_CHECKING:
    from fs.base import FS

logger = logging.getLogger(__name__)

OPENLLM_DEV_BUILD = "OPENLLM_DEV_BUILD"


def build_editable(path: str) -> str | None:
    """Build OpenLLM if the OPENLLM_DEV_BUILD environment variable is set."""
    if str(os.environ.get(OPENLLM_DEV_BUILD, False)).lower() != "true":
        return None

    # We need to build the package in editable mode, so that we can import it later.
    from build import ProjectBuilder
    from build.env import IsolatedEnvBuilder

    module_location = pkg.source_locations("openllm")
    if not module_location:
        raise RuntimeError(
            "Could not find the source location of OpenLLM. Make sure to unset"
            " OPENLLM_DEV_BUILD if you are developing OpenLLM."
        )
    pyproject_path = Path(module_location).parent.parent / "pyproject.toml"
    if pyproject_path.is_file():
        logger.info("OpenLLM is installed in editable mode. Generating built wheels...")
        with IsolatedEnvBuilder() as env:
            builder = ProjectBuilder(pyproject_path.parent)
            builder.python_executable = env.executable
            builder.scripts_dir = env.scripts_dir
            env.install(builder.build_system_requires)
            return builder.build("wheel", path, config_settings={"--global-option": "--quiet"})
    raise RuntimeError(
        "Custom OpenLLM build is currently not supported. Please install OpenLLM from PyPI or build it from Git source."
    )

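# A minimal sketch of driving `build_editable` directly (the output path and
# env var value below are illustrative, not part of the real API surface):
#
#   os.environ[OPENLLM_DEV_BUILD] = "true"
#   wheel = build_editable("/path/to/wheel/output")
#   # -> path of the built wheel, or None when OPENLLM_DEV_BUILD is not "true"
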
def construct_python_options(llm: openllm.LLM, llm_fs: FS) -> PythonOptions:
    # NOTE: add openllm to the default dependencies. If users have custom-built
    # OpenLLM wheels, those will still be respected, since BentoML always installs
    # dependencies from requirements.txt first, then proceeds to install everything
    # inside the wheels/ folder.
    packages: list[str] = ["openllm"]

    ModelEnv = openllm.utils.ModelEnv(llm.__openllm_start_name__)
    if llm.config.__openllm_requirements__ is not None:
        packages.extend(llm.config.__openllm_requirements__)

    if str(os.environ.get("BENTOML_BUNDLE_LOCAL_BUILD", False)).lower() != "false":
        packages.append(f"bentoml>={'.'.join([str(i) for i in pkg.pkg_version_info('bentoml')])}")

    to_use_framework = ModelEnv.get_framework_env()
    if to_use_framework == "flax":
        assert utils.is_flax_available(), f"Flax is not available, while {ModelEnv.framework} is set to 'flax'"
        packages.extend(
            [
                f"flax>={importlib.metadata.version('flax')}",
                f"jax>={importlib.metadata.version('jax')}",
                f"jaxlib>={importlib.metadata.version('jaxlib')}",
            ]
        )
    elif to_use_framework == "tf":
        assert utils.is_tf_available(), f"TensorFlow is not available, while {ModelEnv.framework} is set to 'tf'"
        candidates = (
            "tensorflow",
            "tensorflow-cpu",
            "tensorflow-gpu",
            "tf-nightly",
            "tf-nightly-cpu",
            "tf-nightly-gpu",
            "intel-tensorflow",
            "intel-tensorflow-avx512",
            "tensorflow-rocm",
            "tensorflow-macos",
        )
        # For the version metadata, we have to check every distribution that
        # ships TensorFlow, not just tensorflow and tensorflow-cpu.
        for candidate in candidates:
            try:
                _tf_version = importlib.metadata.version(candidate)
                packages.extend([f"tensorflow>={_tf_version}"])
                break
            except importlib.metadata.PackageNotFoundError:
                pass
    else:
        assert utils.is_torch_available(), "PyTorch is not available. Make sure to have it locally installed."
        packages.extend([f"torch>={importlib.metadata.version('torch')}"])

    wheels: list[str] = []
    built_wheels = build_editable(llm_fs.getsyspath("/"))
    if built_wheels is not None:
        wheels.append(llm_fs.getsyspath(f"/{built_wheels.split('/')[-1]}"))

    return PythonOptions(packages=packages, wheels=wheels, lock_packages=True)

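# For a torch-backed model, the resulting options look roughly like the sketch
# below (version pins are hypothetical; they are resolved from the local
# environment at build time):
#
#   PythonOptions(
#       packages=["openllm", "bentoml>=1.0", "torch>=2.0"],
#       wheels=[...],  # the dev wheel, only when OPENLLM_DEV_BUILD=true
#       lock_packages=True,
#   )
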
def construct_docker_options(llm: openllm.LLM, _: FS) -> DockerOptions:
    ModelEnv = openllm.utils.ModelEnv(llm.__openllm_start_name__)
    _bentoml_config_options = os.environ.pop("BENTOML_CONFIG_OPTIONS", "")
    # NOTE: The conditional must be parenthesised; otherwise the concatenated
    # options bind to the `else` branch and are silently dropped whenever
    # BENTOML_CONFIG_OPTIONS is already set.
    _bentoml_config_options += (
        (" " if _bentoml_config_options else "")
        + "api_server.traffic.timeout=3600"  # NOTE: Currently we hardcode this value
        + f' runners."llm-{llm.config.__openllm_start_name__}-runner".traffic.timeout'
        + f"={llm.config.__openllm_timeout__}"
    )
    return DockerOptions(
        cuda_version="11.6",  # NOTE: Torch 2.0 currently only supports 11.6 as the latest CUDA version
        env={
            ModelEnv.framework: ModelEnv.get_framework_env(),
            "OPENLLM_MODEL": llm.config.__openllm_model_name__,
            "BENTOML_DEBUG": str(get_debug_mode()),
            "BENTOML_CONFIG_OPTIONS": _bentoml_config_options,
        },
        system_packages=["git"],
    )

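# The assembled BENTOML_CONFIG_OPTIONS string for a hypothetical model named
# "flan-t5" with a 3600s runner timeout would look like:
#
#   api_server.traffic.timeout=3600 runners."llm-flan-t5-runner".traffic.timeout=3600
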
@t.overload
def build(model_name: str, *, __cli__: t.Literal[False] = ..., **attrs: t.Any) -> bentoml.Bento:
    ...


@t.overload
def build(model_name: str, *, __cli__: t.Literal[True] = ..., **attrs: t.Any) -> tuple[bentoml.Bento, bool]:
    ...


def build(model_name: str, *, __cli__: bool = False, **attrs: t.Any) -> tuple[bentoml.Bento, bool] | bentoml.Bento:
    """Package an LLM into a Bento."""
    overwrite_existing_bento = attrs.pop("_overwrite_existing_bento", False)
    current_model_envvar = os.environ.pop("OPENLLM_MODEL", None)
    _previously_built = False

    ModelEnv = openllm.utils.ModelEnv(model_name)

    logger.info("Packing '%s' into a Bento with kwargs=%s...", model_name, attrs)

    # NOTE: We set this environment variable so that our service.py logic won't raise RuntimeError
    # during build. This is a current limitation of bentoml build, where we actually import the service.py into sys.path.
    try:
        os.environ["OPENLLM_MODEL"] = inflection.underscore(model_name)

        to_use_framework = ModelEnv.get_framework_env()
        if to_use_framework == "flax":
            llm = openllm.AutoFlaxLLM.for_model(model_name, **attrs)
        elif to_use_framework == "tf":
            llm = openllm.AutoTFLLM.for_model(model_name, **attrs)
        else:
            llm = openllm.AutoLLM.for_model(model_name, **attrs)

        labels = dict(llm.identifying_params)
        labels.update({"_type": llm.llm_type, "_framework": to_use_framework})
        service_name = f"generated_{inflection.underscore(model_name)}_service.py"

        with fs.open_fs(f"temp://llm_{llm.config.__openllm_model_name__}") as llm_fs:
            # add the generated service.py definition to this temporary folder
            utils.codegen.write_service(model_name, service_name, llm_fs)

            bento_tag = bentoml.Tag.from_taglike(f"{model_name}-service:{llm.tag.version}")
            try:
                bento = bentoml.get(bento_tag)
                if overwrite_existing_bento:
                    bentoml.delete(bento_tag)
                    raise bentoml.exceptions.NotFound("Overwriting previously saved Bento.")
                _previously_built = True
            except bentoml.exceptions.NotFound:
                logger.info("Building Bento for LLM '%s'", llm.__openllm_start_name__)
                bento = bentoml.bentos.build(
                    f"{service_name}:svc",
                    name=bento_tag.name,
                    labels=labels,
                    description=f"OpenLLM service for {llm.__openllm_start_name__}",
                    include=[
                        f for f in llm_fs.walk.files(filter=["*.py"])
                    ],  # NOTE: By default, we are using _service.py as the default service, for now.
                    exclude=["/venv", "__pycache__/", "*.py[cod]", "*$py.class"],
                    python=construct_python_options(llm, llm_fs),
                    docker=construct_docker_options(llm, llm_fs),
                    version=bento_tag.version,
                    build_ctx=llm_fs.getsyspath("/"),
                )
        if __cli__:
            return bento, _previously_built
        else:
            return bento
    except Exception as e:
        logger.error("Exception caught while building LLM %s", model_name, exc_info=e)
        raise
    finally:
        del os.environ["OPENLLM_MODEL"]
        # restore the original OPENLLM_MODEL envvar if it was set.
        if current_model_envvar is not None:
            os.environ["OPENLLM_MODEL"] = current_model_envvar
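
# A minimal usage sketch (the model name below is illustrative; any model
# supported by openllm.AutoLLM works):
#
#   bento = build("flan-t5")
#   bento, previously_built = build("flan-t5", __cli__=True)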