# mypy: disable-error-code="misc"
from __future__ import annotations

import importlib.metadata
import logging
import os
from pathlib import Path

import orjson
from simple_di import Provide, inject

import bentoml
import openllm_core
from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
from bentoml._internal.configuration.containers import BentoMLContainer
from openllm_core.utils import SHOW_CODEGEN, check_bool_env, pkg

from . import oci

logger = logging.getLogger(__name__)

OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'


def build_editable(path, package='openllm'):
  '''Build a local OpenLLM wheel if the OPENLLM_DEV_BUILD environment variable is set. Returns the wheel path, or None when dev builds are disabled.'''
  if not check_bool_env(OPENLLM_DEV_BUILD, default=False):
    return None
  # Build the package from local source so the development version can be imported.
  from build import ProjectBuilder
  from build.env import IsolatedEnvBuilder

  module_location = pkg.source_locations(package)
  if not module_location:
    raise RuntimeError(
      'Could not find the source location of OpenLLM. Make sure to unset OPENLLM_DEV_BUILD if you are developing OpenLLM.'
    )
  pyproject_path = Path(module_location).parent.parent / 'pyproject.toml'
  if os.path.isfile(pyproject_path.__fspath__()):
    logger.info('Generating built wheels for package %s...', package)
    with IsolatedEnvBuilder() as env:
      builder = ProjectBuilder(pyproject_path.parent)
      builder.python_executable = env.executable
      builder.scripts_dir = env.scripts_dir
      env.install(builder.build_system_requires)
      return builder.build('wheel', path, config_settings={'--global-option': '--quiet'})
  raise RuntimeError(
    'Custom OpenLLM build is currently not supported. Please install OpenLLM from PyPI or build it from Git source.'
  )


def construct_python_options(llm, llm_fs, extra_dependencies=None, adapter_map=None):
  packages = ['scipy', 'bentoml[tracing]>=1.1.10']  # apparently bnb misses this one
  if adapter_map is not None:
    packages += ['openllm[fine-tune]']
  if extra_dependencies is not None:
    packages += [f'openllm[{k}]' for k in extra_dependencies]
  if llm.config['requirements'] is not None:
    packages.extend(llm.config['requirements'])
  wheels = None
  built_wheels = [build_editable(llm_fs.getsyspath('/'), p) for p in ('openllm_core', 'openllm_client', 'openllm')]
  if all(built_wheels):
    wheels = [llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in built_wheels]
  return PythonOptions(packages=packages, wheels=wheels, lock_packages=True)


def construct_docker_options(
  llm, _, quantize, adapter_map, dockerfile_template, serialisation, container_registry, container_version_strategy
):
  from openllm_cli.entrypoint import process_environ

  environ = process_environ(
    llm.config,
    llm.config['timeout'],
    1.0,
    None,
    True,
    llm.model_id,
    None,
    llm._serialisation,
    llm,
    use_current_env=False,
  )
  # XXX: We need to quote this so that the env var inside the container is recognised as valid JSON
  environ['OPENLLM_CONFIG'] = f"'{environ['OPENLLM_CONFIG']}'"
  environ.pop('BENTOML_HOME', None)  # NOTE: irrelevant in container
  environ['NVIDIA_DRIVER_CAPABILITIES'] = 'compute,utility'
  return DockerOptions(
    base_image=oci.RefResolver.construct_base_image(container_registry, container_version_strategy),
    env=environ,
    dockerfile_template=dockerfile_template,
  )


_service_file = Path(os.path.abspath(__file__)).parent.parent / '_service.py'
_SERVICE_VARS = '''\
import orjson;model_id,model_tag,adapter_map,serialization,trust_remote_code='{__model_id__}','{__model_tag__}',orjson.loads("""{__model_adapter_map__}"""),'{__model_serialization__}',{__model_trust_remote_code__}
'''
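# For illustration only: with hypothetical values (model id 'facebook/opt-125m',
# tag 'opt-125m:abc123', no adapters, safetensors serialisation, and
# trust_remote_code=False), write_service below would render the template
# into a _service_vars.py along these lines:
#
#   # GENERATED BY 'openllm build facebook/opt-125m'. DO NOT EDIT
#
#   # fmt: off
#   import orjson;model_id,model_tag,adapter_map,serialization,trust_remote_code='facebook/opt-125m','opt-125m:abc123',orjson.loads("""null"""),'safetensors',False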
def write_service(llm, llm_fs, adapter_map):
  logger.debug('Generating service vars %s (dir=%s)', llm.model_id, llm_fs.getsyspath('/'))
  script = f"# GENERATED BY 'openllm build {llm.model_id}'. DO NOT EDIT\n\n# fmt: off\n" + _SERVICE_VARS.format(
    __model_id__=llm.model_id,
    __model_tag__=str(llm.tag),
    __model_adapter_map__=orjson.dumps(adapter_map).decode(),
    __model_serialization__=llm.config['serialisation'],
    __model_trust_remote_code__=str(llm.trust_remote_code),
  )
  if SHOW_CODEGEN:
    logger.info('Generated _service_vars.py:\n%s', script)
  llm_fs.writetext('_service_vars.py', script)
  with open(_service_file.__fspath__(), 'r') as f:
    service_src = f.read()
  llm_fs.writetext(llm.config['service_name'], service_src)


@inject
def create_bento(
  bento_tag,
  llm_fs,
  llm,
  quantize,
  dockerfile_template,
  adapter_map=None,
  extra_dependencies=None,
  serialisation=None,
  container_registry='ecr',
  container_version_strategy='release',
  _bento_store=Provide[BentoMLContainer.bento_store],
  _model_store=Provide[BentoMLContainer.model_store],
):
  _serialisation = openllm_core.utils.first_not_none(serialisation, default=llm.config['serialisation'])
  labels = dict(llm.identifying_params)
  labels.update(
    {
      '_type': llm.llm_type,
      '_framework': llm.__llm_backend__,
      'start_name': llm.config['start_name'],
      'base_name_or_path': llm.model_id,
      'bundler': 'openllm.bundle',
      **{
        f'{package.replace("-", "_")}_version': importlib.metadata.version(package)
        for package in {'openllm', 'openllm-core', 'openllm-client'}
      },
    }
  )
  if adapter_map:
    labels.update(adapter_map)
  logger.debug("Building Bento '%s' with model backend '%s'", bento_tag, llm.__llm_backend__)
  # add service.py definition to this temporary folder
  write_service(llm, llm_fs, adapter_map)
  bento = bentoml.Bento.create(
    version=bento_tag.version,
    build_ctx=llm_fs.getsyspath('/'),
    build_config=BentoBuildConfig(
      service=f"{llm.config['service_name']}:svc",
      name=bento_tag.name,
      labels=labels,
      models=[ModelSpec.from_item({'tag': str(llm.tag), 'alias': llm.tag.name})],
      description=f"OpenLLM service for {llm.config['start_name']}",
      include=list(llm_fs.walk.files()),
      exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
      python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
      docker=construct_docker_options(
        llm,
        llm_fs,
        quantize,
        adapter_map,
        dockerfile_template,
        _serialisation,
        container_registry,
        container_version_strategy,
      ),
    ),
  )
  return bento.save(bento_store=_bento_store, model_store=_model_store)
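# A minimal usage sketch, for illustration only. All values below are
# hypothetical; in practice the `openllm build` CLI constructs the `llm`
# object and the temporary PyFilesystem build context passed in as `llm_fs`:
#
#   import fs
#   import bentoml
#   import openllm
#
#   llm = openllm.LLM('facebook/opt-125m')  # hypothetical model id
#   with fs.open_fs('temp://llm_build') as llm_fs:
#     bento = create_bento(
#       bentoml.Tag.from_str('my-llm-service:1.0.0'),  # hypothetical Bento tag
#       llm_fs,
#       llm,
#       quantize=None,
#       dockerfile_template=None,
#       serialisation='safetensors',
#     )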