refactor: packages (#249)

2026-06-12 02:20:32 -04:00 · 2023-08-22 08:55:46 -04:00
parent a964e659c1
commit 3ffb25a872
148 changed files with 2899 additions and 1937 deletions
--- a/openllm-python/src/openllm/bundle/__init__.py
+++ b/openllm-python/src/openllm/bundle/__init__.py
@@ -4,15 +4,12 @@ These utilities will stay internal, and its API can be changed or updated withou
 """
 from __future__ import annotations
 import os, typing as t
-from openllm.utils import LazyModule
+from openllm_core.utils import LazyModule

 _import_structure: dict[str, list[str]] = {"_package": ["create_bento", "build_editable", "construct_python_options", "construct_docker_options"], "oci": ["CONTAINER_NAMES", "get_base_container_tag", "build_container", "get_base_container_name", "supported_registries", "RefResolver"]}

 if t.TYPE_CHECKING:
-  from . import (
-    _package as _package,
-    oci as oci,
-  )
+  from . import _package as _package, oci as oci
  from ._package import (
    build_editable as build_editable,
    construct_docker_options as construct_docker_options,
@@ -28,7 +25,7 @@ if t.TYPE_CHECKING:
    supported_registries as supported_registries,
  )

-__lazy=LazyModule(__name__, os.path.abspath("__file__"), _import_structure)
-__all__=__lazy.__all__
-__dir__=__lazy.__dir__
-__getattr__=__lazy.__getattr__
+__lazy = LazyModule(__name__, os.path.abspath("__file__"), _import_structure)
+__all__ = __lazy.__all__
+__dir__ = __lazy.__dir__
+__getattr__ = __lazy.__getattr__
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -1,35 +1,34 @@
 # mypy: disable-error-code="misc"
 from __future__ import annotations
-import importlib.metadata, inspect, logging, os, typing as t
+import fs, fs.copy, fs.errors, orjson, bentoml, openllm_core, importlib.metadata, inspect, logging, os, typing as t, string
 from pathlib import Path
-import fs, fs.copy, fs.errors, orjson, bentoml, openllm
 from simple_di import Provide, inject
 from bentoml._internal.bento.build_config import BentoBuildConfig, DockerOptions, ModelSpec, PythonOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
 from . import oci

 if t.TYPE_CHECKING:
+  import openllm
  from fs.base import FS
-  from openllm._typing_compat import LiteralString
+  from openllm_core._typing_compat import LiteralString, LiteralContainerRegistry, LiteralContainerVersionStrategy
  from bentoml._internal.bento import BentoStore
  from bentoml._internal.models.model import ModelStore
-  from .oci import LiteralContainerRegistry, LiteralContainerVersionStrategy

 logger = logging.getLogger(__name__)

 OPENLLM_DEV_BUILD = "OPENLLM_DEV_BUILD"

-def build_editable(path: str) -> str | None:
+def build_editable(path: str, package: t.Literal["openllm", "openllm_core", "openllm_client"] = "openllm") -> str | None:
  """Build OpenLLM if the OPENLLM_DEV_BUILD environment variable is set."""
  if str(os.environ.get(OPENLLM_DEV_BUILD, False)).lower() != "true": return None
  # We need to build the package in editable mode, so that we can import it
  from build import ProjectBuilder
  from build.env import IsolatedEnvBuilder
-  module_location = openllm.utils.pkg.source_locations("openllm")
+  module_location = openllm_core.utils.pkg.source_locations(package)
  if not module_location: raise RuntimeError("Could not find the source location of OpenLLM. Make sure to unset OPENLLM_DEV_BUILD if you are developing OpenLLM.")
  pyproject_path = Path(module_location).parent.parent/"pyproject.toml"
  if os.path.isfile(pyproject_path.__fspath__()):
-    logger.info("OpenLLM is installed in editable mode. Generating built wheels...")
+    logger.info("Generating built wheels for package %s...", package)
    with IsolatedEnvBuilder() as env:
      builder = ProjectBuilder(pyproject_path.parent)
      builder.python_executable = env.executable
@@ -49,15 +48,15 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d

  req = llm.config["requirements"]
  if req is not None: packages.extend(req)
-  if str(os.environ.get("BENTOML_BUNDLE_LOCAL_BUILD", False)).lower() == "false": packages.append(f"bentoml>={'.'.join([str(i) for i in openllm.utils.pkg.pkg_version_info('bentoml')])}")
+  if str(os.environ.get("BENTOML_BUNDLE_LOCAL_BUILD", False)).lower() == "false": packages.append(f"bentoml>={'.'.join([str(i) for i in openllm_core.utils.pkg.pkg_version_info('bentoml')])}")

  env = llm.config["env"]
  framework_envvar = env["framework_value"]
  if framework_envvar == "flax":
-    if not openllm.utils.is_flax_available(): raise ValueError(f"Flax is not available, while {env.framework} is set to 'flax'")
+    if not openllm_core.utils.is_flax_available(): raise ValueError(f"Flax is not available, while {env.framework} is set to 'flax'")
    packages.extend([importlib.metadata.version("flax"), importlib.metadata.version("jax"), importlib.metadata.version("jaxlib")])
  elif framework_envvar == "tf":
-    if not openllm.utils.is_tf_available(): raise ValueError(f"TensorFlow is not available, while {env.framework} is set to 'tf'")
+    if not openllm_core.utils.is_tf_available(): raise ValueError(f"TensorFlow is not available, while {env.framework} is set to 'tf'")
    candidates = ("tensorflow", "tensorflow-cpu", "tensorflow-gpu", "tf-nightly", "tf-nightly-cpu", "tf-nightly-gpu", "intel-tensorflow", "intel-tensorflow-avx512", "tensorflow-rocm", "tensorflow-macos",)
    # For the metadata, we have to look for both tensorflow and tensorflow-cpu
    for candidate in candidates:
@@ -68,19 +67,19 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
          _tf_version = importlib.metadata.version(candidate)
          packages.extend([f"tensorflow>={_tf_version}"])
        break
-      except importlib.metadata.PackageNotFoundError: pass  # noqa: PERF203 # Ok to ignore here since we actually need to check for all possible tensorflow distribution.
+      except importlib.metadata.PackageNotFoundError: pass  # Ok to ignore here since we actually need to check for all possible tensorflow distribution.
  else:
-    if not openllm.utils.is_torch_available(): raise ValueError("PyTorch is not available. Make sure to have it locally installed.")
+    if not openllm_core.utils.is_torch_available(): raise ValueError("PyTorch is not available. Make sure to have it locally installed.")
    packages.extend([f'torch>={importlib.metadata.version("torch")}'])
  wheels: list[str] = []
-  built_wheels = build_editable(llm_fs.getsyspath("/"))
-  if built_wheels is not None: wheels.append(llm_fs.getsyspath(f"/{built_wheels.split('/')[-1]}"))
+  built_wheels: list[str | None] = [build_editable(llm_fs.getsyspath("/"), t.cast(t.Literal["openllm", "openllm_core", "openllm_client"], p)) for p in ("openllm_core", "openllm_client", "openllm")]
+  if all(i for i in built_wheels): wheels.extend([llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in t.cast(t.List[str], built_wheels)])
  return PythonOptions(packages=packages, wheels=wheels, lock_packages=False, extra_index_url=["https://download.pytorch.org/whl/cu118"])

 def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_resource: float, quantize: LiteralString | None, bettertransformer: bool | None, adapter_map: dict[str, str | None] | None, dockerfile_template: str | None, runtime: t.Literal["ggml", "transformers"], serialisation_format: t.Literal["safetensors", "legacy"], container_registry: LiteralContainerRegistry, container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions:
  from openllm.cli._factory import parse_config_options
  environ = parse_config_options(llm.config, llm.config["timeout"], workers_per_resource, None, True, os.environ.copy())
-  env: openllm.utils.EnvVarMixin = llm.config["env"]
+  env: openllm_core.utils.EnvVarMixin = llm.config["env"]
  if env["framework_value"] == "vllm": serialisation_format = "legacy"
  env_dict = {
      env.framework: env["framework_value"], env.config: f"'{llm.config.model_dump_json().decode()}'",
@@ -91,13 +90,45 @@ def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_
  if adapter_map: env_dict["BITSANDBYTES_NOWELCOME"] = os.environ.get("BITSANDBYTES_NOWELCOME", "1")

  # We need to handle None separately here, as env from subprocess doesn't accept None value.
-  _env = openllm.utils.EnvVarMixin(llm.config["model_name"], bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)
+  _env = openllm_core.utils.EnvVarMixin(llm.config["model_name"], bettertransformer=bettertransformer, quantize=quantize, runtime=runtime)

  env_dict[_env.bettertransformer] = str(_env["bettertransformer_value"])
  if _env["quantize_value"] is not None: env_dict[_env.quantize] = t.cast(str, _env["quantize_value"])
  env_dict[_env.runtime] = _env["runtime_value"]
  return DockerOptions(base_image=f"{oci.CONTAINER_NAMES[container_registry]}:{oci.get_base_container_tag(container_version_strategy)}", env=env_dict, dockerfile_template=dockerfile_template)

+# Marker comments that write_service searches for in each line of the service template:
+# a line containing one of them is formatted and the trailing marker is cut off.
+OPENLLM_MODEL_NAME = "# openllm: model name"
+OPENLLM_MODEL_ADAPTER_MAP = "# openllm: model adapter map"
+class ModelNameFormatter(string.Formatter):
+  """Formatter that substitutes one fixed value for the ``{<model_keyword>}`` placeholder.
+
+  Subclasses override ``model_keyword`` to target a different placeholder name.
+  """
+  model_keyword: LiteralString = "__model_name__"
+  def __init__(self, model_name: str):
+    """Store ``model_name``, the value that ``vformat`` substitutes for the placeholder."""
+    super().__init__()
+    self.model_name = model_name
+  def vformat(self, format_string: str, *args: t.Any, **attrs: t.Any) -> t.Any:
+    """Format ``format_string`` with the stored value only; ``args`` and ``attrs`` are accepted but ignored."""
+    return super().vformat(format_string, (), {self.model_keyword: self.model_name})
+  def can_format(self, value: str) -> bool:
+    """Return True if ``value`` parses as a valid format string, False if parsing raises ValueError."""
+    try:
+      # Formatter.parse returns a lazy iterator: malformed input only raises once it is consumed.
+      for _ in self.parse(value): pass
+    except ValueError: return False
+    return True
+class ModelIdFormatter(ModelNameFormatter):
+  """ModelNameFormatter variant whose placeholder keyword is ``__model_id__``."""
+  model_keyword: LiteralString = "__model_id__"
+class ModelAdapterMapFormatter(ModelNameFormatter):
+  """ModelNameFormatter variant whose placeholder keyword is ``__model_adapter_map__``."""
+  model_keyword: LiteralString = "__model_adapter_map__"
+
+# Path of the `_service.py` template read by write_service; it sits in the parent of this module's directory.
+_service_file = Path(os.path.abspath(__file__)).parent.parent/"_service.py"
+def write_service(llm: openllm.LLM[t.Any, t.Any], adapter_map: dict[str, str | None] | None, llm_fs: FS) -> None:
+  """Render the ``_service.py`` template for ``llm`` and write it into ``llm_fs``.
+
+  Template lines containing the ``OPENLLM_MODEL_NAME`` marker are formatted with the model name, and lines containing
+  the ``OPENLLM_MODEL_ADAPTER_MAP`` marker are formatted with the JSON-encoded ``adapter_map``; in both cases the
+  trailing marker is cut off. The result, prefixed with a "GENERATED BY" header, is written to the path given by
+  ``llm.config["service_name"]``.
+  """
+  from openllm_core.utils import DEBUG
+  model_name = llm.config["model_name"]
+  logger.debug("Generating service file for %s at %s (dir=%s)", model_name, llm.config["service_name"], llm_fs.getsyspath("/"))
+  with open(_service_file.__fspath__(), "r") as f: src_contents = f.readlines()
+  # Replace by position rather than re-scanning the list with list.index() for every matching line.
+  for idx, it in enumerate(src_contents):
+    # The slice drops the marker plus 3 more characters (presumably the two spaces before it and the newline after it).
+    if OPENLLM_MODEL_NAME in it: src_contents[idx] = ModelNameFormatter(model_name).vformat(it)[:-(len(OPENLLM_MODEL_NAME) + 3)] + "\n"
+    elif OPENLLM_MODEL_ADAPTER_MAP in it: src_contents[idx] = ModelAdapterMapFormatter(orjson.dumps(adapter_map).decode()).vformat(it)[:-(len(OPENLLM_MODEL_ADAPTER_MAP) + 3)] + "\n"
+  script = f"# GENERATED BY 'openllm build {model_name}'. DO NOT EDIT\n\n" + "".join(src_contents)
+  if DEBUG: logger.info("Generated script:\n%s", script)
+  llm_fs.writetext(llm.config["service_name"], script)
+
@inject
 def create_bento(bento_tag: bentoml.Tag, llm_fs: FS, llm: openllm.LLM[t.Any, t.Any], workers_per_resource: str | float, quantize: LiteralString | None, bettertransformer: bool | None, dockerfile_template: str | None, adapter_map: dict[str, str | None] | None = None, extra_dependencies: tuple[str, ...] | None = None,
                runtime: t.Literal[ "ggml", "transformers"] = "transformers", serialisation_format: t.Literal["safetensors", "legacy"] = "safetensors", container_registry: LiteralContainerRegistry = "ecr", container_version_strategy: LiteralContainerVersionStrategy = "release",
@@ -108,14 +139,14 @@ def create_bento(bento_tag: bentoml.Tag, llm_fs: FS, llm: openllm.LLM[t.Any, t.A
  if adapter_map: labels.update(adapter_map)
  if isinstance(workers_per_resource, str):
    if workers_per_resource == "round_robin": workers_per_resource = 1.0
-    elif workers_per_resource == "conserved": workers_per_resource = 1.0 if openllm.utils.device_count() == 0 else float(1 / openllm.utils.device_count())
+    elif workers_per_resource == "conserved": workers_per_resource = 1.0 if openllm_core.utils.device_count() == 0 else float(1 / openllm_core.utils.device_count())
    else:
      try: workers_per_resource = float(workers_per_resource)
      except ValueError: raise ValueError("'workers_per_resource' only accept ['round_robin', 'conserved'] as possible strategies.") from None
  elif isinstance(workers_per_resource, int): workers_per_resource = float(workers_per_resource)
  logger.info("Building Bento for '%s'", llm.config["start_name"])
  # add service.py definition to this temporary folder
-  openllm.utils.codegen.write_service(llm, adapter_map, llm_fs)
+  write_service(llm, adapter_map, llm_fs)

  llm_spec = ModelSpec.from_item({"tag": str(llm.tag), "alias": llm.tag.name})
  build_config = BentoBuildConfig(
@@ -134,7 +165,7 @@ def create_bento(bento_tag: bentoml.Tag, llm_fs: FS, llm: openllm.LLM[t.Any, t.A
    if "__bento_name__" in it: service_contents[service_contents.index(it)] = it.format(__bento_name__=str(bento.tag))

  script = "".join(service_contents)
-  if openllm.utils.DEBUG: logger.info("Generated script:\n%s", script)
+  if openllm_core.utils.DEBUG: logger.info("Generated script:\n%s", script)

  bento._fs.writetext(service_fs_path, script)
  if "model_store" in inspect.signature(bento.save).parameters: return bento.save(bento_store=_bento_store, model_store=_model_store)
--- a/openllm-python/src/openllm/bundle/oci/__init__.py
+++ b/openllm-python/src/openllm/bundle/oci/__init__.py
@@ -1,26 +1,23 @@
 # mypy: disable-error-code="misc"
 """OCI-related utilities for OpenLLM. This module is considered to be internal and API are subjected to change."""
 from __future__ import annotations
-import functools, importlib, logging, os, pathlib, shutil, subprocess, typing as t
+import functools, importlib, logging, os, pathlib, shutil, subprocess, typing as t, openllm_core
 from datetime import datetime, timedelta, timezone
 import attr, orjson, bentoml, openllm
-from openllm.utils.lazy import VersionInfo
+from openllm_core.utils.lazy import VersionInfo

 if t.TYPE_CHECKING:
+  from openllm_core._typing_compat import LiteralContainerRegistry, LiteralContainerVersionStrategy
  from ghapi import all
-  from openllm._typing_compat import RefTuple, LiteralString
+  from openllm_core._typing_compat import RefTuple, LiteralString

-all = openllm.utils.LazyLoader("all", globals(), "ghapi.all")  # noqa: F811
+all = openllm_core.utils.LazyLoader("all", globals(), "ghapi.all")  # noqa: F811

 logger = logging.getLogger(__name__)

 _BUILDER = bentoml.container.get_backend("buildx")
 ROOT_DIR = pathlib.Path(os.path.abspath("__file__")).parent.parent.parent

-# TODO: support quay
-LiteralContainerRegistry = t.Literal["docker", "gh", "ecr"]
-LiteralContainerVersionStrategy = t.Literal["release", "nightly", "latest", "custom"]
-
 # XXX: This registry will be hard code for now for easier to maintain
 # but in the future, we can infer based on git repo and everything to make it more options for users
 # to build the base image. For now, all of the base image will be <registry>/bentoml/openllm:...
@@ -31,10 +28,10 @@ _CONTAINER_REGISTRY: dict[LiteralContainerRegistry, str] = {"docker": "docker.io
 _OWNER = "bentoml"
 _REPO = "openllm"

-_module_location = openllm.utils.pkg.source_locations("openllm")
+_module_location = openllm_core.utils.pkg.source_locations("openllm")

@functools.lru_cache
-@openllm.utils.apply(str.lower)
+@openllm_core.utils.apply(str.lower)
 def get_base_container_name(reg: LiteralContainerRegistry) -> str: return _CONTAINER_REGISTRY[reg]

 def _convert_version_from_string(s: str) -> VersionInfo: return VersionInfo.from_version_string(s)
@@ -43,7 +40,7 @@ def _commit_time_range(r: int = 5) -> str: return (datetime.now(timezone.utc) -
 class VersionNotSupported(openllm.exceptions.OpenLLMException):
  """Raised when the stable release is too low that it doesn't include OpenLLM base container."""

-_RefTuple: type[RefTuple] = openllm.utils.codegen.make_attr_tuple_class("_RefTuple", ["git_hash", "version", "strategy"])
+_RefTuple: type[RefTuple] = openllm_core.utils.codegen.make_attr_tuple_class("_RefTuple", ["git_hash", "version", "strategy"])

 def nightly_resolver(cls: type[RefResolver]) -> str:
  # NOTE: all openllm container will have sha-<git_hash[:7]>
@@ -60,7 +57,7 @@ def nightly_resolver(cls: type[RefResolver]) -> str:
@attr.attrs(eq=False, order=False, slots=True, frozen=True)
 class RefResolver:
  git_hash: str = attr.field()
-  version: openllm.utils.VersionInfo = attr.field(converter=_convert_version_from_string)
+  version: openllm_core.utils.VersionInfo = attr.field(converter=_convert_version_from_string)
  strategy: LiteralContainerVersionStrategy = attr.field()
  _ghapi: t.ClassVar[all.GhApi] = all.GhApi(owner=_OWNER, repo=_REPO)
  @classmethod
@@ -74,7 +71,7 @@ class RefResolver:
      version_str = meta["name"].lstrip("v")
      version: tuple[str, str | None] = (cls._ghapi.git.get_ref(ref=f"tags/{meta['name']}")["object"]["sha"], version_str)
    else: version = ("", version_str)
-    if openllm.utils.VersionInfo.from_version_string(t.cast(str, version_str)) < (0, 2, 12): raise VersionNotSupported(f"Version {version_str} doesn't support OpenLLM base container. Consider using 'nightly' or upgrade 'openllm>=0.2.12'")
+    if openllm_core.utils.VersionInfo.from_version_string(t.cast(str, version_str)) < (0, 2, 12): raise VersionNotSupported(f"Version {version_str} doesn't support OpenLLM base container. Consider using 'nightly' or upgrade 'openllm>=0.2.12'")
    return _RefTuple((*version, "release" if _use_base_strategy else "custom"))
  @classmethod
  @functools.lru_cache(maxsize=64)
@@ -101,7 +98,7 @@ def build_container(registries: LiteralContainerRegistry | t.Sequence[LiteralCon
  try:
    if not _BUILDER.health(): raise openllm.exceptions.Error
  except (openllm.exceptions.Error, subprocess.CalledProcessError): raise RuntimeError("Building base container requires BuildKit (via Buildx) to be installed. See https://docs.docker.com/build/buildx/install/ for instalation instruction.") from None
-  if openllm.utils.device_count() == 0: raise RuntimeError("Building base container requires GPUs (None available)")
+  if openllm_core.utils.device_count() == 0: raise RuntimeError("Building base container requires GPUs (None available)")
  if not shutil.which("nvidia-container-runtime"): raise RuntimeError("NVIDIA Container Toolkit is required to compile CUDA kernel in container.")
  if not _module_location: raise RuntimeError("Failed to determine source location of 'openllm'. (Possible broken installation)")
  pyproject_path = pathlib.Path(_module_location).parent.parent / "pyproject.toml"
@@ -111,7 +108,7 @@ def build_container(registries: LiteralContainerRegistry | t.Sequence[LiteralCon
    registries = [registries] if isinstance(registries, str) else list(registries)
    tags = {name: f"{_CONTAINER_REGISTRY[name]}:{get_base_container_tag(version_strategy)}" for name in registries}
  try:
-    outputs = _BUILDER.build(file=pathlib.Path(__file__).parent.joinpath("Dockerfile").resolve().__fspath__(), context_path=pyproject_path.parent.__fspath__(), tag=tuple(tags.values()), push=push, progress="plain" if openllm.utils.get_debug_mode() else "auto", quiet=machine)
+    outputs = _BUILDER.build(file=pathlib.Path(__file__).parent.joinpath("Dockerfile").resolve().__fspath__(), context_path=pyproject_path.parent.__fspath__(), tag=tuple(tags.values()), push=push, progress="plain" if openllm_core.utils.get_debug_mode() else "auto", quiet=machine)
    if machine and outputs is not None: tags["image_sha"] = outputs.decode("utf-8").strip()
  except Exception as err: raise openllm.exceptions.OpenLLMException(f"Failed to containerize base container images (Scroll up to see error above, or set OPENLLMDEVDEBUG=True for more traceback):\n{err}") from err
  return tags