mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-06-12 10:29:36 -04:00
fix: update build dependencies and format chat prompt (#569)
chore: update correct check and format prompt Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
# mypy: disable-error-code="misc"
|
||||
from __future__ import annotations
|
||||
import importlib.metadata
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
@@ -82,10 +81,9 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
|
||||
env['backend_value']
|
||||
if not openllm_core.utils.is_torch_available():
|
||||
raise ValueError('PyTorch is not available. Make sure to have it locally installed.')
|
||||
packages.extend([f'torch>={importlib.metadata.version("torch")}'])
|
||||
packages.extend(['torch==2.0.1+cu118', 'vllm==0.2.1.post1', 'xformers==0.0.22']) # XXX: Currently locking this for correctness
|
||||
wheels: list[str] = []
|
||||
built_wheels: list[str |
|
||||
None] = [build_editable(llm_fs.getsyspath('/'), t.cast(t.Literal['openllm', 'openllm_core', 'openllm_client'], p)) for p in ('openllm_core', 'openllm_client', 'openllm')]
|
||||
built_wheels = [build_editable(llm_fs.getsyspath('/'), t.cast(t.Literal['openllm', 'openllm_core', 'openllm_client'], p)) for p in ('openllm_core', 'openllm_client', 'openllm')]
|
||||
if all(i for i in built_wheels):
|
||||
wheels.extend([llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in t.cast(t.List[str], built_wheels)])
|
||||
return PythonOptions(packages=packages,
|
||||
@@ -93,11 +91,10 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
|
||||
lock_packages=False,
|
||||
extra_index_url=['https://download.pytorch.org/whl/cu118', 'https://huggingface.github.io/autogptq-index/whl/cu118/'])
|
||||
|
||||
def construct_docker_options(llm: openllm.LLM[t.Any,
|
||||
t.Any], _: FS, workers_per_resource: float, quantize: LiteralString | None, adapter_map: dict[str, str] | None, dockerfile_template: str | None,
|
||||
def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, quantize: LiteralString | None, adapter_map: dict[str, str] | None, dockerfile_template: str | None,
|
||||
serialisation: LiteralSerialisation, container_registry: LiteralContainerRegistry, container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions:
|
||||
from openllm.cli._factory import parse_config_options
|
||||
environ = parse_config_options(llm.config, llm.config['timeout'], workers_per_resource, None, True, os.environ.copy())
|
||||
environ = parse_config_options(llm.config, llm.config['timeout'], 1.0, None, True, os.environ.copy())
|
||||
env: openllm_core.utils.EnvVarMixin = llm.config['env']
|
||||
env_dict = {
|
||||
env.backend: env['backend_value'],
|
||||
@@ -221,13 +218,12 @@ def create_bento(bento_tag: bentoml.Tag,
|
||||
build_config = BentoBuildConfig(service=f"{llm.config['service_name']}:svc",
|
||||
name=bento_tag.name,
|
||||
labels=labels,
|
||||
models=[llm_spec],
|
||||
description=f"OpenLLM service for {llm.config['start_name']}",
|
||||
include=list(llm_fs.walk.files()),
|
||||
exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
|
||||
python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
|
||||
models=[llm_spec],
|
||||
docker=construct_docker_options(llm, llm_fs, workers_per_resource, quantize, adapter_map, dockerfile_template, _serialisation, container_registry,
|
||||
container_version_strategy))
|
||||
docker=construct_docker_options(llm, llm_fs, quantize, adapter_map, dockerfile_template, _serialisation, container_registry, container_version_strategy))
|
||||
|
||||
bento = bentoml.Bento.create(build_config=build_config, version=bento_tag.version, build_ctx=llm_fs.getsyspath('/'))
|
||||
# NOTE: the model_id_path here are only used for setting this environment variable within the container built with for BentoLLM.
|
||||
|
||||
@@ -57,7 +57,7 @@ FROM pytorch-install as kernel-builder
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends ninja-build && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
|
||||
RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
|
||||
/opt/conda/bin/conda clean -ya
|
||||
|
||||
# base image
|
||||
@@ -95,7 +95,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install --extra-index-url "https://download.pytorch.org/whl/cu118" \
|
||||
--extra-index-url "https://huggingface.github.io/autogptq-index/whl/cu118/" \
|
||||
-v --no-cache-dir \
|
||||
"ray==2.6.0" "einops" "vllm>=0.1.4" "auto-gptq[triton]" "torch>=2.0.1+cu118" xformers "jax[cuda11_local]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ".[opt,mpt,fine-tune,llama,chatglm]"
|
||||
"ray==2.6.0" "einops" "vllm==0.2.1.post1" "auto-gptq[triton]" "torch==2.0.1+cu118" xformers
|
||||
|
||||
FROM base-container
|
||||
|
||||
|
||||
Reference in New Issue
Block a user