mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-05-19 05:57:39 -04:00
feat(vllm): bump to 0.2.2 (#695)
* feat(vllm): bump to 0.2.2
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* chore: update changelog
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* chore: move up to CUDA 12.1
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* fix: remove auto-gptq installation since the builder image doesn't have access to GPU
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
* fix: update containerization warning
  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
---------
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -199,7 +199,7 @@ class LLM(t.Generic[M, T], ReprMixin):
|
||||
system_message=system_message,
|
||||
LLM__model_attrs=model_attrs,
|
||||
LLM__tokenizer_attrs=tokenizer_attrs,
|
||||
llm_dtype__=torch_dtype.lower(),
|
||||
llm_dtype__=dtype.lower(),
|
||||
llm_backend__=backend,
|
||||
llm_config__=llm_config,
|
||||
llm_trust_remote_code__=trust_remote_code,
|
||||
|
||||
@@ -65,15 +65,7 @@ def construct_python_options(llm, llm_fs, extra_dependencies=None, adapter_map=N
|
||||
built_wheels = [build_editable(llm_fs.getsyspath('/'), p) for p in ('openllm_core', 'openllm_client', 'openllm')]
|
||||
if all(i for i in built_wheels):
|
||||
wheels = [llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in built_wheels]
|
||||
return PythonOptions(
|
||||
packages=packages,
|
||||
wheels=wheels,
|
||||
lock_packages=True,
|
||||
extra_index_url=[
|
||||
'https://download.pytorch.org/whl/cu118',
|
||||
'https://huggingface.github.io/autogptq-index/whl/cu118/',
|
||||
],
|
||||
)
|
||||
return PythonOptions(packages=packages, wheels=wheels, lock_packages=True)
|
||||
|
||||
|
||||
def construct_docker_options(
|
||||
|
||||
@@ -10,13 +10,13 @@ ENV PATH /opt/conda/bin:$PATH
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
ccache \
|
||||
curl \
|
||||
libssl-dev ca-certificates make \
|
||||
git && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
ccache \
|
||||
curl \
|
||||
libssl-dev ca-certificates make \
|
||||
git && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
# Install required dependencies
|
||||
@@ -29,8 +29,8 @@ COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml ./
|
||||
# pip install autoawq --no-cache-dir && \
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install --extra-index-url "https://huggingface.github.io/autogptq-index/whl/cu118/" \
|
||||
-v --no-cache-dir \
|
||||
"ray==2.6.0" "einops" "vllm==0.2.1.post1" "auto-gptq[triton]" "torch==2.0.1" xformers && \
|
||||
-v --no-cache-dir \
|
||||
"ray==2.6.0" "vllm==0.2.2" xformers && \
|
||||
pip install --no-cache-dir -e .
|
||||
|
||||
FROM base-container
|
||||
|
||||
@@ -50,12 +50,13 @@ class RefResolver:
|
||||
else:
|
||||
raise ValueError(f'Unknown strategy: {strategy_or_version}')
|
||||
|
||||
# fmt: off
|
||||
@property
|
||||
def tag(self):
|
||||
return 'latest' if self.strategy in {'latest', 'nightly'} else repr(self.version)
|
||||
|
||||
def tag(self):return 'latest' if self.strategy in {'latest','nightly'} else repr(self.version)
|
||||
@staticmethod
|
||||
def construct_base_image(reg, strategy=None):
|
||||
def construct_base_image(reg,strategy=None):
|
||||
if reg == 'gh': logger.warning("Setting base registry to 'gh' will affect cold start performance on GCP/AWS.")
|
||||
elif reg == 'docker': logger.warning('docker is base image is yet to be supported. Falling back to "ecr".'); reg = 'ecr'
|
||||
return f'{_CONTAINER_REGISTRY[reg]}:{RefResolver.from_strategy(strategy).tag}'
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user