feat(vllm): bump to 0.2.2 (#695)

* feat(vllm): bump to 0.2.2

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update changelog

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: move up to CUDA 12.1

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* fix: remove auto-gptq installation

since the builder image doesn't have access to a GPU

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* fix: update containerization warning

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-11-19 02:52:32 -05:00
committed by GitHub
parent 206521e02d
commit 539f250c0f
10 changed files with 25 additions and 31 deletions

View File

@@ -199,7 +199,7 @@ class LLM(t.Generic[M, T], ReprMixin):
system_message=system_message,
LLM__model_attrs=model_attrs,
LLM__tokenizer_attrs=tokenizer_attrs,
llm_dtype__=torch_dtype.lower(),
llm_dtype__=dtype.lower(),
llm_backend__=backend,
llm_config__=llm_config,
llm_trust_remote_code__=trust_remote_code,

View File

@@ -65,15 +65,7 @@ def construct_python_options(llm, llm_fs, extra_dependencies=None, adapter_map=N
built_wheels = [build_editable(llm_fs.getsyspath('/'), p) for p in ('openllm_core', 'openllm_client', 'openllm')]
if all(i for i in built_wheels):
wheels = [llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in built_wheels]
return PythonOptions(
packages=packages,
wheels=wheels,
lock_packages=True,
extra_index_url=[
'https://download.pytorch.org/whl/cu118',
'https://huggingface.github.io/autogptq-index/whl/cu118/',
],
)
return PythonOptions(packages=packages, wheels=wheels, lock_packages=True)
def construct_docker_options(

View File

@@ -10,13 +10,13 @@ ENV PATH /opt/conda/bin:$PATH
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
ccache \
curl \
libssl-dev ca-certificates make \
git && \
rm -rf /var/lib/apt/lists/*
build-essential \
ca-certificates \
ccache \
curl \
libssl-dev ca-certificates make \
git && \
rm -rf /var/lib/apt/lists/*
# Install required dependencies
@@ -29,8 +29,8 @@ COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml ./
# pip install autoawq --no-cache-dir && \
RUN --mount=type=cache,target=/root/.cache/pip \
pip install --extra-index-url "https://huggingface.github.io/autogptq-index/whl/cu118/" \
-v --no-cache-dir \
"ray==2.6.0" "einops" "vllm==0.2.1.post1" "auto-gptq[triton]" "torch==2.0.1" xformers && \
-v --no-cache-dir \
"ray==2.6.0" "vllm==0.2.2" xformers && \
pip install --no-cache-dir -e .
FROM base-container

View File

@@ -50,12 +50,13 @@ class RefResolver:
else:
raise ValueError(f'Unknown strategy: {strategy_or_version}')
# fmt: off
@property
def tag(self):
return 'latest' if self.strategy in {'latest', 'nightly'} else repr(self.version)
def tag(self):return 'latest' if self.strategy in {'latest','nightly'} else repr(self.version)
@staticmethod
def construct_base_image(reg, strategy=None):
def construct_base_image(reg,strategy=None):
if reg == 'gh': logger.warning("Setting base registry to 'gh' will affect cold start performance on GCP/AWS.")
elif reg == 'docker': logger.warning('docker is base image is yet to be supported. Falling back to "ecr".'); reg = 'ecr'
return f'{_CONTAINER_REGISTRY[reg]}:{RefResolver.from_strategy(strategy).tag}'