feat(vllm): bump to 0.2.2 (#695)

* feat(vllm): bump to 0.2.2

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update changelog

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: move up to CUDA 12.1

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* fix: remove auto-gptq installation since the builder image doesn't have access to GPU

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* fix: update containerization warning

  Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
2026-05-19 05:57:39 -04:00 · 2023-11-19 02:52:32 -05:00
parent 206521e02d
commit 539f250c0f
10 changed files with 25 additions and 31 deletions
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -199,7 +199,7 @@ class LLM(t.Generic[M, T], ReprMixin):
      system_message=system_message,
      LLM__model_attrs=model_attrs,
      LLM__tokenizer_attrs=tokenizer_attrs,
-      llm_dtype__=torch_dtype.lower(),
+      llm_dtype__=dtype.lower(),
      llm_backend__=backend,
      llm_config__=llm_config,
      llm_trust_remote_code__=trust_remote_code,
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -65,15 +65,7 @@ def construct_python_options(llm, llm_fs, extra_dependencies=None, adapter_map=N
  built_wheels = [build_editable(llm_fs.getsyspath('/'), p) for p in ('openllm_core', 'openllm_client', 'openllm')]
  if all(i for i in built_wheels):
    wheels = [llm_fs.getsyspath(f"/{i.split('/')[-1]}") for i in built_wheels]
-  return PythonOptions(
-    packages=packages,
-    wheels=wheels,
-    lock_packages=True,
-    extra_index_url=[
-      'https://download.pytorch.org/whl/cu118',
-      'https://huggingface.github.io/autogptq-index/whl/cu118/',
-    ],
-  )
+  return PythonOptions(packages=packages, wheels=wheels, lock_packages=True)


 def construct_docker_options(
--- a/openllm-python/src/openllm/bundle/oci/Dockerfile
+++ b/openllm-python/src/openllm/bundle/oci/Dockerfile
@@ -10,13 +10,13 @@ ENV PATH /opt/conda/bin:$PATH
 ENV DEBIAN_FRONTEND=noninteractive

 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        build-essential \
-        ca-certificates \
-        ccache \
-        curl \
-        libssl-dev ca-certificates make \
-        git && \
-        rm -rf /var/lib/apt/lists/*
+  build-essential \
+  ca-certificates \
+  ccache \
+  curl \
+  libssl-dev ca-certificates make \
+  git && \
+  rm -rf /var/lib/apt/lists/*


 # Install required dependencies
@@ -29,8 +29,8 @@ COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml ./
 # pip install autoawq --no-cache-dir && \
 RUN --mount=type=cache,target=/root/.cache/pip \
  pip install --extra-index-url "https://huggingface.github.io/autogptq-index/whl/cu118/" \
-              -v --no-cache-dir \
-              "ray==2.6.0" "einops" "vllm==0.2.1.post1" "auto-gptq[triton]" "torch==2.0.1" xformers && \
+  -v --no-cache-dir \
+  "ray==2.6.0" "vllm==0.2.2" xformers && \
  pip install --no-cache-dir -e .

 FROM base-container
--- a/openllm-python/src/openllm/bundle/oci/init.py
+++ b/openllm-python/src/openllm/bundle/oci/init.py
@@ -50,12 +50,13 @@ class RefResolver:
    else:
      raise ValueError(f'Unknown strategy: {strategy_or_version}')

+  # fmt: off
  @property
-  def tag(self):
-    return 'latest' if self.strategy in {'latest', 'nightly'} else repr(self.version)
-
+  def tag(self):return 'latest' if self.strategy in {'latest','nightly'} else repr(self.version)
  @staticmethod
-  def construct_base_image(reg, strategy=None):
+  def construct_base_image(reg,strategy=None):
+    if reg == 'gh': logger.warning("Setting base registry to 'gh' will affect cold start performance on GCP/AWS.")
+    elif reg == 'docker': logger.warning('docker is base image is yet to be supported. Falling back to "ecr".'); reg = 'ecr'
    return f'{_CONTAINER_REGISTRY[reg]}:{RefResolver.from_strategy(strategy).tag}'