From 2036d4e0153f99e87014c5267195a940a06969f9 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Sat, 26 Aug 2023 09:02:52 -0400 Subject: [PATCH] chore(build): use latest vllm pre-built kernel (#261) --- .github/workflows/build.yml | 2 +- openllm-python/pyproject.toml | 4 +- .../src/openllm/bundle/oci/Dockerfile | 59 ++----------------- tools/dependencies.py | 2 +- 4 files changed, 8 insertions(+), 59 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 80a43382..e7d008f3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,7 +55,7 @@ jobs: if: >- contains(needs.get_commit_message.outputs.message, '[ec2 build]') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main'))) env: - EC2_INSTANCE_TYPE: g5.12xlarge + EC2_INSTANCE_TYPE: t3.2xlarge EC2_AMI_ID: ami-089dafe9af191a0fd EC2_SUBNET_ID: subnet-0ca63188fe98788c1,subnet-05997205433b249d0,subnet-07ef5d3e974275fed,subnet-0161ef0151089bb0b EC2_SECURITY_GROUP: sg-051366641bf2b8049 diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml index 838e9536..129f5075 100644 --- a/openllm-python/pyproject.toml +++ b/openllm-python/pyproject.toml @@ -102,7 +102,7 @@ falcon = ["einops", "xformers"] fine-tune = ["peft>=0.4.0", "bitsandbytes", "datasets", "accelerate", "trl"] flan-t5 = ["flax>=0.7", "jax", "jaxlib", "tensorflow", "keras"] full = [ - "openllm[agents,baichuan,chatglm,falcon,fine-tune,flan-t5,ggml,gptq,grpc,llama,mpt,openai,opt,playground,starcoder,vllm]", + "openllm[agents,baichuan,chatglm,falcon,fine-tune,flan-t5,ggml,gptq,grpc,llama,mpt,openai,opt,playground,starcoder,vllm]", ] ggml = ["ctransformers"] gptq = ["auto-gptq[triton]"] @@ -113,7 +113,7 @@ openai = ["openai", "tiktoken"] opt = ["flax>=0.7", "jax", "jaxlib", "tensorflow", "keras"] playground = ["jupyter", "notebook", "ipython", "jupytext", "nbformat"] starcoder = ["bitsandbytes"] -vllm = ["vllm", "ray"] +vllm = ["vllm>=0.1.4", "ray"] [tool.hatch.version] fallback-version = "0.0.0" diff --git a/openllm-python/src/openllm/bundle/oci/Dockerfile b/openllm-python/src/openllm/bundle/oci/Dockerfile index 6f0ef484..c3f4e4ce 100644 --- a/openllm-python/src/openllm/bundle/oci/Dockerfile +++ b/openllm-python/src/openllm/bundle/oci/Dockerfile @@ -60,50 +60,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends ninja-build && RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \ /opt/conda/bin/conda clean -ya -# NOTE: Build vllm CUDA kernels -FROM kernel-builder as vllm-builder - -ENV COMMIT_HASH d1744376ae9fdbfa6a2dc763e1c67309e138fa3d -ARG COMMIT_HASH=${COMMIT_HASH} - -WORKDIR /usr/src - -RUN <=2.0.1+cu118" xformers "jax[cuda11_local]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ".[opt,mpt,fine-tune,llama,chatglm]" + pip install --extra-index-url "https://download.pytorch.org/whl/cu118" \ + --extra-index-url "https://huggingface.github.io/autogptq-index/whl/cu118/" \ + -v --no-cache-dir \ + "ray==2.6.0" "einops" "vllm>=0.1.4" "auto-gptq[triton]" "torch>=2.0.1+cu118" xformers "jax[cuda11_local]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ".[opt,mpt,fine-tune,llama,chatglm]" FROM base-container diff --git a/tools/dependencies.py b/tools/dependencies.py index 3e895f39..e86ea33c 100755 --- a/tools/dependencies.py +++ b/tools/dependencies.py @@ -125,7 +125,7 @@ AGENTS_DEPS = ['transformers[agents]>=4.30', 'diffusers', 'soundfile'] PLAYGROUND_DEPS = ['jupyter', 'notebook', 'ipython', 'jupytext', 'nbformat'] GGML_DEPS = ['ctransformers'] GPTQ_DEPS = ['auto-gptq[triton]'] -VLLM_DEPS = ['vllm', 'ray'] +VLLM_DEPS = ['vllm>=0.1.4', 'ray'] _base_requirements: dict[str, t.Any] = { inflection.dasherize(name): config_cls.__openllm_requirements__ for name, config_cls in openllm.CONFIG_MAPPING.items() if config_cls.__openllm_requirements__