Mirror of https://github.com/bentoml/OpenLLM.git (synced 2026-02-19 15:18:12 -05:00)
perf: potentially reduce image size (#675)
* perf: potentially reduce image size
* perf: use base python packages only
* fix: typo
* perf: Shave off 2GB

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
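To sanity-check the size claim, one way is to build the image and break the result down per layer; the exact numbers depend on your cache and platform. A minimal sketch, assuming a hypothetical openllm:slim tag (the RUN --mount cache step in this Dockerfile requires BuildKit):

# Build with BuildKit enabled (needed for the --mount=type=cache pip step)
DOCKER_BUILDKIT=1 docker build -t openllm:slim .
# Report the final image size
docker image ls openllm:slim
# Per-layer sizes show where the savings come from (conda and the CUDA toolchain layers are gone)
docker history openllm:slim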
@@ -1,13 +1,6 @@
 # syntax=docker/dockerfile-upstream:master
-# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
-FROM debian:bullseye-slim as pytorch-install
-
-ARG PYTORCH_VERSION=2.0.1
-ARG PYTHON_VERSION=3.9
-ARG CUDA_VERSION=11.8
-ARG MAMBA_VERSION=23.1.0-1
-ARG CUDA_CHANNEL=nvidia
-ARG INSTALL_CHANNEL=pytorch
+
+FROM python:3.9-slim-bullseye as base-container
 
 # Automatically set by buildx
 ARG TARGETPLATFORM
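For context on the ARG kept above: TARGETPLATFORM is filled in by docker buildx from the --platform flag. A hedged illustration of how the stages removed in the next hunk branched on it (tags are hypothetical):

# buildx sets TARGETPLATFORM=linux/amd64 inside this build
docker buildx build --platform linux/amd64 -t openllm:amd64 .
# the removed pytorch-install stage deliberately ran `exit 1` for this platform
docker buildx build --platform linux/arm64 -t openllm:arm64 .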
@@ -21,84 +14,24 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
     ca-certificates \
     ccache \
     curl \
+    libssl-dev ca-certificates make \
     git && \
     rm -rf /var/lib/apt/lists/*
 
-# Install conda
-# translating Docker's TARGETPLATFORM into mamba arches
-RUN <<EOT
-    case ${TARGETPLATFORM} in
-        "linux/arm64") MAMBA_ARCH=aarch64 ;;
-        *) MAMBA_ARCH=x86_64 ;;
-    esac
-    curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
-EOT
-
-RUN <<EOT
-    chmod +x ~/mambaforge.sh
-    bash ~/mambaforge.sh -b -p /opt/conda
-    rm ~/mambaforge.sh
-EOT
-
-# Install pytorch
-# On arm64 we exit with an error code
-RUN <<EOT
-    case ${TARGETPLATFORM} in
-        "linux/arm64") exit 1 ;;
-        *) /opt/conda/bin/conda update -y conda && /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;;
-    esac
-    /opt/conda/bin/conda clean -ya
-EOT
-
-# CUDA kernels builder image
-FROM pytorch-install as kernel-builder
-
-RUN apt-get update && apt-get install -y --no-install-recommends ninja-build && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
-    /opt/conda/bin/conda clean -ya
-
-# base image
-FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 as base-container
-
-# Conda env
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
 
 WORKDIR /usr/src
 
 ENV DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    libssl-dev ca-certificates make && \
-    rm -rf /var/lib/apt/lists/*
-
-# Copy conda with PyTorch installed
-COPY --from=pytorch-install /opt/conda /opt/conda
-
 # Install required dependencies
 COPY openllm-python/src src
 COPY hatch.toml README.md CHANGELOG.md openllm-python/pyproject.toml ./
 
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    build-essential \
-    ca-certificates \
-    ccache \
-    curl \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
 # Install all required dependencies
 # We have to install autoawq first to avoid conflict with torch, then reinstall torch with vllm
 # below
 # pip install autoawq --no-cache-dir && \
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install --extra-index-url "https://download.pytorch.org/whl/cu118" \
-    --extra-index-url "https://huggingface.github.io/autogptq-index/whl/cu118/" \
+    pip install --extra-index-url "https://huggingface.github.io/autogptq-index/whl/cu118/" \
     -v --no-cache-dir \
-    "ray==2.6.0" "einops" "vllm==0.2.1.post1" "auto-gptq[triton]" "torch==2.0.1+cu118" xformers
+    "ray==2.6.0" "einops" "vllm==0.2.1.post1" "auto-gptq[triton]" "torch==2.0.1" xformers && \
+    pip install --no-cache-dir -e .
 
 FROM base-container
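Note that dropping the download.pytorch.org/whl/cu118 extra index means pip now resolves torch==2.0.1 from PyPI rather than the larger +cu118 build. A quick way to confirm which wheel ended up in the image (openllm:slim is again a hypothetical tag):

# Print the installed torch build and the CUDA version it was compiled against
docker run --rm openllm:slim python -c 'import torch; print(torch.__version__, torch.version.cuda)'
# Wheel metadata shows the resolved version string (2.0.1 vs 2.0.1+cu118)
docker run --rm openllm:slim pip show torch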