mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-16 12:59:33 -04:00
Validated end-to-end on CPU with Qwen2.5-0.5B-Instruct (LoadModel, Predict, TokenizeString, Free all working). - requirements-cpu-after.txt: pin vllm to 0.14.1+cpu (pre-built wheel from GitHub releases) for x86_64 and aarch64. vllm 0.14.1 is the newest CPU wheel whose torch dependency resolves against published PyTorch builds (torch==2.9.1+cpu). Later vllm CPU wheels currently require torch==2.10.0+cpu which is only available on the PyTorch test channel with incompatible torchvision. - requirements-cpu.txt: bump torch to 2.9.1+cpu, add torchvision/torchaudio so uv resolves them consistently from the PyTorch CPU index. - install.sh: add --index-strategy=unsafe-best-match for CPU builds so uv can mix the PyTorch index and PyPI for transitive deps (matches the existing intel profile behaviour). - backend.py LoadModel: vllm >= 0.14 removed AsyncLLMEngine.get_model_config so the old code path errored out with AttributeError on model load. Switch to the new get_tokenizer()/tokenizer accessor with a fallback to building the tokenizer directly from request.Model.
52 lines
2.3 KiB
Bash
Executable File
52 lines
2.3 KiB
Bash
Executable File
#!/bin/bash
|
|
set -e
|
|
|
|
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
|
|
|
# Avoid to overcommit the CPU during build
|
|
# https://github.com/vllm-project/vllm/issues/20079
|
|
# https://docs.vllm.ai/en/v0.8.3/serving/env_vars.html
|
|
# https://docs.redhat.com/it/documentation/red_hat_ai_inference_server/3.0/html/vllm_server_arguments/environment_variables-server-arguments
|
|
export NVCC_THREADS=2
|
|
export MAX_JOBS=1
|
|
|
|
backend_dir=$(dirname $0)
|
|
|
|
if [ -d $backend_dir/common ]; then
|
|
source $backend_dir/common/libbackend.sh
|
|
else
|
|
source $backend_dir/../common/libbackend.sh
|
|
fi
|
|
|
|
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
|
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
|
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
|
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
|
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
|
fi
|
|
|
|
# CPU builds need unsafe-best-match to pull torch==2.10.0+cpu from the
|
|
# pytorch test channel while still resolving transformers/vllm from pypi.
|
|
if [ "x${BUILD_PROFILE}" == "xcpu" ]; then
|
|
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
|
fi
|
|
|
|
# We don't embed this into the images as it is a large dependency and not always needed.
|
|
# Besides, the speed inference are not actually usable in the current state for production use-cases.
|
|
if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
|
|
ensureVenv
|
|
# https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
|
|
if [ ! -d vllm ]; then
|
|
git clone https://github.com/vllm-project/vllm
|
|
fi
|
|
pushd vllm
|
|
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.1 protobuf bitsandbytes
|
|
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
|
VLLM_TARGET_DEVICE=cpu python setup.py install
|
|
popd
|
|
rm -rf vllm
|
|
else
|
|
installRequirements
|
|
fi
|