mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-16 12:59:33 -04:00
fix(vllm): CPU build compatibility with vllm 0.14.1
Validated end-to-end on CPU with Qwen2.5-0.5B-Instruct (LoadModel, Predict, TokenizeString, Free all working). - requirements-cpu-after.txt: pin vllm to 0.14.1+cpu (pre-built wheel from GitHub releases) for x86_64 and aarch64. vllm 0.14.1 is the newest CPU wheel whose torch dependency resolves against published PyTorch builds (torch==2.9.1+cpu). Later vllm CPU wheels currently require torch==2.10.0+cpu which is only available on the PyTorch test channel with incompatible torchvision. - requirements-cpu.txt: bump torch to 2.9.1+cpu, add torchvision/torchaudio so uv resolves them consistently from the PyTorch CPU index. - install.sh: add --index-strategy=unsafe-best-match for CPU builds so uv can mix the PyTorch index and PyPI for transitive deps (matches the existing intel profile behaviour). - backend.py LoadModel: vllm >= 0.14 removed AsyncLLMEngine.get_model_config so the old code path errored out with AttributeError on model load. Switch to the new get_tokenizer()/tokenizer accessor with a fallback to building the tokenizer directly from request.Model.
This commit is contained in:
@@ -183,13 +183,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
|
||||
try:
|
||||
engine_model_config = await self.llm.get_model_config()
|
||||
self.tokenizer = get_tokenizer(
|
||||
engine_model_config.tokenizer,
|
||||
tokenizer_mode=engine_model_config.tokenizer_mode,
|
||||
trust_remote_code=engine_model_config.trust_remote_code,
|
||||
truncation_side="left",
|
||||
)
|
||||
# vLLM >= 0.14 removed get_model_config() on AsyncLLM; the tokenizer
|
||||
# is either already loaded on the engine or can be built from the
|
||||
# Model name directly.
|
||||
tokenizer = None
|
||||
if hasattr(self.llm, "get_tokenizer"):
|
||||
try:
|
||||
tokenizer = await self.llm.get_tokenizer()
|
||||
except TypeError:
|
||||
tokenizer = self.llm.get_tokenizer()
|
||||
except Exception:
|
||||
tokenizer = None
|
||||
if tokenizer is None and hasattr(self.llm, "tokenizer"):
|
||||
tokenizer = self.llm.tokenizer
|
||||
if tokenizer is None:
|
||||
tokenizer = get_tokenizer(
|
||||
request.Model,
|
||||
trust_remote_code=bool(request.TrustRemoteCode),
|
||||
truncation_side="left",
|
||||
)
|
||||
self.tokenizer = tokenizer
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
|
||||
|
||||
@@ -26,6 +26,12 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
# CPU builds need unsafe-best-match so uv can mix the PyTorch CPU index
|
||||
# (torch==2.9.1+cpu, torchvision, torchaudio) with PyPI when resolving transformers/vllm.
|
||||
if [ "x${BUILD_PROFILE}" == "xcpu" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# We don't embed this into the images as it is a large dependency and not always needed.
|
||||
# Besides, the speed inference are not actually usable in the current state for production use-cases.
|
||||
if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
https://github.com/vllm-project/vllm/releases/download/v0.8.5/vllm-0.8.5+cpu-cp38-abi3-manylinux_2_35_x86_64.whl
|
||||
vllm @ https://github.com/vllm-project/vllm/releases/download/v0.14.1/vllm-0.14.1+cpu-cp38-abi3-manylinux_2_35_x86_64.whl ; platform_machine == "x86_64"
|
||||
vllm @ https://github.com/vllm-project/vllm/releases/download/v0.14.1/vllm-0.14.1+cpu-cp38-abi3-manylinux_2_35_aarch64.whl ; platform_machine == "aarch64"
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
accelerate
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
torch==2.7.0+cpu
|
||||
accelerate
|
||||
torch==2.9.1+cpu
|
||||
torchvision
|
||||
torchaudio
|
||||
transformers
|
||||
|
||||
Reference in New Issue
Block a user