chore(vllm): track the darwin vllm-metal pin via the autobumper

The Apple Silicon build pinned vLLM 0.23.0 as a hidden string in install.sh
while floating the vllm-metal wheel on releases/latest - the two could drift
apart silently. Make both a tracked, reproducible pair (VLLM_METAL_VERSION +
VLLM_VERSION), fetch the wheel by tag, and add .github/bump_vllm_metal.sh wired
into bump_deps.yaml. It tracks vllm-project/vllm-metal (not vllm/vllm latest),
reading the coupled vLLM source version from vllm-metal's own installer, and
opens a bump PR - mirroring the existing bump_vllm_wheel.sh for the cu130 wheel.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:opus-4.8 [Claude Code]
This commit is contained in:
Ettore Di Giacinto
2026-06-24 20:03:14 +00:00
parent 3447b28bbd
commit 7743a0abc0
3 changed files with 110 additions and 10 deletions

57
.github/bump_vllm_metal.sh vendored Executable file
View File

@@ -0,0 +1,57 @@
#!/bin/bash
# Bump the vllm-metal pins in the vLLM backend's darwin (Apple Silicon) install
# path. The macOS/Metal build (backend/python/vllm/install.sh, Darwin branch)
# installs vllm-metal, which is version-locked to a specific vLLM source release.
# Two values must move together:
#   VLLM_METAL_VERSION -> the vllm-metal GitHub release tag (its prebuilt wheel)
#   VLLM_VERSION       -> the vLLM source version that release builds against
# vllm-metal declares the latter in its OWN install.sh as `vllm_v="X.Y.Z"`. This
# script reads both from vllm-metal's latest release and rewrites them in a
# single sed invocation, only after confirming that BOTH pins are present --
# mirroring bump_vllm_wheel.sh, which does the same for the Linux cu130 wheel.
#
# This deliberately tracks vllm-project/vllm-metal, NOT vllm-project/vllm: the
# darwin build can only use the exact vLLM version vllm-metal supports, so it may
# lag the Linux pin (requirements-cublas13-after.txt) until vllm-metal catches up.
#
# Usage: bump_vllm_metal.sh <repo> <install-file> <var-name>
#   <repo>          GitHub repository to track, e.g. vllm-project/vllm-metal
#   <install-file>  file holding the pins, e.g. backend/python/vllm/install.sh
#   <var-name>      prefix of the output files created in the current directory
#
# Outputs:
#   <var-name>_message.txt  one-line summary of the bump
#   <var-name>_commit.txt   the new vllm-metal release tag
#
# Exit status: 0 when the pins were rewritten (including a no-op rewrite to the
# values already present); non-zero on bad usage, when a pin is missing from
# <install-file>, or when the release tag / vllm_v cannot be resolved.
set -xe

REPO=$1 # vllm-project/vllm-metal
FILE=$2 # backend/python/vllm/install.sh
VAR=$3  # VLLM_METAL_VERSION (used for the workflow's output file names)

if [ -z "$FILE" ] || [ -z "$REPO" ] || [ -z "$VAR" ]; then
  echo "usage: $0 <repo> <install-file> <var-name>" >&2
  exit 1
fi

# Check the pins BEFORE touching the network or the file. Both must exist, or
# the rewrite below would update only one of them and leave the pair out of
# sync. A missing pin is a hard error: no output files are written in that
# case, so reporting success would only move the failure to the caller.
CURRENT_TAG=$(grep -oE 'VLLM_METAL_VERSION="[^"]*"' "$FILE" | head -1 | cut -d'"' -f2 || true)
if [ -z "$CURRENT_TAG" ]; then
  echo "Could not find VLLM_METAL_VERSION=\"...\" in $FILE." >&2
  exit 1
fi
if ! grep -qE 'VLLM_VERSION="[^"]*"' "$FILE"; then
  echo "Could not find VLLM_VERSION=\"...\" in $FILE." >&2
  exit 1
fi

# vllm-metal ships frequent dev releases, all flagged as non-prerelease, so
# /releases/latest returns the newest one (with its cp312 wheel asset).
LATEST_TAG=$(curl -fsSL -H "Accept: application/vnd.github+json" \
  "https://api.github.com/repos/$REPO/releases/latest" \
  | python3 -c "import json,sys; print(json.load(sys.stdin)['tag_name'])")

# The tag is remote data that ends up in a URL, in a sed replacement (where
# `|`, `&` and `\` are special) and inside a double-quoted shell assignment in
# $FILE. Accept only a conservative character set; this also rejects an empty
# tag.
TAG_RE='^[A-Za-z0-9][A-Za-z0-9._+-]*$'
if ! [[ "$LATEST_TAG" =~ $TAG_RE ]]; then
  echo "Could not resolve a usable vllm-metal tag (got: '$LATEST_TAG')." >&2
  exit 1
fi

# The coupled vLLM source version lives in vllm-metal's installer at that tag.
# The grep pattern restricts the result to digits and dots, so it needs no
# further validation beyond the emptiness check.
NEW_VLLM_VERSION=$(curl -fsSL \
  "https://raw.githubusercontent.com/$REPO/$LATEST_TAG/install.sh" \
  | grep -oE 'vllm_v="[0-9]+\.[0-9]+\.[0-9]+"' | head -1 | cut -d'"' -f2)
if [ -z "$NEW_VLLM_VERSION" ]; then
  echo "Could not resolve vllm-metal tag ($LATEST_TAG) or its vllm_v ($NEW_VLLM_VERSION)." >&2
  exit 1
fi

# Rewrite both pins. peter-evans/create-pull-request opens no PR on a clean tree,
# so a no-op rewrite (already current) is safe. Options come first and the file
# last (after `--`) so the call does not depend on GNU option permutation.
sed -i \
  -e "s|VLLM_METAL_VERSION=\"[^\"]*\"|VLLM_METAL_VERSION=\"$LATEST_TAG\"|" \
  -e "s|VLLM_VERSION=\"[^\"]*\"|VLLM_VERSION=\"$NEW_VLLM_VERSION\"|" \
  -- "$FILE"

# Truncate rather than append so a leftover file from an earlier run cannot
# leak extra lines into the outputs.
echo "vllm-metal ${CURRENT_TAG} -> ${LATEST_TAG} (builds vLLM ${NEW_VLLM_VERSION}): https://github.com/$REPO/releases/tag/${LATEST_TAG}" > "${VAR}_message.txt"
echo "${LATEST_TAG}" > "${VAR}_commit.txt"

View File

@@ -154,3 +154,39 @@ jobs:
branch: "update/VLLM_VERSION"
body: ${{ steps.bump.outputs.message }}
signoff: true
bump-vllm-metal:
# The darwin (Apple Silicon) vLLM build installs vllm-metal, which is locked
# to a specific vLLM source release. install.sh pins both VLLM_METAL_VERSION
# (the wheel release) and VLLM_VERSION (the vLLM it builds against); this job
# tracks vllm-project/vllm-metal and rewrites both atomically. Separate from
# bump-vllm-wheel because darwin follows vllm-metal, not vllm/vllm latest.
# The job is skipped in any repository other than mudler/LocalAI.
if: github.repository == 'mudler/LocalAI'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v7
- name: Bump vllm-metal pin 🔧
id: bump
# The script receives VLLM_METAL_VERSION as its third argument and uses it as
# the prefix of the two files it writes (VLLM_METAL_VERSION_message.txt and
# VLLM_METAL_VERSION_commit.txt). Each file is then copied into a multi-line
# step output (`message`, `commit`) via the `name<<EOF ... EOF` form, and the
# files are deleted afterwards.
run: |
bash .github/bump_vllm_metal.sh vllm-project/vllm-metal backend/python/vllm/install.sh VLLM_METAL_VERSION
{
echo 'message<<EOF'
cat "VLLM_METAL_VERSION_message.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
{
echo 'commit<<EOF'
cat "VLLM_METAL_VERSION_commit.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
rm -rfv VLLM_METAL_VERSION_message.txt VLLM_METAL_VERSION_commit.txt
- name: Create Pull Request
# The PR title embeds the `commit` output (the new vllm-metal tag) and the
# body is the `message` output of the bump step. The branch is pushed to the
# ci-forks/LocalAI fork using the UPDATE_BOT_TOKEN secret.
# NOTE(review): the bump script states this action opens no PR when the
# working tree is unchanged -- confirm against the action's documentation.
uses: peter-evans/create-pull-request@v8
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update vllm-project/vllm-metal (darwin)'
title: 'chore: :arrow_up: Update vllm-metal (darwin) to `${{ steps.bump.outputs.commit }}`'
branch: "update/VLLM_METAL_VERSION"
body: ${{ steps.bump.outputs.message }}
signoff: true

View File

@@ -98,11 +98,15 @@ if [ "$(uname -s)" = "Darwin" ]; then
# intel branch below relies on.
pip install uv
# VERSION COUPLING (read before bumping vLLM!): vllm-metal pins this exact
# vLLM version and builds against its source tarball. It equals LocalAI's
# current vllm pin (see requirements-cublas13-after.txt: vllm==0.23.0). A
# vLLM bump on Linux MUST be coordinated with a vllm-metal release that
# supports the new version, or darwin builds will break.
# vllm-metal version pins -- AUTO-BUMPED by .github/bump_vllm_metal.sh, which
# tracks vllm-project/vllm-metal releases (NOT vllm/vllm latest). VLLM_METAL_VERSION
# is the vllm-metal release tag (its prebuilt wheel); VLLM_VERSION is the vLLM
# source version that release builds against (vllm-metal declares it as vllm_v=).
# They move in lockstep, so darwin can lag the Linux vllm pin
# (requirements-cublas13-after.txt, bumped independently against vllm/vllm) until
# vllm-metal supports a newer vLLM. Keep both as plain double-quoted assignments
# each on their own line so the bumper's sed can rewrite them.
VLLM_METAL_VERSION="v0.3.0.dev20260622062346"
VLLM_VERSION="0.23.0"
_vllm_src=$(mktemp -d)
@@ -122,14 +126,17 @@ if [ "$(uname -s)" = "Darwin" ]; then
popd
popd
# 2) Install the prebuilt vllm-metal wheel from its latest GitHub release.
# It pulls mlx / mlx-metal as deps and registers the `metal` platform
# plugin that backend.py resolves to at engine-init time.
_metal_wheel_url=$(curl -fsSL https://api.github.com/repos/vllm-project/vllm-metal/releases/latest \
# 2) Install the prebuilt vllm-metal wheel from the PINNED release
# (${VLLM_METAL_VERSION}). It pulls mlx / mlx-metal as deps and registers
# the `metal` platform plugin that backend.py resolves to at engine-init
# time. Pinning the tag (vs releases/latest) keeps the wheel and the vLLM
# source build above reproducible and coupled; .github/bump_vllm_metal.sh
# advances both together.
_metal_wheel_url=$(curl -fsSL "https://api.github.com/repos/vllm-project/vllm-metal/releases/tags/${VLLM_METAL_VERSION}" \
| grep -oE '"browser_download_url"[[:space:]]*:[[:space:]]*"[^"]+\.whl"' \
| head -n1 | sed -E 's/.*"(https[^"]+)".*/\1/')
if [ -z "${_metal_wheel_url}" ]; then
echo "ERROR: could not resolve a vllm-metal wheel URL from the latest GitHub release" >&2
echo "ERROR: could not resolve a vllm-metal wheel URL for release ${VLLM_METAL_VERSION}" >&2
exit 1
fi
echo "Installing vllm-metal wheel: ${_metal_wheel_url}"