diff --git a/.github/bump_vllm_metal.sh b/.github/bump_vllm_metal.sh
new file mode 100755
index 000000000..249e39b4a
--- /dev/null
+++ b/.github/bump_vllm_metal.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Bump the vllm-metal pins in the vLLM backend's darwin (Apple Silicon) install
+# path. The macOS/Metal build (backend/python/vllm/install.sh, Darwin branch)
+# installs vllm-metal, which is version-locked to a specific vLLM source release.
+# Two values must move together:
+#   VLLM_METAL_VERSION -> the vllm-metal GitHub release tag (its prebuilt wheel)
+#   VLLM_VERSION       -> the vLLM source version that release builds against
+# vllm-metal declares the latter in its OWN install.sh as `vllm_v="X.Y.Z"`. This
+# script reads both from vllm-metal's latest release and rewrites them atomically
+# -- mirroring bump_vllm_wheel.sh, which does the same for the Linux cu130 wheel.
+#
+# This deliberately tracks vllm-project/vllm-metal, NOT vllm-project/vllm: the
+# darwin build can only use the exact vLLM version vllm-metal supports, so it may
+# lag the Linux pin (requirements-cublas13-after.txt) until vllm-metal catches up.
+#
+# Usage: bump_vllm_metal.sh <repo> <file> <var>
+# On success, appends a summary line to <var>_message.txt and the new tag to
+# <var>_commit.txt. Exits non-zero, leaving <file> untouched, when the release
+# tag, its vllm_v, or the existing VLLM_METAL_VERSION pin cannot be resolved.
+set -xe
+REPO=$1 # vllm-project/vllm-metal
+FILE=$2 # backend/python/vllm/install.sh
+VAR=$3 # VLLM_METAL_VERSION (used for the workflow's output file names)
+
+if [ -z "$FILE" ] || [ -z "$REPO" ] || [ -z "$VAR" ]; then
+  echo "usage: $0 <repo> <file> <var>" >&2
+  exit 1
+fi
+
+# vllm-metal ships frequent dev releases, all flagged as non-prerelease, so
+# /releases/latest returns the newest one (with its cp312 wheel asset).
+LATEST_TAG=$(curl -sS -H "Accept: application/vnd.github+json" \
+  "https://api.github.com/repos/$REPO/releases/latest" \
+  | python3 -c "import json,sys; print(json.load(sys.stdin)['tag_name'])")
+
+# The tag is remote data that gets spliced into a URL and into a sed replacement
+# below. Refuse anything that is not a plain tag, so characters such as '|', '&'
+# or '\' can never corrupt the rewrite of $FILE.
+case "$LATEST_TAG" in
+  *[!A-Za-z0-9._+-]*)
+    echo "Unexpected vllm-metal release tag: '$LATEST_TAG'" >&2
+    exit 1
+    ;;
+esac
+
+# The coupled vLLM source version lives in vllm-metal's installer at that tag.
+NEW_VLLM_VERSION=$(curl -fsSL \
+  "https://raw.githubusercontent.com/$REPO/$LATEST_TAG/install.sh" \
+  | grep -oE 'vllm_v="[0-9]+\.[0-9]+\.[0-9]+"' | head -1 | cut -d'"' -f2)
+
+if [ -z "$LATEST_TAG" ] || [ -z "$NEW_VLLM_VERSION" ]; then
+  echo "Could not resolve vllm-metal tag ($LATEST_TAG) or its vllm_v ($NEW_VLLM_VERSION)." >&2
+  exit 1
+fi
+
+set +e
+CURRENT_TAG=$(grep -oE 'VLLM_METAL_VERSION="[^"]*"' "$FILE" | head -1 | cut -d'"' -f2)
+set -e
+
+# Verify the pin exists BEFORE editing: if VLLM_METAL_VERSION is missing, the
+# sed below would still rewrite VLLM_VERSION on its own and break the lockstep
+# this script exists to preserve. Treat it as a hard error rather than exit 0.
+if [ -z "$CURRENT_TAG" ]; then
+  echo "Could not find VLLM_METAL_VERSION=\"...\" in $FILE." >&2
+  exit 1
+fi
+
+# Rewrite both pins. peter-evans/create-pull-request opens no PR on a clean tree,
+# so a no-op rewrite (already current) is safe.
+sed -i "$FILE" \
+  -e "s|VLLM_METAL_VERSION=\"[^\"]*\"|VLLM_METAL_VERSION=\"$LATEST_TAG\"|" \
+  -e "s|VLLM_VERSION=\"[^\"]*\"|VLLM_VERSION=\"$NEW_VLLM_VERSION\"|"
+
+echo "vllm-metal ${CURRENT_TAG} -> ${LATEST_TAG} (builds vLLM ${NEW_VLLM_VERSION}): https://github.com/$REPO/releases/tag/${LATEST_TAG}" >> "${VAR}_message.txt"
+echo "${LATEST_TAG}" >> "${VAR}_commit.txt"
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index aa4b21af7..a2c37881f 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -154,3 +154,39 @@ jobs:
           branch: "update/VLLM_VERSION"
           body: ${{ steps.bump.outputs.message }}
           signoff: true
+
+  bump-vllm-metal:
+    # The darwin (Apple Silicon) vLLM build installs vllm-metal, which is locked
+    # to a specific vLLM source release. install.sh pins both VLLM_METAL_VERSION
+    # (the wheel release) and VLLM_VERSION (the vLLM it builds against); this job
+    # tracks vllm-project/vllm-metal and rewrites both atomically. Separate from
+    # bump-vllm-wheel because darwin follows vllm-metal, not vllm/vllm latest.
+    if: github.repository == 'mudler/LocalAI'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v7
+      - name: Bump vllm-metal pin 🔧
+        id: bump
+        run: |
+          bash .github/bump_vllm_metal.sh vllm-project/vllm-metal backend/python/vllm/install.sh VLLM_METAL_VERSION
+          {
+            echo 'message<<EOF'
+            cat "VLLM_METAL_VERSION_message.txt"
+            echo EOF
+          } >> "$GITHUB_OUTPUT"
+          {
+            echo 'commit<<EOF'
+            cat "VLLM_METAL_VERSION_commit.txt"
+            echo EOF
+          } >> "$GITHUB_OUTPUT"
+          rm -rfv VLLM_METAL_VERSION_message.txt VLLM_METAL_VERSION_commit.txt
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v8
+        with:
+          token: ${{ secrets.UPDATE_BOT_TOKEN }}
+          push-to-fork: ci-forks/LocalAI
+          commit-message: ':arrow_up: Update vllm-project/vllm-metal (darwin)'
+          title: 'chore: :arrow_up: Update vllm-metal (darwin) to `${{ steps.bump.outputs.commit }}`'
+          branch: "update/VLLM_METAL_VERSION"
+          body: ${{ steps.bump.outputs.message }}
+          signoff: true
diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh
index 2b2e74c36..5e4feb4fb 100755
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@@ -98,11 +98,15 @@ if [ "$(uname -s)" = "Darwin" ]; then
     # intel branch below relies on.
     pip install uv
 
-    # VERSION COUPLING (read before bumping vLLM!): vllm-metal pins this exact
-    # vLLM version and builds against its source tarball. It equals LocalAI's
-    # current vllm pin (see requirements-cublas13-after.txt: vllm==0.23.0). A
-    # vLLM bump on Linux MUST be coordinated with a vllm-metal release that
-    # supports the new version, or darwin builds will break.
+    # vllm-metal version pins -- AUTO-BUMPED by .github/bump_vllm_metal.sh, which
+    # tracks vllm-project/vllm-metal releases (NOT vllm/vllm latest). VLLM_METAL_VERSION
+    # is the vllm-metal release tag (its prebuilt wheel); VLLM_VERSION is the vLLM
+    # source version that release builds against (vllm-metal declares it as vllm_v=).
+    # They move in lockstep, so darwin can lag the Linux vllm pin
+    # (requirements-cublas13-after.txt, bumped independently against vllm/vllm) until
+    # vllm-metal supports a newer vLLM. Keep both as plain double-quoted assignments
+    # each on their own line so the bumper's sed can rewrite them.
+    VLLM_METAL_VERSION="v0.3.0.dev20260622062346"
     VLLM_VERSION="0.23.0"
 
     _vllm_src=$(mktemp -d)
@@ -122,14 +126,17 @@ if [ "$(uname -s)" = "Darwin" ]; then
         popd
     popd
 
-    # 2) Install the prebuilt vllm-metal wheel from its latest GitHub release.
-    #    It pulls mlx / mlx-metal as deps and registers the `metal` platform
-    #    plugin that backend.py resolves to at engine-init time.
-    _metal_wheel_url=$(curl -fsSL https://api.github.com/repos/vllm-project/vllm-metal/releases/latest \
+    # 2) Install the prebuilt vllm-metal wheel from the PINNED release
+    #    (${VLLM_METAL_VERSION}). It pulls mlx / mlx-metal as deps and registers
+    #    the `metal` platform plugin that backend.py resolves to at engine-init
+    #    time. Pinning the tag (vs releases/latest) keeps the wheel and the vLLM
+    #    source build above reproducible and coupled; .github/bump_vllm_metal.sh
+    #    advances both together.
+    _metal_wheel_url=$(curl -fsSL "https://api.github.com/repos/vllm-project/vllm-metal/releases/tags/${VLLM_METAL_VERSION}" \
         | grep -oE '"browser_download_url"[[:space:]]*:[[:space:]]*"[^"]+\.whl"' \
         | head -n1 | sed -E 's/.*"(https[^"]+)".*/\1/')
     if [ -z "${_metal_wheel_url}" ]; then
-        echo "ERROR: could not resolve a vllm-metal wheel URL from the latest GitHub release" >&2
+        echo "ERROR: could not resolve a vllm-metal wheel URL for release ${VLLM_METAL_VERSION}" >&2
         exit 1
     fi
     echo "Installing vllm-metal wheel: ${_metal_wheel_url}"