diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml
index 17d436cc1..f34921db9 100644
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -4974,6 +4974,9 @@ includeDarwin:
   - backend: "kitten-tts"
     tag-suffix: "-metal-darwin-arm64-kitten-tts"
     build-type: "mps"
+  - backend: "trl"
+    tag-suffix: "-metal-darwin-arm64-trl"
+    build-type: "mps"
   - backend: "liquid-audio"
     tag-suffix: "-metal-darwin-arm64-liquid-audio"
     build-type: "mps"
diff --git a/backend/index.yaml b/backend/index.yaml
index f3a2b892d..381aa073b 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -5295,6 +5295,7 @@
     nvidia: "cuda12-trl"
     nvidia-cuda-12: "cuda12-trl"
     nvidia-cuda-13: "cuda13-trl"
+    metal: "metal-trl"
 ## TRL backend images
 - !!merge <<: *trl
   name: "cpu-trl"
@@ -5326,6 +5327,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-trl
+- !!merge <<: *trl
+  name: "metal-trl"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-trl
+- !!merge <<: *trl
+  name: "metal-trl-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-trl
 ## llama.cpp quantization backend
 - &llama-cpp-quantization
   name: "llama-cpp-quantization"
diff --git a/backend/python/trl/install.sh b/backend/python/trl/install.sh
index 6963e60ed..ce0552f87 100644
--- a/backend/python/trl/install.sh
+++ b/backend/python/trl/install.sh
@@ -8,7 +8,13 @@ else
     source $backend_dir/../common/libbackend.sh
 fi
 
-EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
+# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
+# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
+# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
+if [ "x${USE_PIP:-}" != "xtrue" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
+fi
 installRequirements
 
 # Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
diff --git a/backend/python/trl/requirements-mps.txt b/backend/python/trl/requirements-mps.txt
new file mode 100644
index 000000000..fbdfb6536
--- /dev/null
+++ b/backend/python/trl/requirements-mps.txt
@@ -0,0 +1,12 @@
+torch==2.10.0
+trl
+peft
+datasets>=3.0.0
+transformers>=4.56.2
+accelerate>=1.4.0
+huggingface-hub>=1.3.0
+sentencepiece
+# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
+# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
+# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
+# on macOS arm64.