diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml
index 593e44cde..729a4d8b3 100644
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -4974,6 +4974,9 @@ includeDarwin:
   - backend: "kitten-tts"
     tag-suffix: "-metal-darwin-arm64-kitten-tts"
     build-type: "mps"
+  - backend: "trl"
+    tag-suffix: "-metal-darwin-arm64-trl"
+    build-type: "mps"
   - backend: "piper"
     tag-suffix: "-metal-darwin-arm64-piper"
     build-type: "metal"
diff --git a/backend/index.yaml b/backend/index.yaml
index 3f61f7b4e..2e22ed4f3 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -5282,6 +5282,7 @@
     nvidia: "cuda12-trl"
     nvidia-cuda-12: "cuda12-trl"
     nvidia-cuda-13: "cuda13-trl"
+    metal: "metal-trl"
 ## TRL backend images
 - !!merge <<: *trl
   name: "cpu-trl"
@@ -5313,6 +5314,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-trl
+- !!merge <<: *trl
+  name: "metal-trl"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-trl
+- !!merge <<: *trl
+  name: "metal-trl-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-trl
 ## llama.cpp quantization backend
 - &llama-cpp-quantization
   name: "llama-cpp-quantization"
diff --git a/backend/python/trl/requirements-mps.txt b/backend/python/trl/requirements-mps.txt
new file mode 100644
index 000000000..fbdfb6536
--- /dev/null
+++ b/backend/python/trl/requirements-mps.txt
@@ -0,0 +1,12 @@
+torch==2.10.0
+trl
+peft
+datasets>=3.0.0
+transformers>=4.56.2
+accelerate>=1.4.0
+huggingface-hub>=1.3.0
+sentencepiece
+# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
+# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
+# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
+# on macOS arm64.