mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 00:59:28 -04:00
Compare commits
2 Commits
feat/darwi
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3c63431e46 | ||
|
|
3f647a2764 |
3
.github/backend-matrix.yml
vendored
3
.github/backend-matrix.yml
vendored
@@ -4974,9 +4974,6 @@ includeDarwin:
|
|||||||
- backend: "kitten-tts"
|
- backend: "kitten-tts"
|
||||||
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
- backend: "trl"
|
|
||||||
tag-suffix: "-metal-darwin-arm64-trl"
|
|
||||||
build-type: "mps"
|
|
||||||
- backend: "liquid-audio"
|
- backend: "liquid-audio"
|
||||||
tag-suffix: "-metal-darwin-arm64-liquid-audio"
|
tag-suffix: "-metal-darwin-arm64-liquid-audio"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
|
IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
|
||||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# omnivoice.cpp version
|
# omnivoice.cpp version
|
||||||
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
||||||
OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
|
OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
|
||||||
SO_TARGET?=libgomnivoicecpp.so
|
SO_TARGET?=libgomnivoicecpp.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -5295,7 +5295,6 @@
|
|||||||
nvidia: "cuda12-trl"
|
nvidia: "cuda12-trl"
|
||||||
nvidia-cuda-12: "cuda12-trl"
|
nvidia-cuda-12: "cuda12-trl"
|
||||||
nvidia-cuda-13: "cuda13-trl"
|
nvidia-cuda-13: "cuda13-trl"
|
||||||
metal: "metal-trl"
|
|
||||||
## TRL backend images
|
## TRL backend images
|
||||||
- !!merge <<: *trl
|
- !!merge <<: *trl
|
||||||
name: "cpu-trl"
|
name: "cpu-trl"
|
||||||
@@ -5327,16 +5326,6 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl
|
||||||
- !!merge <<: *trl
|
|
||||||
name: "metal-trl"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:latest-metal-darwin-arm64-trl
|
|
||||||
- !!merge <<: *trl
|
|
||||||
name: "metal-trl-development"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:master-metal-darwin-arm64-trl
|
|
||||||
## llama.cpp quantization backend
|
## llama.cpp quantization backend
|
||||||
- &llama-cpp-quantization
|
- &llama-cpp-quantization
|
||||||
name: "llama-cpp-quantization"
|
name: "llama-cpp-quantization"
|
||||||
|
|||||||
@@ -8,13 +8,7 @@ else
|
|||||||
source $backend_dir/../common/libbackend.sh
|
source $backend_dir/../common/libbackend.sh
|
||||||
fi
|
fi
|
||||||
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
|
|
||||||
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
|
|
||||||
# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
|
|
||||||
if [ "x${USE_PIP:-}" != "xtrue" ]; then
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
|
|
||||||
fi
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
|
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
|
||||||
|
|||||||
@@ -1,12 +0,0 @@
|
|||||||
torch==2.10.0
|
|
||||||
trl
|
|
||||||
peft
|
|
||||||
datasets>=3.0.0
|
|
||||||
transformers>=4.56.2
|
|
||||||
accelerate>=1.4.0
|
|
||||||
huggingface-hub>=1.3.0
|
|
||||||
sentencepiece
|
|
||||||
# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
|
|
||||||
# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
|
|
||||||
# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
|
|
||||||
# on macOS arm64.
|
|
||||||
Reference in New Issue
Block a user