mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 05:36:49 -04:00
feat(quantization): add quantization backend (#9096)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
4b183b7bb6
commit
f7e8d9e791
16
.github/workflows/backend.yml
vendored
16
.github/workflows/backend.yml
vendored
@@ -131,6 +131,19 @@ jobs:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-llama-cpp-quantization'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'true'
|
||||
backend: "llama-cpp-quantization"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -2412,6 +2425,9 @@ jobs:
|
||||
tag-suffix: "-metal-darwin-arm64-local-store"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "llama-cpp-quantization"
|
||||
tag-suffix: "-metal-darwin-arm64-llama-cpp-quantization"
|
||||
build-type: "mps"
|
||||
with:
|
||||
backend: ${{ matrix.backend }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
|
||||
27
.github/workflows/test-extra.yml
vendored
27
.github/workflows/test-extra.yml
vendored
@@ -383,6 +383,33 @@ jobs:
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/voxcpm
|
||||
make --jobs=5 --output-sync=target -C backend/python/voxcpm test
|
||||
tests-llama-cpp-quantization:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential cmake curl git python3-pip
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Build llama-quantize from llama.cpp
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git /tmp/llama.cpp
|
||||
cmake -B /tmp/llama.cpp/build -S /tmp/llama.cpp -DGGML_NATIVE=OFF
|
||||
cmake --build /tmp/llama.cpp/build --target llama-quantize -j$(nproc)
|
||||
sudo cp /tmp/llama.cpp/build/bin/llama-quantize /usr/local/bin/
|
||||
- name: Install backend
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/llama-cpp-quantization
|
||||
- name: Test llama-cpp-quantization
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/llama-cpp-quantization test
|
||||
tests-acestep-cpp:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
Reference in New Issue
Block a user