feat(quantization): add quantization backend (#9096)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-03-22 00:56:34 +01:00
committed by GitHub
parent 4b183b7bb6
commit f7e8d9e791
37 changed files with 2574 additions and 47 deletions

View File

@@ -0,0 +1,26 @@
# Version of llama.cpp to fetch convert_hf_to_gguf.py from.
# Defaults to "master"; because it is assigned with ?=, a value supplied on the
# make command line or through the environment takes precedence.
# The value is handed to install.sh as an environment variable of the same name.
# NOTE(review): presumably a branch, tag or commit of llama.cpp — confirm
# against install.sh.
LLAMA_CPP_CONVERT_VERSION ?= master
# llama-cpp-quantization: set up the backend by running install.sh.
# The selected LLAMA_CPP_CONVERT_VERSION is exported to the script through its
# environment; the value is quoted so it reaches the script as a single word.
# Declared phony because the target name is a command, not a file this rule
# creates, so install.sh runs on every invocation.
.PHONY: llama-cpp-quantization
llama-cpp-quantization:
	LLAMA_CPP_CONVERT_VERSION="$(LLAMA_CPP_CONVERT_VERSION)" bash install.sh
# run: start the backend via run.sh.
# Depends on llama-cpp-quantization, so install.sh is executed first.
# Only the progress messages are silenced with @; the run.sh command itself is
# echoed so a failure is easy to attribute.
.PHONY: run
run: llama-cpp-quantization
	@echo "Running llama-cpp-quantization..."
	bash run.sh
	@echo "llama-cpp-quantization run."
# test: execute the backend test suite via test.sh.
# Depends on llama-cpp-quantization, so install.sh is executed first.
# A non-zero exit status from test.sh stops the recipe before the final
# message is printed and makes the target fail.
.PHONY: test
test: llama-cpp-quantization
	@echo "Testing llama-cpp-quantization..."
	bash test.sh
	@echo "llama-cpp-quantization tested."
# protogen-clean: delete backend_pb2_grpc.py and backend_pb2.py.
# NOTE(review): these look like generated protobuf/gRPC Python stubs — confirm
# where they are produced.
# $(RM) is Make's built-in "rm -f", so missing files are not an error.
.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py
# clean: remove local build/runtime artefacts.
# Runs protogen-clean first, then deletes the venv and __pycache__ directories.
# NOTE(review): venv is presumably created by install.sh — confirm.
# Uses $(RM) -r (i.e. "rm -f -r") to match the style of protogen-clean; absent
# directories are silently ignored.
.PHONY: clean
clean: protogen-clean
	$(RM) -r venv __pycache__