Files
LocalAI/backend/python/llama-cpp-quantization/test.py
Ettore Di Giacinto f7e8d9e791 feat(quantization): add quantization backend (#9096)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-03-22 00:56:34 +01:00

100 lines
3.4 KiB
Python

"""
Test script for the llama-cpp-quantization gRPC backend.
Downloads a small model (functiongemma-270m-it), converts it to GGUF,
and quantizes it to q4_k_m.
"""
import os
import shutil
import subprocess
import tempfile
import time
import unittest
import grpc
import backend_pb2
import backend_pb2_grpc
# Address handed to backend.py via --addr and dialed by the test client.
SERVER_ADDR = "localhost:50051"

# Model used by the quantization test; kept small (~540MB) so it is usable in CI.
TEST_MODEL = "unsloth/functiongemma-270m-it"
class TestQuantizationBackend(unittest.TestCase):
    """Tests for the llama-cpp-quantization gRPC service.

    One ``backend.py`` server process is started for the whole class and
    shared by every test. Quantized output is written to a temporary
    directory that is removed when the class is torn down.
    """

    # Maximum time, in seconds, to wait for the backend to accept connections.
    STARTUP_TIMEOUT = 60
    # Interval, in seconds, between checks that the backend process is alive.
    STARTUP_POLL_INTERVAL = 0.5

    @classmethod
    def setUpClass(cls):
        """Start the backend server and wait until it accepts connections.

        Raises:
            RuntimeError: if the server process exits during startup or does
                not become reachable within ``STARTUP_TIMEOUT`` seconds.
        """
        cls.output_dir = tempfile.mkdtemp(prefix="quantize-test-")
        cls.service = subprocess.Popen(
            ["python3", "backend.py", "--addr", SERVER_ADDR]
        )
        try:
            cls._wait_until_ready()
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises, so
            # release the process and the directory here before re-raising.
            cls.service.kill()
            cls.service.wait()
            shutil.rmtree(cls.output_dir, ignore_errors=True)
            raise

    @classmethod
    def _wait_until_ready(cls):
        """Block until the server is reachable, it exits, or the deadline passes."""
        deadline = time.monotonic() + cls.STARTUP_TIMEOUT
        with grpc.insecure_channel(SERVER_ADDR) as channel:
            ready = grpc.channel_ready_future(channel)
            try:
                while True:
                    # Fail fast with a clear message if the backend crashed
                    # instead of waiting out the whole timeout.
                    exit_code = cls.service.poll()
                    if exit_code is not None:
                        raise RuntimeError(
                            f"backend.py exited during startup (exit code {exit_code})"
                        )
                    try:
                        ready.result(timeout=cls.STARTUP_POLL_INTERVAL)
                        return
                    except grpc.FutureTimeoutError:
                        if time.monotonic() >= deadline:
                            raise RuntimeError(
                                f"backend.py not reachable at {SERVER_ADDR} "
                                f"after {cls.STARTUP_TIMEOUT}s"
                            ) from None
            finally:
                # No-op once the future has matured; otherwise stops the
                # connectivity subscription before the channel is closed.
                ready.cancel()

    @classmethod
    def tearDownClass(cls):
        """Stop the backend server and delete the temporary output directory."""
        cls.service.kill()
        cls.service.wait()
        # Clean up output directory
        if os.path.isdir(cls.output_dir):
            shutil.rmtree(cls.output_dir, ignore_errors=True)

    def _channel(self):
        """Return a new insecure gRPC channel to the server under test."""
        return grpc.insecure_channel(SERVER_ADDR)

    def test_01_health(self):
        """Test that the server starts and responds to health checks."""
        with self._channel() as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.Health(backend_pb2.HealthMessage())
            self.assertEqual(response.message, b"OK")

    def test_02_quantize_small_model(self):
        """Download, convert, and quantize functiongemma-270m-it to q4_k_m."""
        with self._channel() as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            job_id = "test-quantize-001"

            # Start quantization
            result = stub.StartQuantization(
                backend_pb2.QuantizationRequest(
                    model=TEST_MODEL,
                    quantization_type="q4_k_m",
                    output_dir=self.output_dir,
                    job_id=job_id,
                )
            )
            self.assertTrue(result.success, f"StartQuantization failed: {result.message}")
            self.assertEqual(result.job_id, job_id)

            # Stream progress until completion; the last update carries the
            # final status, and any update may carry the output path.
            final_status = None
            output_file = None
            for update in stub.QuantizationProgress(
                backend_pb2.QuantizationProgressRequest(job_id=job_id)
            ):
                print(f" [{update.status}] {update.progress_percent:.1f}% - {update.message}")
                final_status = update.status
                if update.output_file:
                    output_file = update.output_file

            self.assertEqual(final_status, "completed", f"Expected completed, got {final_status}")
            self.assertIsNotNone(output_file, "No output_file in progress updates")
            self.assertTrue(os.path.isfile(output_file), f"Output file not found: {output_file}")

            # Verify the output is a valid GGUF file (starts with "GGUF" magic)
            with open(output_file, "rb") as f:
                magic = f.read(4)
            self.assertEqual(magic, b"GGUF", f"Output file does not have GGUF magic: {magic!r}")

            # Verify reasonable file size (q4_k_m of 270M model should be ~150-400MB)
            size_mb = os.path.getsize(output_file) / (1024 * 1024)
            print(f" Output file size: {size_mb:.1f} MB")
            self.assertGreater(size_mb, 10, "Output file suspiciously small")
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()