fix: lazy init MLX for quantization and improve library discovery

- Add lazy MLX initialization in quantizeTensor to ensure the library is loaded when quantization is requested
- Add exe-relative build path search for dev mode on macOS, so the ollama binary can find libmlxc.dylib in build/lib/ollama/ when running from the repo root
2026-01-18 04:20:25 -05:00 · 2026-01-17 22:46:20 -08:00
1 changed file with 5 additions and 0 deletions
--- a/x/create/client/quantize.go
+++ b/x/create/client/quantize.go
@@ -16,6 +16,11 @@ import (
 // Supported quantization types: "fp8" (affine 8-bit)
 // Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
 func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
+	// Lazy init MLX when needed for quantization
+	if err := mlx.InitMLX(); err != nil {
+		return nil, nil, nil, nil, nil, nil, fmt.Errorf("MLX initialization failed: %w", err)
+	}
+
 	tmpDir := ensureTempDir()

 	// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)