Compare commits

..

1 Commits

Author SHA1 Message Date
jmorganca
582d93ab22 fix: lazy init MLX for quantization and improve library discovery
- Add lazy MLX initialization in quantizeTensor to ensure the library
  is loaded when quantization is requested
- Add exe-relative build path search for dev mode on macOS, so the
  ollama binary can find libmlxc.dylib in build/lib/ollama/ when
  running from the repo root
2026-01-17 22:46:20 -08:00

View File

@@ -16,6 +16,11 @@ import (
// Supported quantization types: "fp8" (affine 8-bit)
// Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
// Lazy init MLX when needed for quantization
if err := mlx.InitMLX(); err != nil {
return nil, nil, nil, nil, nil, nil, fmt.Errorf("MLX initialization failed: %w", err)
}
tmpDir := ensureTempDir()
// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)