MLX - dynamic loading of mlx-c (#13735)

* MLX - dynamic loading of mlx-c: Create a wrapper layer to indirect the dependency on mlx-c so the main ollama binary does not have a load-time dependency on mlx-c, mlx, and (on Linux) CUDA. Lazy-load the library via dlopen so we can adjust the path to ensure the dependencies are found, and fail gracefully if they are not present. * review comments * fix broken tests
2026-01-16 19:41:24 -05:00 · 2026-01-16 16:34:22 -08:00
32 changed files with 9155 additions and 571 deletions
--- a/18
+++ b/18
@@ -32,7 +32,7 @@ ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
 RUN yum install -y yum-utils epel-release \
-    && dnf install -y clang ccache \
+    && dnf install -y clang ccache git \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
 ENV CC=clang CXX=clang++

@@ -149,6 +149,7 @@ COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 COPY x/ml/backend/mlx x/ml/backend/mlx
 COPY go.mod go.sum .
+COPY MLX_VERSION .
 RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
 ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download
@@ -156,14 +157,6 @@ RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
        && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
        && cmake --install build --component MLX --strip --parallel ${PARALLEL}
-COPY . .
-ARG GOFLAGS="'-ldflags=-w -s'"
-ENV CGO_ENABLED=1
-ARG CGO_CFLAGS
-ARG CGO_CXXFLAGS
-RUN mkdir -p dist/bin
-RUN --mount=type=cache,target=/root/.cache/go-build \
-    go build -tags mlx -trimpath -buildmode=pie -o dist/bin/ollama-mlx .

 FROM base AS build
 WORKDIR /go/src/github.com/ollama/ollama
@@ -172,12 +165,14 @@ RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-
 ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download
 COPY . .
+# Clone mlx-c headers for CGO (version from MLX_VERSION file)
+RUN git clone --depth 1 --branch "$(cat MLX_VERSION)" https://github.com/ml-explore/mlx-c.git build/_deps/mlx-c-src
 ARG GOFLAGS="'-ldflags=-w -s'"
 ENV CGO_ENABLED=1
-ARG CGO_CFLAGS
+ENV CGO_CFLAGS="-I/go/src/github.com/ollama/ollama/build/_deps/mlx-c-src"
 ARG CGO_CXXFLAGS
 RUN --mount=type=cache,target=/root/.cache/go-build \
-    go build -trimpath -buildmode=pie -o /bin/ollama .
+    go build -tags mlx -trimpath -buildmode=pie -o /bin/ollama .

 FROM --platform=linux/amd64 scratch AS amd64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
@@ -185,7 +180,6 @@ COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
 COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
 COPY --from=vulkan  dist/lib/ollama  /lib/ollama/
 COPY --from=mlx     /go/src/github.com/ollama/ollama/dist/lib/ollama /lib/ollama/
-COPY --from=mlx     /go/src/github.com/ollama/ollama/dist/bin/ /bin/

 FROM --platform=linux/arm64 scratch AS arm64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
--- a/1
+++ b/1
@@ -0,0 +1 @@
+v0.4.1
--- a/README.md
+++ b/README.md
@@ -270,10 +270,10 @@ cmake --build --preset MLX --parallel
 cmake --install build --component MLX
 ```

-Next, build the `ollama-mlx` binary, which is a separate build of the Ollama runtime with MLX support enabled (needs to be in the same directory as `ollama`):
+When building with the `-tags mlx` flag, the main `ollama` binary includes MLX support for experimental features like image generation:

 ```shell
-go build -tags mlx -o ollama-mlx .
+go build -tags mlx .
 ```

 Finally, start the server:
--- a/api/types.go
+++ b/api/types.go
@@ -97,15 +97,6 @@ type GenerateRequest struct {
 	// request, for multimodal models.
 	Images []ImageData `json:"images,omitempty"`

-	// Width is the width of the generated image (for image generation models).
-	Width int32 `json:"width,omitempty"`
-
-	// Height is the height of the generated image (for image generation models).
-	Height int32 `json:"height,omitempty"`
-
-	// Steps is the number of diffusion steps (for image generation models).
-	Steps int32 `json:"steps,omitempty"`
-
 	// Options lists model-specific options. For example, temperature can be
 	// set through this field, if the model supports it.
 	Options map[string]any `json:"options"`
@@ -869,18 +860,6 @@ type GenerateResponse struct {
 	// Logprobs contains log probability information for the generated tokens,
 	// if requested via the Logprobs parameter.
 	Logprobs []Logprob `json:"logprobs,omitempty"`
-
-	// Status describes the current phase of generation (e.g., "generating image").
-	Status string `json:"status,omitempty"`
-
-	// Total is the total count for the current phase (e.g., total steps).
-	Total int64 `json:"total,omitempty"`
-
-	// Completed is the completed count for the current phase.
-	Completed int64 `json:"completed,omitempty"`
-
-	// Images contains base64-encoded generated images for image generation models.
-	Images []string `json:"images,omitempty"`
 }

 // ModelDetails provides details about a model.
--- a/docs/api.md
+++ b/docs/api.md
@@ -47,12 +47,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
 - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
 - `think`: (for thinking models) should the model think before responding?

-Image generation parameters (for image generation models):
-
- `width`: (optional) width of the generated image in pixels (default: model-specific)
- `height`: (optional) height of the generated image in pixels (default: model-specific)
- `steps`: (optional) number of diffusion steps (default: model-specific)
-
 Advanced parameters (optional):

 - `format`: the format to return a response in. Format can be `json` or a JSON schema
@@ -112,13 +106,6 @@ The final response in the stream also includes additional data about the generat
 - `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory
 - `response`: empty if the response was streamed, if not streamed, this will contain the full response

-For image generation models, the response includes additional fields:
-
- `status`: describes the current phase (e.g., "generating image")
- `total`: total count for the current phase (e.g., total steps)
- `completed`: completed count for the current phase
- `images`: array of base64-encoded generated images (in final response)
-
 To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration` \* `10^9`.

 ```json
--- a/llm/server.go
+++ b/llm/server.go
@@ -1468,7 +1468,6 @@ type CompletionRequest struct {
 	// Image generation fields
 	Width  int32 `json:"width,omitempty"`
 	Height int32 `json:"height,omitempty"`
-	Steps  int32 `json:"steps,omitempty"`
 	Seed   int64 `json:"seed,omitempty"`
 }

@@ -1519,14 +1518,10 @@ type CompletionResponse struct {
 	// Logprobs contains log probability information if requested
 	Logprobs []Logprob `json:"logprobs,omitempty"`

-	// Image contains base64-encoded image data for image generation
-	Image string `json:"image,omitempty"`
-
-	// Step is the current step in image generation
-	Step int `json:"step,omitempty"`
-
-	// TotalSteps is the total number of steps for image generation
-	TotalSteps int `json:"total_steps,omitempty"`
+	// Image generation fields
+	Image []byte `json:"image,omitempty"` // Generated image
+	Step  int    `json:"step,omitempty"`  // Current generation step
+	Total int    `json:"total,omitempty"` // Total generation steps
 }

 func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
--- a/middleware/openai.go
+++ b/middleware/openai.go
@@ -50,11 +50,6 @@ type EmbedWriter struct {
 	encodingFormat string
 }

-type ImageWriter struct {
-	BaseWriter
-	done bool
-}
-
 func (w *BaseWriter) writeError(data []byte) (int, error) {
 	var serr api.StatusError
 	err := json.Unmarshal(data, &serr)
@@ -279,36 +274,6 @@ func (w *EmbedWriter) Write(data []byte) (int, error) {
 	return w.writeResponse(data)
 }

-func (w *ImageWriter) writeResponse(data []byte) (int, error) {
-	var generateResponse api.GenerateResponse
-	err := json.Unmarshal(data, &generateResponse)
-	if err != nil {
-		return 0, err
-	}
-
-	// Image generation doesn't support streaming in the OpenAI API sense,
-	// so we only write the response when done with images
-	if generateResponse.Done && len(generateResponse.Images) > 0 {
-		w.done = true
-		w.ResponseWriter.Header().Set("Content-Type", "application/json")
-		err = json.NewEncoder(w.ResponseWriter).Encode(openai.ToImageGenerationResponse(generateResponse))
-		if err != nil {
-			return 0, err
-		}
-	}
-
-	return len(data), nil
-}
-
-func (w *ImageWriter) Write(data []byte) (int, error) {
-	code := w.ResponseWriter.Status()
-	if code != http.StatusOK {
-		return w.writeError(data)
-	}
-
-	return w.writeResponse(data)
-}
-
 func ListMiddleware() gin.HandlerFunc {
 	return func(c *gin.Context) {
 		w := &ListWriter{
@@ -428,43 +393,6 @@ func EmbeddingsMiddleware() gin.HandlerFunc {
 	}
 }

-func ImageGenerationsMiddleware() gin.HandlerFunc {
-	return func(c *gin.Context) {
-		var req openai.ImageGenerationRequest
-		err := c.ShouldBindJSON(&req)
-		if err != nil {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
-			return
-		}
-
-		if req.Prompt == "" {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "prompt is required"))
-			return
-		}
-
-		if req.Model == "" {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "model is required"))
-			return
-		}
-
-		var b bytes.Buffer
-		genReq := openai.FromImageGenerationRequest(req)
-		if err := json.NewEncoder(&b).Encode(genReq); err != nil {
-			c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, err.Error()))
-			return
-		}
-
-		c.Request.Body = io.NopCloser(&b)
-
-		w := &ImageWriter{
-			BaseWriter: BaseWriter{ResponseWriter: c.Writer},
-		}
-
-		c.Writer = w
-		c.Next()
-	}
-}
-
 func ChatMiddleware() gin.HandlerFunc {
 	return func(c *gin.Context) {
 		var req openai.ChatCompletionRequest
--- a/middleware/openai_test.go
+++ b/middleware/openai_test.go
@@ -961,143 +961,3 @@ func TestRetrieveMiddleware(t *testing.T) {
 		}
 	}
 }
-
-func TestImageGenerationsMiddleware(t *testing.T) {
-	type testCase struct {
-		name string
-		body string
-		req  api.GenerateRequest
-		err  openai.ErrorResponse
-	}
-
-	var capturedRequest *api.GenerateRequest
-
-	streamFalse := false
-	testCases := []testCase{
-		{
-			name: "image generation handler",
-			body: `{
-				"model": "flux",
-				"prompt": "a cat"
-			}`,
-			req: api.GenerateRequest{
-				Model:  "flux",
-				Prompt: "a cat",
-				Stream: &streamFalse,
-			},
-		},
-		{
-			name: "image generation with size",
-			body: `{
-				"model": "flux",
-				"prompt": "a dog",
-				"size": "512x512"
-			}`,
-			req: api.GenerateRequest{
-				Model:  "flux",
-				Prompt: "a dog",
-				Stream: &streamFalse,
-			},
-		},
-		{
-			name: "missing prompt error",
-			body: `{
-				"model": "flux"
-			}`,
-			err: openai.ErrorResponse{
-				Error: openai.Error{
-					Message: "prompt is required",
-					Type:    "invalid_request_error",
-				},
-			},
-		},
-		{
-			name: "missing model error",
-			body: `{
-				"prompt": "a cat"
-			}`,
-			err: openai.ErrorResponse{
-				Error: openai.Error{
-					Message: "model is required",
-					Type:    "invalid_request_error",
-				},
-			},
-		},
-	}
-
-	endpoint := func(c *gin.Context) {
-		c.Status(http.StatusOK)
-	}
-
-	gin.SetMode(gin.TestMode)
-	router := gin.New()
-	router.Use(ImageGenerationsMiddleware(), captureRequestMiddleware(&capturedRequest))
-	router.Handle(http.MethodPost, "/api/generate", endpoint)
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
-			req.Header.Set("Content-Type", "application/json")
-
-			resp := httptest.NewRecorder()
-			router.ServeHTTP(resp, req)
-
-			var errResp openai.ErrorResponse
-			if resp.Code != http.StatusOK {
-				if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
-					t.Fatal(err)
-				}
-			}
-
-			if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
-				t.Fatalf("requests did not match\nExpected: %+v\nActual: %+v", tc.req, *capturedRequest)
-			}
-
-			if !reflect.DeepEqual(tc.err, errResp) {
-				t.Fatalf("errors did not match\nExpected: %+v\nActual: %+v", tc.err, errResp)
-			}
-
-			capturedRequest = nil
-		})
-	}
-}
-
-func TestImageWriterIntegration(t *testing.T) {
-	gin.SetMode(gin.TestMode)
-
-	t.Run("transforms generate response to openai format", func(t *testing.T) {
-		router := gin.New()
-		router.Use(ImageGenerationsMiddleware())
-		router.POST("/api/generate", func(c *gin.Context) {
-			// Simulate an image generation response
-			generateResponse := api.GenerateResponse{
-				Done:      true,
-				CreatedAt: time.Now(),
-				Images:    []string{"base64encodedimage"},
-			}
-			c.JSON(http.StatusOK, generateResponse)
-		})
-
-		req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(`{"model":"flux","prompt":"a cat"}`))
-		req.Header.Set("Content-Type", "application/json")
-
-		resp := httptest.NewRecorder()
-		router.ServeHTTP(resp, req)
-
-		if resp.Code != http.StatusOK {
-			t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
-		}
-
-		var response openai.ImageGenerationResponse
-		if err := json.Unmarshal(resp.Body.Bytes(), &response); err != nil {
-			t.Fatalf("failed to unmarshal response: %v", err)
-		}
-
-		if len(response.Data) != 1 {
-			t.Fatalf("expected 1 image, got %d", len(response.Data))
-		}
-		if response.Data[0].B64JSON != "base64encodedimage" {
-			t.Fatalf("expected image data 'base64encodedimage', got '%s'", response.Data[0].B64JSON)
-		}
-	})
-}
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -737,46 +737,3 @@ func FromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
 		DebugRenderOnly: r.DebugRenderOnly,
 	}, nil
 }
-
-// ImageGenerationRequest is an OpenAI-compatible image generation request.
-type ImageGenerationRequest struct {
-	Model          string `json:"model"`
-	Prompt         string `json:"prompt"`
-	N              int    `json:"n,omitempty"`
-	Size           string `json:"size,omitempty"`
-	ResponseFormat string `json:"response_format,omitempty"`
-}
-
-// ImageGenerationResponse is an OpenAI-compatible image generation response.
-type ImageGenerationResponse struct {
-	Created int64            `json:"created"`
-	Data    []ImageURLOrData `json:"data"`
-}
-
-// ImageURLOrData contains either a URL or base64-encoded image data.
-type ImageURLOrData struct {
-	URL     string `json:"url,omitempty"`
-	B64JSON string `json:"b64_json,omitempty"`
-}
-
-// FromImageGenerationRequest converts an OpenAI image generation request to an Ollama GenerateRequest.
-func FromImageGenerationRequest(r ImageGenerationRequest) api.GenerateRequest {
-	stream := false
-	return api.GenerateRequest{
-		Model:  r.Model,
-		Prompt: r.Prompt,
-		Stream: &stream,
-	}
-}
-
-// ToImageGenerationResponse converts an Ollama GenerateResponse to an OpenAI ImageGenerationResponse.
-func ToImageGenerationResponse(resp api.GenerateResponse) ImageGenerationResponse {
-	data := make([]ImageURLOrData, 0)
-	for _, img := range resp.Images {
-		data = append(data, ImageURLOrData{B64JSON: img})
-	}
-	return ImageGenerationResponse{
-		Created: resp.CreatedAt.Unix(),
-		Data:    data,
-	}
-}
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -60,7 +60,7 @@ _build_darwin() {
            cmake --install $BUILD_DIR --component MLX
            # Override CGO flags to point to the amd64 build directory
            MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
-            MLX_CGO_LDFLAGS="-L$(pwd)/$BUILD_DIR/lib/ollama -lmlxc -lmlx -Wl,-rpath,@executable_path -lc++ -framework Accelerate -mmacosx-version-min=14.0"
+            MLX_CGO_LDFLAGS="-ldl -lc++ -framework Accelerate -mmacosx-version-min=14.0"
        else
            BUILD_DIR=build
            cmake --preset MLX \
@@ -71,10 +71,12 @@ _build_darwin() {
            cmake --install $BUILD_DIR --component MLX
            # Use default CGO flags from mlx.go for arm64
            MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
-            MLX_CGO_LDFLAGS="-L$(pwd)/$BUILD_DIR/lib/ollama -lmlxc -lmlx -Wl,-rpath,@executable_path -lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0"
+            MLX_CGO_LDFLAGS="-lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0"
        fi
-        GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX/ollama-mlx .
-        GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 go build -o $INSTALL_PREFIX .
+        GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX .
+        # Copy MLX libraries to same directory as executable for dlopen
+        cp $INSTALL_PREFIX/lib/ollama/libmlxc.dylib $INSTALL_PREFIX/
+        cp $INSTALL_PREFIX/lib/ollama/libmlx.dylib $INSTALL_PREFIX/
    done
 }

@@ -82,12 +84,10 @@ _sign_darwin() {
    status "Creating universal binary..."
    mkdir -p dist/darwin
    lipo -create -output dist/darwin/ollama dist/darwin-*/ollama
-    lipo -create -output dist/darwin/ollama-mlx dist/darwin-*/ollama-mlx
    chmod +x dist/darwin/ollama
-    chmod +x dist/darwin/ollama-mlx

    if [ -n "$APPLE_IDENTITY" ]; then
-        for F in dist/darwin/ollama dist/darwin-*/lib/ollama/* dist/darwin/ollama-mlx; do
+        for F in dist/darwin/ollama dist/darwin-*/lib/ollama/*; do
            codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F
        done

@@ -154,7 +154,6 @@ _build_macapp() {
    mkdir -p dist/Ollama.app/Contents/Resources
    if [ -d dist/darwin-amd64 ]; then
        lipo -create -output dist/Ollama.app/Contents/Resources/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama
-        lipo -create -output dist/Ollama.app/Contents/Resources/ollama-mlx dist/darwin-amd64/ollama-mlx dist/darwin-arm64/ollama-mlx
        for F in dist/darwin-amd64/lib/ollama/*mlx*.dylib ; do
            lipo -create -output dist/darwin/$(basename $F) $F dist/darwin-arm64/lib/ollama/$(basename $F)
        done
@@ -166,13 +165,12 @@ _build_macapp() {
        cp -a dist/darwin/ollama dist/Ollama.app/Contents/Resources/ollama
        cp dist/darwin/*.so dist/darwin/*.dylib dist/Ollama.app/Contents/Resources/
    fi
-    cp -a dist/darwin/ollama-mlx dist/Ollama.app/Contents/Resources/ollama-mlx
    chmod a+x dist/Ollama.app/Contents/Resources/ollama

    # Sign
    if [ -n "$APPLE_IDENTITY" ]; then
        codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/ollama
-        for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/*.metallib dist/Ollama.app/Contents/Resources/ollama-mlx ; do
+        for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/*.metallib ; do
            codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime ${lib}
        done
        codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier com.electron.ollama --deep --options=runtime dist/Ollama.app
@@ -180,7 +178,7 @@ _build_macapp() {

    rm -f dist/Ollama-darwin.zip
    ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip
-    (cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama ollama-mlx *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz
+    (cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz

    # Notarize and Staple
    if [ -n "$APPLE_IDENTITY" ]; then
--- a/server/routes.go
+++ b/server/routes.go
@@ -523,9 +523,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			Truncate:    req.Truncate == nil || *req.Truncate,
 			Logprobs:    req.Logprobs,
 			TopLogprobs: req.TopLogprobs,
-			Width:       req.Width,
-			Height:      req.Height,
-			Steps:       req.Steps,
 		}, func(cr llm.CompletionResponse) {
 			res := api.GenerateResponse{
 				Model:     req.Model,
@@ -541,16 +538,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 				Logprobs: toAPILogprobs(cr.Logprobs),
 			}

-			// Image generation fields
-			if cr.Image != "" {
-				res.Images = []string{cr.Image}
-			}
-			if cr.TotalSteps > 0 {
-				res.Status = "generating image"
-				res.Completed = int64(cr.Step)
-				res.Total = int64(cr.TotalSteps)
-			}
-
 			if builtinParser != nil {
 				content, thinking, toolCalls, err := builtinParser.Add(cr.Content, cr.Done)
 				if err != nil {
@@ -1607,8 +1594,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 	r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
 	r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
 	r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
-	// OpenAI-compatible image generation endpoint
-	r.POST("/v1/images/generations", middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
+	// Experimental OpenAI-compatible image generation endpoint
+	r.POST("/v1/images/generations", s.handleImageGeneration)

 	// Inference (Anthropic compatibility)
 	r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
@@ -1930,6 +1917,88 @@ func toolCallId() string {
 	return "call_" + strings.ToLower(string(b))
 }
 
+// handleImageGeneration serves the experimental OpenAI-compatible
+// POST /v1/images/generations endpoint.
+//
+// The JSON body carries "model", "prompt" and an optional "size" of the form
+// "WxH" (1024x1024 when omitted). On success the reply is
+// {"created": <unix seconds>, "data": [{"b64_json": <base64 image>}]}.
+//
+// The request is validated in full before a runner is requested, so a
+// malformed request is rejected with 400 without scheduling the model.
+func (s *Server) handleImageGeneration(c *gin.Context) {
+	var req struct {
+		Model  string `json:"model"`
+		Prompt string `json:"prompt"`
+		Size   string `json:"size"`
+	}
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+	if req.Model == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
+		return
+	}
+	if req.Prompt == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "prompt is required"})
+		return
+	}
+
+	// Parse size (e.g., "1024x768") into width and height. Both values must be
+	// positive; Sscanf alone would accept "0x0" or "-1x-1".
+	width, height := int32(1024), int32(1024)
+	if req.Size != "" {
+		n, err := fmt.Sscanf(req.Size, "%dx%d", &width, &height)
+		if err != nil || n != 2 || width <= 0 || height <= 0 {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid size format, expected WxH"})
+			return
+		}
+	}
+
+	m, err := GetModel(req.Model)
+	if err != nil {
+		c.JSON(http.StatusNotFound, gin.H{"error": err.Error()})
+		return
+	}
+
+	runnerCh, errCh := s.sched.GetRunner(c.Request.Context(), m, api.Options{}, nil)
+	var runner *runnerRef
+	select {
+	case runner = <-runnerCh:
+	case err := <-errCh:
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	}
+
+	// Responses without image bytes are ignored; the latest non-empty image
+	// payload is the one returned to the client.
+	var image []byte
+	err = runner.llama.Completion(c.Request.Context(), llm.CompletionRequest{
+		Prompt: req.Prompt,
+		Width:  width,
+		Height: height,
+	}, func(resp llm.CompletionResponse) {
+		if len(resp.Image) > 0 {
+			image = resp.Image
+		}
+	})
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	}
+	if len(image) == 0 {
+		// Never answer 200 with an empty b64_json payload.
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "no image was generated"})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"created": time.Now().Unix(),
+		"data":    []gin.H{{"b64_json": base64.StdEncoding.EncodeToString(image)}},
+	})
+}
+
 func (s *Server) ChatHandler(c *gin.Context) {
 	checkpointStart := time.Now()

--- a/x/imagegen/cli.go
+++ b/x/imagegen/cli.go
@@ -91,7 +91,9 @@ func RunCLI(cmd *cobra.Command, name string, prompt string, interactive bool, ke
 }

 // generateImageWithOptions generates an image with the given options.
-func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keepAlive *api.Duration, opts ImageGenOptions) error {
+// Note: opts are currently unused as the native API doesn't support size parameters.
+// Use OpenAI-compatible endpoint (/v1/images/generations) for dimension control.
+func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keepAlive *api.Duration, _ ImageGenOptions) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
@@ -100,9 +102,7 @@ func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keep
 	req := &api.GenerateRequest{
 		Model:  modelName,
 		Prompt: prompt,
-		Width:  int32(opts.Width),
-		Height: int32(opts.Height),
-		Steps:  int32(opts.Steps),
+		// Note: Size is only available via OpenAI-compatible /v1/images/generations endpoint
 	}
 	if keepAlive != nil {
 		req.KeepAlive = keepAlive
@@ -116,19 +116,26 @@ func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keep
 	var stepBar *progress.StepBar
 	var imageBase64 string
 	err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error {
-		// Handle progress updates using structured fields
-		if resp.Total > 0 && resp.Completed > 0 {
-			if stepBar == nil {
+		content := resp.Response
+
+		// Handle progress updates - parse step info and switch to step bar
+		if strings.HasPrefix(content, "\rGenerating:") {
+			var step, total int
+			fmt.Sscanf(content, "\rGenerating: step %d/%d", &step, &total)
+			if stepBar == nil && total > 0 {
 				spinner.Stop()
-				stepBar = progress.NewStepBar("Generating", int(resp.Total))
+				stepBar = progress.NewStepBar("Generating", total)
 				p.Add("", stepBar)
 			}
-			stepBar.Set(int(resp.Completed))
+			if stepBar != nil {
+				stepBar.Set(step)
+			}
+			return nil
 		}

-		// Handle final response with image data
-		if resp.Done && len(resp.Images) > 0 {
-			imageBase64 = resp.Images[0]
+		// Handle final response with base64 image data
+		if resp.Done && strings.HasPrefix(content, "IMAGE_BASE64:") {
+			imageBase64 = content[13:]
 		}

 		return nil
@@ -228,9 +235,12 @@ func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duratio
 		req := &api.GenerateRequest{
 			Model:  modelName,
 			Prompt: line,
-			Width:  int32(opts.Width),
-			Height: int32(opts.Height),
-			Steps:  int32(opts.Steps),
+			Options: map[string]any{
+				"num_ctx":     opts.Width,
+				"num_gpu":     opts.Height,
+				"num_predict": opts.Steps,
+				"seed":        opts.Seed,
+			},
 		}
 		if keepAlive != nil {
 			req.KeepAlive = keepAlive
@@ -245,19 +255,26 @@ func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duratio
 		var imageBase64 string

 		err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error {
-			// Handle progress updates using structured fields
-			if resp.Total > 0 && resp.Completed > 0 {
-				if stepBar == nil {
+			content := resp.Response
+
+			// Handle progress updates - parse step info and switch to step bar
+			if strings.HasPrefix(content, "\rGenerating:") {
+				var step, total int
+				fmt.Sscanf(content, "\rGenerating: step %d/%d", &step, &total)
+				if stepBar == nil && total > 0 {
 					spinner.Stop()
-					stepBar = progress.NewStepBar("Generating", int(resp.Total))
+					stepBar = progress.NewStepBar("Generating", total)
 					p.Add("", stepBar)
 				}
-				stepBar.Set(int(resp.Completed))
+				if stepBar != nil {
+					stepBar.Set(step)
+				}
+				return nil
 			}

-			// Handle final response with image data
-			if resp.Done && len(resp.Images) > 0 {
-				imageBase64 = resp.Images[0]
+			// Handle final response with base64 image data
+			if resp.Done && strings.HasPrefix(content, "IMAGE_BASE64:") {
+				imageBase64 = content[13:]
 			}

 			return nil
--- a/x/imagegen/cmd/engine/generate.go
+++ b/x/imagegen/cmd/engine/generate.go
@@ -65,12 +65,12 @@ func (s *utf8Streamer) Flush() string {
 	return result
 }

-func init() {
-	generationStream = mlx.NewStream()
-}
-
 // withStream runs fn with the generation stream as default
 func withStream(fn func()) {
+	// Lazy initialization of generationStream
+	if generationStream == nil {
+		generationStream = mlx.NewStream()
+	}
 	orig := mlx.GetDefaultStream()
 	mlx.SetDefaultStream(generationStream)
 	fn()
--- a/x/imagegen/cmd/engine/main.go
+++ b/x/imagegen/cmd/engine/main.go
@@ -12,7 +12,6 @@ import (
 	"path/filepath"
 	"runtime/pprof"

-	"github.com/ollama/ollama/x/imagegen"
 	"github.com/ollama/ollama/x/imagegen/mlx"
 	"github.com/ollama/ollama/x/imagegen/models/gemma3"
 	"github.com/ollama/ollama/x/imagegen/models/gpt_oss"
@@ -79,6 +78,11 @@ func main() {
 		return
 	}

+	// Check if MLX initialized successfully
+	if !mlx.IsMLXAvailable() {
+		log.Fatalf("MLX initialization failed: %v", mlx.GetMLXInitError())
+	}
+
 	// CPU profiling
 	if *cpuProfile != "" {
 		f, err := os.Create(*cpuProfile)
--- a/x/imagegen/mlx/compile.go
+++ b/x/imagegen/mlx/compile.go
@@ -3,7 +3,7 @@
 package mlx

 /*
-#include "mlx/c/mlx.h"
+#include "mlx.h"
 #include <stdlib.h>

 // Forward declaration for Go callback
--- a/x/imagegen/mlx/doc.go
+++ b/x/imagegen/mlx/doc.go
@@ -0,0 +1,6 @@
+//go:build mlx
+
+// Package mlx provides Go bindings for the MLX-C library with dynamic loading support.
+//
+//go:generate go run generate_wrappers.go ../../../build/_deps/mlx-c-src/mlx/c mlx.h mlx.c
+package mlx
--- a/x/imagegen/mlx/generate_wrappers.go
+++ b/x/imagegen/mlx/generate_wrappers.go
@@ -0,0 +1,439 @@
+//go:build ignore
+
+// This tool generates MLX-C dynamic loading wrappers.
+// Usage: go run generate_wrappers.go <mlx-c-include-dir> <output-header> [output-impl]
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+)
+
+// Function describes one C function declaration parsed from an MLX-C header.
+type Function struct {
+	Name          string // function identifier matched by parseFunctions (mlx_* or _mlx_*)
+	ReturnType    string // return type text with runs of whitespace collapsed to one space
+	Params        string // raw parameter list text found between the parentheses
+	ParamNames    []string // parameter identifiers, used to forward arguments in the wrapper body
+	NeedsARM64Guard bool // emit this function inside an aarch64-only #if block
+}
+
+// findHeaders walks directory recursively and returns the path of every
+// non-directory entry whose path ends in ".h". The first error reported by
+// the walk stops the traversal and is returned together with the paths
+// collected up to that point.
+func findHeaders(directory string) ([]string, error) {
+	var found []string
+	visit := func(path string, entry fs.DirEntry, walkErr error) error {
+		switch {
+		case walkErr != nil:
+			return walkErr
+		case entry.IsDir(), !strings.HasSuffix(path, ".h"):
+			return nil
+		}
+		found = append(found, path)
+		return nil
+	}
+	walkErr := filepath.WalkDir(directory, visit)
+	return found, walkErr
+}
+
+// cleanContent reduces the text of a C header to a single line of declaration
+// text that parseFunctions can scan with a regular expression.
+//
+// It strips comments, preprocessor directive lines, the opening line of an
+// extern "C" block and standalone closing braces, then collapses every run of
+// whitespace into one space.
+func cleanContent(content string) string {
+	// Remove block and line comments in one left-to-right pass so that a "//"
+	// inside a block comment (or a "/*" inside a line comment) is not mistaken
+	// for the start of another comment. (?s) lets "." cross newlines, which is
+	// required for block comments that span several lines. Each comment
+	// becomes a single space so neighbouring tokens are never glued together;
+	// the newline that terminates a line comment is left in place.
+	re := regexp.MustCompile(`(?s)/\*.*?\*/|//[^\n]*`)
+	content = re.ReplaceAllString(content, " ")
+
+	// Remove preprocessor directives (lines starting with #) - use multiline mode
+	re = regexp.MustCompile(`(?m)^\s*#.*?$`)
+	content = re.ReplaceAllString(content, "")
+
+	// Remove extern "C" { and } blocks more conservatively
+	// Only remove the extern "C" { line, not the content inside
+	re = regexp.MustCompile(`extern\s+"C"\s*\{\s*?\n`)
+	content = re.ReplaceAllString(content, "\n")
+	// Remove standalone closing braces that are not part of function declarations
+	re = regexp.MustCompile(`\n\s*\}\s*\n`)
+	content = re.ReplaceAllString(content, "\n")
+
+	// Collapse whitespace and newlines
+	re = regexp.MustCompile(`\s+`)
+	content = re.ReplaceAllString(content, " ")
+
+	return content
+}
+
+// extractParamNames returns the identifier of each parameter in a C parameter
+// list, in declaration order. An empty list or a lone "void" yields an empty
+// slice.
+//
+// For a function-pointer parameter such as "void (*callback)(int)" the name
+// inside "(*...)" is used. For every other parameter the right-most word that
+// is not a known type keyword is taken as the name; a parameter consisting
+// only of type keywords contributes nothing.
+func extractParamNames(params string) []string {
+	if params == "" || strings.TrimSpace(params) == "void" {
+		return []string{}
+	}
+
+	// Words that belong to a type and are therefore never treated as a name.
+	notAName := make(map[string]bool)
+	for _, kw := range []string{
+		"const", "struct", "unsigned", "signed", "long", "short",
+		"int", "char", "float", "double", "void", "size_t",
+		"uint8_t", "uint16_t", "uint32_t", "uint64_t",
+		"int8_t", "int16_t", "int32_t", "int64_t",
+		"intptr_t", "uintptr_t",
+	} {
+		notAName[kw] = true
+	}
+
+	arraySuffix := regexp.MustCompile(`\[.*?\]`)
+	funcPtrName := regexp.MustCompile(`\(\s*\*\s*(\w+)\s*\)`)
+	identifier := regexp.MustCompile(`\w+`)
+
+	var names []string
+	// Commas nested in parentheses (function pointers) do not split parameters.
+	for _, param := range splitParams(params) {
+		if param == "" {
+			continue
+		}
+
+		// Drop array brackets so "int shape[4]" is read as "int shape".
+		param = arraySuffix.ReplaceAllString(param, "")
+
+		if m := funcPtrName.FindStringSubmatch(param); len(m) > 1 {
+			names = append(names, m[1])
+			continue
+		}
+
+		// Scan right-to-left: the name normally follows the type.
+		words := identifier.FindAllString(param, -1)
+		for idx := len(words) - 1; idx >= 0; idx-- {
+			if notAName[words[idx]] {
+				continue
+			}
+			names = append(names, words[idx])
+			break
+		}
+	}
+
+	return names
+}
+
+// splitParams splits a C parameter list on commas that are not nested inside
+// parentheses, so a function-pointer parameter stays in one piece. Each piece
+// is returned with surrounding whitespace trimmed.
+func splitParams(params string) []string {
+	var (
+		pieces  []string
+		pending strings.Builder
+		nesting int
+	)
+
+	// A comma is appended so the final parameter is flushed by the same
+	// branch that handles every other separator.
+	for _, r := range params + "," {
+		if r == ',' && nesting == 0 {
+			pieces = append(pieces, strings.TrimSpace(pending.String()))
+			pending.Reset()
+			continue
+		}
+		if r == '(' {
+			nesting++
+		} else if r == ')' {
+			nesting--
+		}
+		pending.WriteRune(r)
+	}
+
+	return pieces
+}
+
+// parseFunctions scans header text already processed by cleanContent and
+// returns one Function per declaration of the form
+// "return_type name(params);" whose name starts with mlx_ or _mlx_.
+// Matches with an empty parameter list, or with "{" in the parameter text,
+// are skipped.
+func parseFunctions(content string) []Function {
+	// Capture groups: 1 = return type, 2 = function name, 3 = parameter list.
+	// The parameter group accepts parenthesised sections so function-pointer
+	// parameters do not end the match early.
+	declRe := regexp.MustCompile(`\b((?:const\s+)?(?:struct\s+)?[\w\s]+?[\*\s]*)\s+(_?mlx_\w+)\s*\(([^)]*(?:\([^)]*\)[^)]*)*)\)\s*;`)
+
+	var found []Function
+	for _, groups := range declRe.FindAllStringSubmatch(content, -1) {
+		name := strings.TrimSpace(groups[2])
+		params := strings.TrimSpace(groups[3])
+
+		// Skip if this looks like a variable declaration
+		if params == "" || strings.Contains(params, "{") {
+			continue
+		}
+
+		// Normalise the return type to single spaces between words.
+		retType := strings.Join(strings.Fields(groups[1]), " ")
+
+		found = append(found, Function{
+			Name:            name,
+			ReturnType:      retType,
+			Params:          params,
+			ParamNames:      extractParamNames(params),
+			NeedsARM64Guard: needsARM64Guard(name, retType, params),
+		})
+	}
+
+	return found
+}
+
+// needsARM64Guard reports whether a function involves half-precision types:
+// its name contains "float16", or its return type or parameter list contains
+// "float16_t". The bfloat16 spellings contain those substrings too, so they
+// need no separate check.
+func needsARM64Guard(name, retType, params string) bool {
+	if strings.Contains(name, "float16") {
+		return true
+	}
+	for _, sig := range []string{retType, params} {
+		if strings.Contains(sig, "float16_t") {
+			return true
+		}
+	}
+	return false
+}
+
+// generateWrapperFiles writes the generated C header to headerPath and the
+// generated C implementation to implPath.
+//
+// The header declares, for every function, an extern function pointer named
+// <name>_ptr and a wrapper with the original name and signature, plus
+// mlx_load_functions. The implementation defines the pointers (initially
+// NULL), mlx_load_functions (which resolves each pointer with dlsym and
+// returns -1 on the first missing symbol), and the wrappers that forward
+// their arguments to the pointers. Functions flagged NeedsARM64Guard are
+// emitted inside "#if defined(__aarch64__) || defined(_M_ARM64)" blocks.
+//
+// Returns a wrapped error if either file cannot be written. The header is
+// written before the implementation is generated, so a failure writing the
+// implementation leaves the new header on disk.
+//
+// NOTE(review): the emitted implementation includes <dlfcn.h> and calls dlsym
+// directly, which looks POSIX-only - confirm whether Windows builds are
+// expected to compile the generated file.
+func generateWrapperFiles(functions []Function, headerPath, implPath string) error {
+	// Generate header file
+	var headerBuf bytes.Buffer
+
+	headerBuf.WriteString("// AUTO-GENERATED by generate_wrappers.go - DO NOT EDIT\n")
+	headerBuf.WriteString("// This file provides wrapper declarations for MLX-C functions that use dlopen/dlsym\n")
+	headerBuf.WriteString("//\n")
+	headerBuf.WriteString("// Strategy: Include MLX-C headers for type definitions, then provide wrapper\n")
+	headerBuf.WriteString("// functions that shadow the originals, allowing Go code to call them directly (e.g., C.mlx_add).\n")
+	headerBuf.WriteString("// Function pointers are defined in mlx.c (single compilation unit).\n\n")
+	headerBuf.WriteString("#ifndef MLX_WRAPPERS_H\n")
+	headerBuf.WriteString("#define MLX_WRAPPERS_H\n\n")
+
+	headerBuf.WriteString("// Include MLX headers for type definitions and original declarations\n")
+	headerBuf.WriteString("#include \"mlx/c/mlx.h\"\n")
+	headerBuf.WriteString("#include \"mlx_dynamic.h\"\n")
+	headerBuf.WriteString("#include <stdio.h>\n\n")
+
+	// Undef all MLX functions to avoid conflicts
+	headerBuf.WriteString("// Undefine any existing MLX function macros\n")
+	for _, fn := range functions {
+		headerBuf.WriteString(fmt.Sprintf("#undef %s\n", fn.Name))
+	}
+	headerBuf.WriteString("\n")
+
+	// Function pointer extern declarations
+	headerBuf.WriteString("// Function pointer declarations (defined in mlx.c, loaded via dlsym)\n")
+	for _, fn := range functions {
+		if fn.NeedsARM64Guard {
+			headerBuf.WriteString("#if defined(__aarch64__) || defined(_M_ARM64)\n")
+		}
+		headerBuf.WriteString(fmt.Sprintf("extern %s (*%s_ptr)(%s);\n", fn.ReturnType, fn.Name, fn.Params))
+		if fn.NeedsARM64Guard {
+			headerBuf.WriteString("#endif\n")
+		}
+	}
+	headerBuf.WriteString("\n")
+
+	// Initialization function declaration
+	headerBuf.WriteString("// Initialize all function pointers via dlsym (defined in mlx.c)\n")
+	headerBuf.WriteString("int mlx_load_functions(void* handle);\n\n")
+
+	// Wrapper function declarations
+	headerBuf.WriteString("// Wrapper function declarations that call through function pointers\n")
+	headerBuf.WriteString("// Go code calls these directly as C.mlx_* (no #define redirection needed)\n")
+	for _, fn := range functions {
+		if fn.NeedsARM64Guard {
+			headerBuf.WriteString("#if defined(__aarch64__) || defined(_M_ARM64)\n")
+		}
+		headerBuf.WriteString(fmt.Sprintf("%s %s(%s);\n", fn.ReturnType, fn.Name, fn.Params))
+		if fn.NeedsARM64Guard {
+			headerBuf.WriteString("#endif\n")
+		}
+		headerBuf.WriteString("\n")
+	}
+
+	headerBuf.WriteString("#endif // MLX_WRAPPERS_H\n")
+
+	// Write header file
+	if err := os.WriteFile(headerPath, headerBuf.Bytes(), 0644); err != nil {
+		return fmt.Errorf("failed to write header file: %w", err)
+	}
+
+	// Generate implementation file
+	var implBuf bytes.Buffer
+
+	implBuf.WriteString("// AUTO-GENERATED by generate_wrappers.go - DO NOT EDIT\n")
+	implBuf.WriteString("// This file contains the function pointer definitions and initialization\n")
+	implBuf.WriteString("// All function pointers are in a single compilation unit to avoid duplication\n\n")
+
+	implBuf.WriteString("#include \"mlx/c/mlx.h\"\n")
+	implBuf.WriteString("#include \"mlx_dynamic.h\"\n")
+	implBuf.WriteString("#include <stdio.h>\n")
+	implBuf.WriteString("#include <dlfcn.h>\n\n")
+
+	// Function pointer definitions
+	implBuf.WriteString("// Function pointer definitions\n")
+	for _, fn := range functions {
+		if fn.NeedsARM64Guard {
+			implBuf.WriteString("#if defined(__aarch64__) || defined(_M_ARM64)\n")
+		}
+		implBuf.WriteString(fmt.Sprintf("%s (*%s_ptr)(%s) = NULL;\n", fn.ReturnType, fn.Name, fn.Params))
+		if fn.NeedsARM64Guard {
+			implBuf.WriteString("#endif\n")
+		}
+	}
+	implBuf.WriteString("\n")
+
+	// Initialization function
+	implBuf.WriteString("// Initialize all function pointers via dlsym\n")
+	implBuf.WriteString("int mlx_load_functions(void* handle) {\n")
+	implBuf.WriteString("    if (handle == NULL) {\n")
+	implBuf.WriteString("        fprintf(stderr, \"MLX: Invalid library handle\\n\");\n")
+	implBuf.WriteString("        return -1;\n")
+	implBuf.WriteString("    }\n\n")
+
+	// One dlsym lookup per function; the emitted code stops at the first
+	// symbol that cannot be resolved.
+	for _, fn := range functions {
+		if fn.NeedsARM64Guard {
+			implBuf.WriteString("#if defined(__aarch64__) || defined(_M_ARM64)\n")
+		}
+		implBuf.WriteString(fmt.Sprintf("    %s_ptr = dlsym(handle, \"%s\");\n", fn.Name, fn.Name))
+		implBuf.WriteString(fmt.Sprintf("    if (%s_ptr == NULL) {\n", fn.Name))
+		implBuf.WriteString(fmt.Sprintf("        fprintf(stderr, \"MLX: Failed to load symbol: %s\\n\");\n", fn.Name))
+		implBuf.WriteString("        return -1;\n")
+		implBuf.WriteString("    }\n")
+		if fn.NeedsARM64Guard {
+			implBuf.WriteString("#endif\n")
+		}
+	}
+
+	implBuf.WriteString("    return 0;\n")
+	implBuf.WriteString("}\n\n")
+
+	// Wrapper function implementations
+	implBuf.WriteString("// Wrapper function implementations that call through function pointers\n")
+	for _, fn := range functions {
+		if fn.NeedsARM64Guard {
+			implBuf.WriteString("#if defined(__aarch64__) || defined(_M_ARM64)\n")
+		}
+		implBuf.WriteString(fmt.Sprintf("%s %s(%s) {\n", fn.ReturnType, fn.Name, fn.Params))
+
+		// Call through function pointer
+		// A "return" is emitted only for non-void wrappers.
+		if fn.ReturnType != "void" {
+			implBuf.WriteString(fmt.Sprintf("    return %s_ptr(", fn.Name))
+		} else {
+			implBuf.WriteString(fmt.Sprintf("    %s_ptr(", fn.Name))
+		}
+
+		// Pass parameters
+		implBuf.WriteString(strings.Join(fn.ParamNames, ", "))
+		implBuf.WriteString(");\n")
+		implBuf.WriteString("}\n")
+		if fn.NeedsARM64Guard {
+			implBuf.WriteString("#endif\n")
+		}
+		implBuf.WriteString("\n")
+	}
+
+	// Write implementation file
+	if err := os.WriteFile(implPath, implBuf.Bytes(), 0644); err != nil {
+		return fmt.Errorf("failed to write implementation file: %w", err)
+	}
+
+	return nil
+}
+
+// main parses the command line, collects function declarations from every
+// header under the given directory and writes the wrapper header and
+// implementation files.
+//
+// Arguments: <mlx-c-include-dir> <output-header> [output-impl]. When
+// output-impl is omitted it is derived from output-header: a trailing ".h" is
+// replaced by ".c", otherwise ".c" is appended, so the two outputs never share
+// a path. The process exits with status 1 on missing arguments, a missing
+// header directory, a failed directory walk, or a failed write. A header that
+// cannot be read is reported on stderr and skipped.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(flag.CommandLine.Output(), "Usage: go run generate_wrappers.go <mlx-c-include-dir> <output-header> [output-impl]\n")
+		fmt.Fprintf(flag.CommandLine.Output(), "Generate MLX-C dynamic loading wrappers.\n\n")
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	args := flag.Args()
+	if len(args) < 2 {
+		fmt.Fprintf(flag.CommandLine.Output(), "ERROR: Missing required arguments\n\n")
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	headerDir := args[0]
+	outputHeader := args[1]
+	// Default implementation file is same name with .c extension. Appending
+	// ".c" when the header has no ".h" suffix keeps the implementation from
+	// being written over the header.
+	outputImpl := outputHeader + ".c"
+	if len(args) > 2 {
+		outputImpl = args[2]
+	} else if strings.HasSuffix(outputHeader, ".h") {
+		outputImpl = strings.TrimSuffix(outputHeader, ".h") + ".c"
+	}
+
+	// Check if header directory exists
+	if _, err := os.Stat(headerDir); os.IsNotExist(err) {
+		fmt.Fprintf(os.Stderr, "ERROR: MLX-C headers directory not found at: %s\n\n", headerDir)
+		fmt.Fprintf(os.Stderr, "Please run CMake first to download MLX-C dependencies:\n")
+		fmt.Fprintf(os.Stderr, "  cmake -B build\n\n")
+		fmt.Fprintf(os.Stderr, "The CMake build will download and extract MLX-C headers needed for wrapper generation.\n")
+		os.Exit(1)
+	}
+
+	fmt.Fprintf(os.Stderr, "Parsing MLX-C headers from: %s\n", headerDir)
+
+	// Find all headers
+	headers, err := findHeaders(headerDir)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "ERROR: Failed to find header files: %v\n", err)
+		os.Exit(1)
+	}
+	fmt.Fprintf(os.Stderr, "Found %d header files\n", len(headers))
+
+	// Parse all headers
+	var allFunctions []Function
+	seen := make(map[string]bool)
+
+	for _, header := range headers {
+		content, err := os.ReadFile(header)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Error reading %s: %v\n", header, err)
+			continue
+		}
+
+		cleaned := cleanContent(string(content))
+		functions := parseFunctions(cleaned)
+
+		// Deduplicate: the first declaration seen for a name wins.
+		for _, fn := range functions {
+			if !seen[fn.Name] {
+				seen[fn.Name] = true
+				allFunctions = append(allFunctions, fn)
+			}
+		}
+	}
+
+	fmt.Fprintf(os.Stderr, "Found %d unique function declarations\n", len(allFunctions))
+
+	// Generate wrapper files
+	if err := generateWrapperFiles(allFunctions, outputHeader, outputImpl); err != nil {
+		fmt.Fprintf(os.Stderr, "ERROR: Failed to generate wrapper files: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Fprintf(os.Stderr, "Generated %s and %s successfully\n", outputHeader, outputImpl)
+}
--- a/x/imagegen/mlx/mlx.c
+++ b/x/imagegen/mlx/mlx.c
--- a/x/imagegen/mlx/mlx.go
+++ b/x/imagegen/mlx/mlx.go
@@ -3,12 +3,13 @@
 package mlx

 /*
-#cgo CFLAGS: -O3 -I${SRCDIR}/../../../build/_deps/mlx-c-src
-#cgo LDFLAGS: -L${SRCDIR}/../../../build/lib/ollama/ -lmlxc -Wl,-rpath,${SRCDIR}/../../../build/lib/ollama/
+#cgo CFLAGS: -O3 -I${SRCDIR}/../../../build/_deps/mlx-c-src -I${SRCDIR}
 #cgo darwin LDFLAGS: -lc++ -framework Metal -framework Foundation -framework Accelerate
-#cgo linux LDFLAGS: -lstdc++ -lcuda -lcudart -lnvrtc
+#cgo linux LDFLAGS: -lstdc++ -ldl
+#cgo windows LDFLAGS: -lstdc++

-#include "mlx/c/mlx.h"
+// Use generated wrappers instead of direct MLX headers
+#include "mlx.h"
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
@@ -42,192 +43,6 @@ static inline mlx_stream cpu_stream() {
 // CGO noescape/nocallback hints to reduce CGO overhead
 // noescape: pointers won't escape, no heap allocation needed
 // nocallback: function won't call back into Go
-#cgo noescape mlx_add
-#cgo nocallback mlx_add
-#cgo noescape mlx_subtract
-#cgo nocallback mlx_subtract
-#cgo noescape mlx_multiply
-#cgo nocallback mlx_multiply
-#cgo noescape mlx_divide
-#cgo nocallback mlx_divide
-#cgo noescape mlx_negative
-#cgo nocallback mlx_negative
-#cgo noescape mlx_abs
-#cgo nocallback mlx_abs
-#cgo noescape mlx_exp
-#cgo nocallback mlx_exp
-#cgo noescape mlx_log
-#cgo nocallback mlx_log
-#cgo noescape mlx_sqrt
-#cgo nocallback mlx_sqrt
-#cgo noescape mlx_rsqrt
-#cgo nocallback mlx_rsqrt
-#cgo noescape mlx_square
-#cgo nocallback mlx_square
-#cgo noescape mlx_power
-#cgo nocallback mlx_power
-#cgo noescape mlx_erf
-#cgo nocallback mlx_erf
-#cgo noescape mlx_sigmoid
-#cgo nocallback mlx_sigmoid
-#cgo noescape mlx_tanh
-#cgo nocallback mlx_tanh
-#cgo noescape mlx_sin
-#cgo nocallback mlx_sin
-#cgo noescape mlx_cos
-#cgo nocallback mlx_cos
-#cgo noescape mlx_maximum
-#cgo nocallback mlx_maximum
-#cgo noescape mlx_minimum
-#cgo nocallback mlx_minimum
-#cgo noescape mlx_clip
-#cgo nocallback mlx_clip
-#cgo noescape mlx_sum
-#cgo nocallback mlx_sum
-#cgo noescape mlx_sum_axis
-#cgo nocallback mlx_sum_axis
-#cgo noescape mlx_mean
-#cgo nocallback mlx_mean
-#cgo noescape mlx_mean_axis
-#cgo nocallback mlx_mean_axis
-#cgo noescape mlx_var_axis
-#cgo nocallback mlx_var_axis
-#cgo noescape mlx_argmax
-#cgo nocallback mlx_argmax
-#cgo noescape mlx_argmax_axis
-#cgo nocallback mlx_argmax_axis
-#cgo noescape mlx_softmax_axis
-#cgo nocallback mlx_softmax_axis
-#cgo noescape mlx_cumsum
-#cgo nocallback mlx_cumsum
-#cgo noescape mlx_matmul
-#cgo nocallback mlx_matmul
-#cgo noescape mlx_addmm
-#cgo nocallback mlx_addmm
-#cgo noescape mlx_gather_mm
-#cgo nocallback mlx_gather_mm
-#cgo noescape mlx_gather_qmm
-#cgo nocallback mlx_gather_qmm
-#cgo noescape mlx_reshape
-#cgo nocallback mlx_reshape
-#cgo noescape mlx_transpose_axes
-#cgo nocallback mlx_transpose_axes
-#cgo noescape mlx_expand_dims
-#cgo nocallback mlx_expand_dims
-#cgo noescape mlx_squeeze_axis
-#cgo nocallback mlx_squeeze_axis
-#cgo noescape mlx_flatten
-#cgo nocallback mlx_flatten
-#cgo noescape mlx_concatenate_axis
-#cgo nocallback mlx_concatenate_axis
-#cgo noescape mlx_slice
-#cgo nocallback mlx_slice
-#cgo noescape mlx_slice_update
-#cgo nocallback mlx_slice_update
-#cgo noescape mlx_as_strided
-#cgo nocallback mlx_as_strided
-#cgo noescape mlx_view
-#cgo nocallback mlx_view
-#cgo noescape mlx_contiguous
-#cgo nocallback mlx_contiguous
-#cgo noescape mlx_pad
-#cgo nocallback mlx_pad
-#cgo noescape mlx_tile
-#cgo nocallback mlx_tile
-#cgo noescape mlx_take_axis
-#cgo nocallback mlx_take_axis
-#cgo noescape mlx_take_along_axis
-#cgo nocallback mlx_take_along_axis
-#cgo noescape mlx_put_along_axis
-#cgo nocallback mlx_put_along_axis
-#cgo noescape mlx_where
-#cgo nocallback mlx_where
-#cgo noescape mlx_argsort_axis
-#cgo nocallback mlx_argsort_axis
-#cgo noescape mlx_argpartition_axis
-#cgo nocallback mlx_argpartition_axis
-#cgo noescape mlx_topk_axis
-#cgo nocallback mlx_topk_axis
-#cgo noescape mlx_less
-#cgo nocallback mlx_less
-#cgo noescape mlx_greater_equal
-#cgo nocallback mlx_greater_equal
-#cgo noescape mlx_logical_and
-#cgo nocallback mlx_logical_and
-#cgo noescape mlx_zeros
-#cgo nocallback mlx_zeros
-#cgo noescape mlx_zeros_like
-#cgo nocallback mlx_zeros_like
-#cgo noescape mlx_ones
-#cgo nocallback mlx_ones
-#cgo noescape mlx_full
-#cgo nocallback mlx_full
-#cgo noescape mlx_arange
-#cgo nocallback mlx_arange
-#cgo noescape mlx_linspace
-#cgo nocallback mlx_linspace
-#cgo noescape mlx_tri
-#cgo nocallback mlx_tri
-#cgo noescape mlx_astype
-#cgo nocallback mlx_astype
-#cgo noescape mlx_fast_rms_norm
-#cgo nocallback mlx_fast_rms_norm
-#cgo noescape mlx_fast_rope
-#cgo nocallback mlx_fast_rope
-#cgo noescape mlx_fast_scaled_dot_product_attention
-#cgo nocallback mlx_fast_scaled_dot_product_attention
-#cgo noescape mlx_conv2d
-#cgo nocallback mlx_conv2d
-#cgo noescape mlx_conv3d
-#cgo nocallback mlx_conv3d
-#cgo noescape mlx_random_key
-#cgo nocallback mlx_random_key
-#cgo noescape mlx_random_split
-#cgo nocallback mlx_random_split
-#cgo noescape mlx_random_categorical_num_samples
-#cgo nocallback mlx_random_categorical_num_samples
-#cgo noescape mlx_random_normal
-#cgo nocallback mlx_random_normal
-#cgo noescape mlx_random_uniform
-#cgo nocallback mlx_random_uniform
-#cgo noescape mlx_array_eval
-#cgo nocallback mlx_array_eval
-#cgo noescape mlx_eval
-#cgo nocallback mlx_eval
-#cgo noescape mlx_async_eval
-#cgo nocallback mlx_async_eval
-#cgo noescape mlx_synchronize
-#cgo nocallback mlx_synchronize
-#cgo noescape mlx_array_new
-#cgo nocallback mlx_array_new
-#cgo noescape mlx_array_new_data
-#cgo nocallback mlx_array_new_data
-#cgo noescape mlx_array_new_float
-#cgo nocallback mlx_array_new_float
-#cgo noescape mlx_array_free
-#cgo nocallback mlx_array_free
-#cgo noescape mlx_array_size
-#cgo nocallback mlx_array_size
-#cgo noescape mlx_array_ndim
-#cgo nocallback mlx_array_ndim
-#cgo noescape mlx_array_dim
-#cgo nocallback mlx_array_dim
-#cgo noescape mlx_array_dtype
-#cgo nocallback mlx_array_dtype
-#cgo noescape mlx_array_item_int32
-#cgo nocallback mlx_array_item_int32
-#cgo noescape mlx_vector_array_new_data
-#cgo nocallback mlx_vector_array_new_data
-#cgo noescape mlx_vector_array_free
-#cgo nocallback mlx_vector_array_free
-#cgo noescape mlx_array_new_int
-#cgo nocallback mlx_array_new_int
-#cgo noescape mlx_stream_new_device
-#cgo nocallback mlx_stream_new_device
-#cgo noescape mlx_get_default_stream
-#cgo nocallback mlx_get_default_stream
-#cgo noescape mlx_set_default_stream
-#cgo nocallback mlx_set_default_stream
 */
 import "C"
 import (
@@ -1796,7 +1611,57 @@ func ArgmaxKeepArray(logits *Array) *Array {
 var RandomState = []*Array{nil}
 var randomStateMu sync.Mutex

+// mlxInitialized becomes true only after the library and all function symbols
+// have loaded successfully.
+var mlxInitialized bool
+// mlxInitError holds the error from the most recent failed InitMLX attempt;
+// it is nil before any attempt and after a successful one.
+var mlxInitError error
+
+// InitMLX initializes the MLX library by dynamically loading libmlxc and then
+// resolving the wrapper function pointers from the loaded library.
+// This must be called before using any MLX functions.
+// Returns an error if the library cannot be loaded or a symbol is missing.
+//
+// After a successful call, later calls return immediately. A failed call is
+// recorded in mlxInitError but does not mark the package as initialized, so a
+// subsequent call attempts the load again.
+//
+// NOTE(review): the package-level state is read and written without
+// synchronization - confirm callers never invoke this concurrently.
+func InitMLX() error {
+	if mlxInitialized {
+		return mlxInitError
+	}
+
+	// Try to load the MLX dynamic library
+	ret := C.mlx_dynamic_init()
+	if ret != 0 {
+		errMsg := C.GoString(C.mlx_dynamic_error())
+		mlxInitError = fmt.Errorf("failed to initialize MLX: %s", errMsg)
+		return mlxInitError
+	}
+
+	// Initialize all function pointers via dlsym
+	handle := C.mlx_get_handle()
+	ret = C.mlx_load_functions(handle)
+	if ret != 0 {
+		mlxInitError = fmt.Errorf("failed to load MLX function symbols")
+		return mlxInitError
+	}
+
+	mlxInitialized = true
+	mlxInitError = nil
+	return nil
+}
+
+// IsMLXAvailable reports whether InitMLX has completed successfully and no
+// initialization error is currently recorded.
+func IsMLXAvailable() bool {
+	if !mlxInitialized {
+		return false
+	}
+	return mlxInitError == nil
+}
+
+// GetMLXInitError returns the error recorded by the most recent failed
+// initialization attempt, or nil if none is recorded.
+func GetMLXInitError() error {
+	return mlxInitError
+}
+
 func init() {
+	// Initialize MLX dynamic library first
+	if err := InitMLX(); err != nil {
+		// Don't panic in init - let the caller handle the error
+		// Store the error for later retrieval
+		mlxInitError = err
+		return
+	}
+
 	// Lock main goroutine to OS thread for CUDA context stability.
 	// CUDA contexts are bound to threads; Go can migrate goroutines between threads.
 	runtime.LockOSThread()
--- a/x/imagegen/mlx/mlx.h
+++ b/x/imagegen/mlx/mlx.h
--- a/x/imagegen/mlx/mlx_dynamic.c
+++ b/x/imagegen/mlx/mlx_dynamic.c
@@ -0,0 +1,144 @@
+// mlx_dynamic.c - Dynamic loading wrapper for MLX-C library
+// This file provides runtime dynamic loading of libmlxc instead of link-time binding
+
+#include "mlx_dynamic.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Platform abstraction for loading a shared library at runtime:
+// LOAD_LIB / GET_SYMBOL / CLOSE_LIB map to LoadLibraryA / GetProcAddress /
+// FreeLibrary on Windows and to dlopen / dlsym / dlclose elsewhere.
+#ifdef _WIN32
+#include <windows.h>
+typedef HMODULE lib_handle_t;
+#define LOAD_LIB(path) LoadLibraryA(path)
+#define GET_SYMBOL(handle, name) GetProcAddress(handle, name)
+#define CLOSE_LIB(handle) FreeLibrary(handle)
+// Windows reports a fixed string here rather than a system error description.
+#define LIB_ERROR() "LoadLibrary failed"
+#else
+#include <dlfcn.h>
+typedef void* lib_handle_t;
+#define LOAD_LIB(path) dlopen(path, RTLD_LAZY | RTLD_GLOBAL)
+#define GET_SYMBOL(handle, name) dlsym(handle, name)
+#define CLOSE_LIB(handle) dlclose(handle)
+#define LIB_ERROR() dlerror()
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#include <libgen.h>
+#endif
+#endif
+
+// Loader state, private to this file.
+static lib_handle_t mlx_handle = NULL;       // handle of the loaded library, NULL if none
+static int mlx_initialized = 0;              // 1 after a successful mlx_dynamic_init
+static char mlx_error_buffer[512] = {0};     // last status message (failure or success text)
+
+#ifdef __APPLE__
+// Build the path of `libname` inside the directory of the running executable.
+//
+// Returns a pointer to a function-local static buffer (overwritten by the
+// next call), or NULL if the executable path cannot be obtained or the
+// combined path does not fit in the buffer.
+static char* get_exe_relative_path(const char* libname) {
+    static char path[1024];
+    uint32_t size = sizeof(path);
+    if (_NSGetExecutablePath(path, &size) != 0) {
+        return NULL;
+    }
+    // Get directory of executable
+    char* dir = dirname(path);
+    if (dir == NULL) {
+        return NULL;
+    }
+    static char fullpath[1024];
+    int written = snprintf(fullpath, sizeof(fullpath), "%s/%s", dir, libname);
+    // Reject a truncated result: a cut-off path would name the wrong file.
+    if (written < 0 || (size_t)written >= sizeof(fullpath)) {
+        return NULL;
+    }
+    return fullpath;
+}
+#endif
+
+// Attempt to open the library at `path` and store the result in mlx_handle.
+// Returns 1 if the library was opened, 0 if `path` is NULL or loading failed.
+static int try_load_lib(const char* path) {
+    if (path == NULL) {
+        return 0;
+    }
+    mlx_handle = LOAD_LIB(path);
+    return (mlx_handle == NULL) ? 0 : 1;
+}
+
+// Initialize MLX dynamic library
+// Returns 0 on success, -1 on failure
+// On failure, call mlx_dynamic_error() to get error message
+//
+// Candidate locations are tried in a fixed, platform-specific order and the
+// first one that loads is kept. Calling this again after a success is a
+// no-op that returns 0. On failure the message lists every path tried and,
+// when available, the loader's last error text.
+int mlx_dynamic_init(void) {
+    if (mlx_initialized) {
+        return 0;  // Already initialized
+    }
+
+    const char* lib_path = NULL;
+    const char* tried_paths[8] = {0};
+    int num_tried = 0;
+
+#ifdef _WIN32
+    // Windows: try same directory as executable
+    lib_path = "libmlxc.dll";
+    tried_paths[num_tried++] = lib_path;
+    if (try_load_lib(lib_path)) goto success;
+#elif defined(__APPLE__)
+    // macOS: try executable directory first
+    lib_path = get_exe_relative_path("libmlxc.dylib");
+    if (lib_path) {
+        tried_paths[num_tried++] = lib_path;
+        if (try_load_lib(lib_path)) goto success;
+    }
+    // Try build directory (for tests run from repo root)
+    lib_path = "./build/lib/ollama/libmlxc.dylib";
+    tried_paths[num_tried++] = lib_path;
+    if (try_load_lib(lib_path)) goto success;
+    // Fallback to system paths
+    lib_path = "libmlxc.dylib";
+    tried_paths[num_tried++] = lib_path;
+    if (try_load_lib(lib_path)) goto success;
+#else
+    // Linux: try build directory first (for tests)
+    lib_path = "./build/lib/ollama/libmlxc.so";
+    tried_paths[num_tried++] = lib_path;
+    if (try_load_lib(lib_path)) goto success;
+    // Fallback to system paths
+    lib_path = "libmlxc.so";
+    tried_paths[num_tried++] = lib_path;
+    if (try_load_lib(lib_path)) goto success;
+#endif
+
+    // Failed to load library - build error message with all tried paths
+    {
+        const char* err = LIB_ERROR();
+        const size_t cap = sizeof(mlx_error_buffer);
+        // snprintf returns the length it wanted to write, which can exceed
+        // the space that was left. Clamp after every call so `offset` never
+        // moves past the terminator; otherwise `cap - offset` would wrap
+        // around and the next write would land outside the buffer.
+        int n = snprintf(mlx_error_buffer, cap,
+                     "MLX: Failed to load libmlxc library. Tried: ");
+        size_t offset = (n < 0) ? 0 : ((size_t)n < cap ? (size_t)n : cap - 1);
+        for (int i = 0; i < num_tried && offset < cap - 50; i++) {
+            n = snprintf(mlx_error_buffer + offset, cap - offset,
+                         "%s%s", i > 0 ? ", " : "", tried_paths[i]);
+            if (n < 0) {
+                break;
+            }
+            offset = ((size_t)n < cap - offset) ? offset + (size_t)n : cap - 1;
+        }
+        if (err && offset < cap - 1) {
+            snprintf(mlx_error_buffer + offset, cap - offset,
+                    ". Last error: %s", err);
+        }
+    }
+    return -1;
+
+success:
+    mlx_initialized = 1;
+    snprintf(mlx_error_buffer, sizeof(mlx_error_buffer),
+             "MLX: Successfully loaded %s", lib_path ? lib_path : "library");
+    return 0;
+}
+
+// Get the last message written by mlx_dynamic_init.
+// Returns a pointer to a static buffer owned by this file; the caller must
+// not free it. After a failed init it holds the error text; after a
+// successful init it holds a success message; before any init it is empty.
+const char* mlx_dynamic_error(void) {
+    return mlx_error_buffer;
+}
+
+// Check if MLX is initialized
+// Returns 1 after mlx_dynamic_init has succeeded (and until
+// mlx_dynamic_cleanup releases the library), 0 otherwise.
+int mlx_dynamic_is_initialized(void) {
+    return mlx_initialized;
+}
+
+// Get the library handle (for use by generated wrappers)
+// Returns NULL when no library is currently loaded.
+void* mlx_get_handle(void) {
+    return mlx_handle;
+}
+
+// Optional cleanup: close the loaded library, if any, and reset the loader
+// state so mlx_dynamic_init can load it again. Does nothing when no library
+// is loaded.
+void mlx_dynamic_cleanup(void) {
+    if (mlx_handle == NULL) {
+        return;
+    }
+    CLOSE_LIB(mlx_handle);
+    mlx_handle = NULL;
+    mlx_initialized = 0;
+}
--- a/x/imagegen/mlx/mlx_dynamic.h
+++ b/x/imagegen/mlx/mlx_dynamic.h
@@ -0,0 +1,29 @@
+// mlx_dynamic.h - Dynamic loading interface for MLX-C library
+#ifndef MLX_DYNAMIC_H
+#define MLX_DYNAMIC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Initialize the MLX dynamic library
+// Returns 0 on success, -1 on failure
+// On failure, mlx_dynamic_error() describes which paths were tried.
+int mlx_dynamic_init(void);
+
+// Get the last message from dynamic loading.
+// The returned string is owned by the library and must not be freed.
+const char* mlx_dynamic_error(void);
+
+// Check if MLX is initialized
+// Returns non-zero once mlx_dynamic_init() has succeeded.
+int mlx_dynamic_is_initialized(void);
+
+// Get the library handle (for use by generated wrappers)
+// Returns NULL if no library is loaded.
+void* mlx_get_handle(void);
+
+// Cleanup resources (optional, for clean shutdown)
+// Safe to call when nothing is loaded.
+void mlx_dynamic_cleanup(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MLX_DYNAMIC_H
--- a/x/imagegen/mlx/mlx_test.go
+++ b/x/imagegen/mlx/mlx_test.go
@@ -4,9 +4,30 @@ package mlx

 import (
 	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
 	"testing"
 )

+// TestMain prepares the process for the mlx package tests: it switches the
+// working directory to the repository root and loads the MLX library.
+// When the library cannot be loaded the whole test binary exits with status 0
+// without running any test.
+func TestMain(m *testing.M) {
+	// The loader looks for ./build/lib/ollama/ relative to the working
+	// directory, so run from the repo root (three levels above this file).
+	_, self, _, _ := runtime.Caller(0)
+	root := filepath.Join(filepath.Dir(self), "..", "..", "..")
+	if chdirErr := os.Chdir(root); chdirErr != nil {
+		fmt.Printf("Failed to change to repo root: %v\n", chdirErr)
+		os.Exit(1)
+	}
+
+	initErr := InitMLX()
+	if initErr == nil {
+		os.Exit(m.Run())
+	}
+	fmt.Printf("Skipping MLX tests: %v\n", initErr)
+	os.Exit(0)
+}
+
 // TestBasicCleanup verifies non-kept arrays are freed and kept arrays survive.
 func TestBasicCleanup(t *testing.T) {
 	weight := NewArrayFloat32([]float32{1, 2, 3, 4}, []int32{2, 2})
--- a/x/imagegen/models/qwen_image/pipeline_test.go
+++ b/x/imagegen/models/qwen_image/pipeline_test.go
@@ -3,12 +3,33 @@
 package qwen_image

 import (
+	"fmt"
 	"os"
+	"path/filepath"
+	"runtime"
 	"testing"

 	"github.com/ollama/ollama/x/imagegen/mlx"
 )

+// TestMain prepares the process for the qwen_image tests: it switches the
+// working directory to the repository root and loads the MLX library.
+// When the library cannot be loaded the whole test binary exits with status 0
+// without running any test.
+func TestMain(m *testing.M) {
+	// The loader looks for ./build/lib/ollama/ relative to the working
+	// directory, so run from the repo root (four levels above this file).
+	_, self, _, _ := runtime.Caller(0)
+	root := filepath.Join(filepath.Dir(self), "..", "..", "..", "..")
+	if chdirErr := os.Chdir(root); chdirErr != nil {
+		fmt.Printf("Failed to change to repo root: %v\n", chdirErr)
+		os.Exit(1)
+	}
+
+	initErr := mlx.InitMLX()
+	if initErr == nil {
+		os.Exit(m.Run())
+	}
+	fmt.Printf("Skipping qwen_image tests: %v\n", initErr)
+	os.Exit(0)
+}
+
 // TestPipelineOutput runs the full pipeline (integration test).
 // Skips if model weights not found. Requires ~50GB VRAM.
 func TestPipelineOutput(t *testing.T) {
--- a/x/imagegen/models/qwen_image/qwen_image.go
+++ b/x/imagegen/models/qwen_image/qwen_image.go
@@ -9,7 +9,6 @@ import (
 	"path/filepath"
 	"time"

-	"github.com/ollama/ollama/x/imagegen"
 	"github.com/ollama/ollama/x/imagegen/cache"
 	"github.com/ollama/ollama/x/imagegen/mlx"
 	"github.com/ollama/ollama/x/imagegen/tokenizer"
--- a/x/imagegen/models/qwen_image_edit/rope_test.go
+++ b/x/imagegen/models/qwen_image_edit/rope_test.go
@@ -3,13 +3,35 @@
 package qwen_image_edit

 import (
+	"fmt"
 	"math"
+	"os"
+	"path/filepath"
+	"runtime"
 	"testing"

 	"github.com/ollama/ollama/x/imagegen/mlx"
 	"github.com/ollama/ollama/x/imagegen/models/qwen_image"
 )

+// TestMain prepares the process for the qwen_image_edit tests: it switches
+// the working directory to the repository root and loads the MLX library.
+// When the library cannot be loaded the whole test binary exits with status 0
+// without running any test.
+func TestMain(m *testing.M) {
+	// The loader looks for ./build/lib/ollama/ relative to the working
+	// directory, so run from the repo root (four levels above this file).
+	_, self, _, _ := runtime.Caller(0)
+	root := filepath.Join(filepath.Dir(self), "..", "..", "..", "..")
+	if chdirErr := os.Chdir(root); chdirErr != nil {
+		fmt.Printf("Failed to change to repo root: %v\n", chdirErr)
+		os.Exit(1)
+	}
+
+	initErr := mlx.InitMLX()
+	if initErr == nil {
+		os.Exit(m.Run())
+	}
+	fmt.Printf("Skipping qwen_image_edit tests: %v\n", initErr)
+	os.Exit(0)
+}
+
 // TestComputeAxisFreqs verifies frequency computation matches Python reference
 func TestComputeAxisFreqs(t *testing.T) {
 	theta := float64(10000)
--- a/x/imagegen/nn/nn_test.go
+++ b/x/imagegen/nn/nn_test.go
@@ -3,12 +3,34 @@
 package nn

 import (
+	"fmt"
 	"math"
+	"os"
+	"path/filepath"
+	"runtime"
 	"testing"

 	"github.com/ollama/ollama/x/imagegen/mlx"
 )

+// TestMain prepares the process for the nn tests: it switches the working
+// directory to the repository root and loads the MLX library.
+// When the library cannot be loaded the whole test binary exits with status 0
+// without running any test.
+func TestMain(m *testing.M) {
+	// The loader looks for ./build/lib/ollama/ relative to the working
+	// directory, so run from the repo root (three levels above this file).
+	_, self, _, _ := runtime.Caller(0)
+	root := filepath.Join(filepath.Dir(self), "..", "..", "..")
+	if chdirErr := os.Chdir(root); chdirErr != nil {
+		fmt.Printf("Failed to change to repo root: %v\n", chdirErr)
+		os.Exit(1)
+	}
+
+	initErr := mlx.InitMLX()
+	if initErr == nil {
+		os.Exit(m.Run())
+	}
+	fmt.Printf("Skipping nn tests: %v\n", initErr)
+	os.Exit(0)
+}
+
 // TestLinearNoBias verifies Linear without bias computes x @ w.T correctly.
 func TestLinearNoBias(t *testing.T) {
 	// Weight: [out=2, in=3] -> transposed at forward time
--- a/x/imagegen/runner/runner.go
+++ b/x/imagegen/runner/runner.go
@@ -62,6 +62,12 @@ func Execute(args []string) error {
 		return fmt.Errorf("--port is required")
 	}

+	err := mlx.InitMLX()
+	if err != nil {
+		slog.Error("unable to initialize MLX", "error", err)
+		return err
+	}
+	slog.Info("MLX library initialized")
 	slog.Info("starting image runner", "model", *modelName, "port", *port)

 	// Check memory requirements before loading
--- a/x/imagegen/server.go
+++ b/x/imagegen/server.go
@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"bytes"
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -61,7 +62,7 @@ func NewServer(modelName string) (*Server, error) {
 		port = rand.Intn(65535-49152) + 49152
 	}

-	// Get the ollama-mlx executable path (in same directory as current executable)
+	// Get the current executable path (we use the same binary with runner subcommand)
 	exe, err := os.Executable()
 	if err != nil {
 		return nil, fmt.Errorf("unable to lookup executable path: %w", err)
@@ -69,10 +70,9 @@ func NewServer(modelName string) (*Server, error) {
 	if eval, err := filepath.EvalSymlinks(exe); err == nil {
 		exe = eval
 	}
-	mlxExe := filepath.Join(filepath.Dir(exe), "ollama-mlx")

-	// Spawn subprocess: ollama-mlx runner --image-engine --model <path> --port <port>
-	cmd := exec.Command(mlxExe, "runner", "--image-engine", "--model", modelName, "--port", strconv.Itoa(port))
+	// Spawn subprocess: ollama runner --image-engine --model <path> --port <port>
+	cmd := exec.Command(exe, "runner", "--image-engine", "--model", modelName, "--port", strconv.Itoa(port))
 	cmd.Env = os.Environ()

 	// On Linux, set LD_LIBRARY_PATH to include MLX library directories
@@ -134,7 +134,7 @@ func NewServer(modelName string) (*Server, error) {
 		}
 	}()

-	slog.Info("starting ollama-mlx image runner subprocess", "exe", mlxExe, "model", modelName, "port", port)
+	slog.Info("starting image runner subprocess", "exe", exe, "model", modelName, "port", port)
 	if err := cmd.Start(); err != nil {
 		return nil, fmt.Errorf("failed to start image runner: %w", err)
 	}
@@ -232,13 +232,11 @@ func (s *Server) Completion(ctx context.Context, req llm.CompletionRequest, fn f
 		Prompt string `json:"prompt"`
 		Width  int32  `json:"width,omitempty"`
 		Height int32  `json:"height,omitempty"`
-		Steps  int32  `json:"steps,omitempty"`
 		Seed   int64  `json:"seed,omitempty"`
 	}{
 		Prompt: req.Prompt,
 		Width:  req.Width,
 		Height: req.Height,
-		Steps:  req.Steps,
 		Seed:   seed,
 	}

@@ -281,11 +279,15 @@ func (s *Server) Completion(ctx context.Context, req llm.CompletionRequest, fn f

 		// Convert to llm.CompletionResponse
 		cresp := llm.CompletionResponse{
-			Content:    raw.Content,
-			Done:       raw.Done,
-			Step:       raw.Step,
-			TotalSteps: raw.Total,
-			Image:      raw.Image,
+			Content: raw.Content,
+			Done:    raw.Done,
+			Step:    raw.Step,
+			Total:   raw.Total,
+		}
+		if raw.Image != "" {
+			if data, err := base64.StdEncoding.DecodeString(raw.Image); err == nil {
+				cresp.Image = data
+			}
 		}

 		fn(cresp)
--- a/x/ml/backend/mlx/CMakeLists.txt
+++ b/x/ml/backend/mlx/CMakeLists.txt
@@ -1,5 +1,9 @@
 include(FetchContent)

+# Read MLX version from top-level file (shared with Dockerfile)
+file(READ "${CMAKE_SOURCE_DIR}/MLX_VERSION" MLX_C_GIT_TAG)
+string(STRIP "${MLX_C_GIT_TAG}" MLX_C_GIT_TAG)
+
 set(MLX_C_BUILD_EXAMPLES OFF)

 set(MLX_BUILD_GGUF OFF)
@@ -50,7 +54,7 @@ endif()
 FetchContent_Declare(
  mlx-c
  GIT_REPOSITORY "https://github.com/ml-explore/mlx-c.git"
-  GIT_TAG v0.4.1)
+  GIT_TAG ${MLX_C_GIT_TAG})
 FetchContent_MakeAvailable(mlx-c)

 set_target_output_directory(mlx)
--- a/x/ml/backend/mlx/mlx_dynamic.c
+++ b/x/ml/backend/mlx/mlx_dynamic.c
@@ -0,0 +1,92 @@
+// mlx_dynamic.c - Dynamic loading wrapper for MLX-C library
+// This file provides runtime dynamic loading of libmlxc instead of link-time binding
+
+#include "mlx_dynamic.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32  // Windows: map the loader macros onto the Win32 API
+#include <windows.h>
+typedef HMODULE lib_handle_t;
+#define LOAD_LIB(path) LoadLibraryA(path)
+#define GET_SYMBOL(handle, name) GetProcAddress(handle, name)
+#define CLOSE_LIB(handle) FreeLibrary(handle)
+#define LIB_ERROR() "LoadLibrary failed"  // fixed text; no system error code is looked up
+static const char* LIB_NAMES[] = {"libmlxc.dll", NULL};  // NULL-terminated candidate list
+#else  // everything else: map the loader macros onto the dlfcn API
+#include <dlfcn.h>
+typedef void* lib_handle_t;
+#define LOAD_LIB(path) dlopen(path, RTLD_LAZY | RTLD_GLOBAL)
+#define GET_SYMBOL(handle, name) dlsym(handle, name)
+#define CLOSE_LIB(handle) dlclose(handle)
+#define LIB_ERROR() dlerror()  // describes only the most recent dlfcn failure
+#ifdef __APPLE__  // macOS candidates (.dylib)
+static const char* LIB_NAMES[] = {  // tried in array order by mlx_dynamic_init(); NULL ends the list
+    "libmlxc.dylib",
+    "@loader_path/../build/lib/ollama/libmlxc.dylib",
+    "@executable_path/../build/lib/ollama/libmlxc.dylib",
+    "build/lib/ollama/libmlxc.dylib",
+    "../build/lib/ollama/libmlxc.dylib",
+    NULL
+};
+#else  // remaining dlfcn platforms (.so)
+static const char* LIB_NAMES[] = {  // tried in array order by mlx_dynamic_init(); NULL ends the list
+    "libmlxc.so",
+    "$ORIGIN/../build/lib/ollama/libmlxc.so",
+    "build/lib/ollama/libmlxc.so",
+    "../build/lib/ollama/libmlxc.so",
+    NULL
+};
+#endif  // __APPLE__
+#endif  // _WIN32
+
+static lib_handle_t mlx_handle = NULL;  // handle returned by LOAD_LIB; NULL while nothing is loaded
+static int mlx_initialized = 0;  // set to 1 after a successful load, back to 0 by mlx_dynamic_cleanup()
+static char mlx_error_buffer[512] = {0};  // last status text from mlx_dynamic_init(); returned by mlx_dynamic_error()
+
+// Initialize MLX dynamic library by trying each LIB_NAMES candidate in order.
+// Returns 0 on success (or if already initialized), -1 if no candidate loads.
+// On failure, mlx_dynamic_error() returns a message listing the loader error
+// recorded for every candidate that was tried (truncated to the buffer size).
+int mlx_dynamic_init(void) {
+    if (mlx_initialized) {
+        return 0;  // Already initialized
+    }
+
+    // Start the failure message up front and append to it per attempt.
+    // LIB_ERROR() only describes the most recent failed call, so reading it
+    // once after the loop would report the last fallback path's error and
+    // hide why an earlier candidate (e.g. one that exists but has a missing
+    // dependency) could not be loaded.
+    size_t used = 0;
+    int written = snprintf(mlx_error_buffer, sizeof(mlx_error_buffer),
+                           "MLX: Failed to load libmlxc library.");
+    if (written > 0) {
+        used = (size_t)written;
+    }
+
+    for (int i = 0; LIB_NAMES[i] != NULL; i++) {
+        mlx_handle = LOAD_LIB(LIB_NAMES[i]);
+        if (mlx_handle != NULL) {
+            mlx_initialized = 1;
+            // Overwrites any failure text gathered from earlier candidates
+            snprintf(mlx_error_buffer, sizeof(mlx_error_buffer),
+                     "MLX: Successfully loaded %s", LIB_NAMES[i]);
+            return 0;
+        }
+
+        // Fetch the error right after the failed attempt, before the next
+        // LOAD_LIB call can replace it
+        const char* err = LIB_ERROR();
+        if (used < sizeof(mlx_error_buffer) - 1) {
+            written = snprintf(mlx_error_buffer + used,
+                               sizeof(mlx_error_buffer) - used,
+                               " [%s: %s]", LIB_NAMES[i],
+                               err ? err : "Unknown error");
+            if (written > 0) {
+                used += (size_t)written;
+                // snprintf reports the untruncated length; clamp so later
+                // iterations never index past the buffer
+                if (used >= sizeof(mlx_error_buffer)) {
+                    used = sizeof(mlx_error_buffer) - 1;
+                }
+            }
+        }
+    }
+
+    return -1;
+}
+
+// Return the message recorded by the most recent mlx_dynamic_init() attempt.
+// The pointer refers to a static buffer owned by this file; callers must not
+// free it. It is an empty string until an init attempt has written to it, and
+// holds a "Successfully loaded" status rather than an error after a
+// successful load.
+const char* mlx_dynamic_error(void) {
+    return &mlx_error_buffer[0];
+}
+
+// Report whether the library is currently loaded: 1 if so, 0 otherwise
+int mlx_dynamic_is_initialized(void) {
+    return mlx_initialized ? 1 : 0;
+}
+
+// Release the loaded library, if any, and reset the loader state.
+// Optional; does nothing when no library handle is held.
+void mlx_dynamic_cleanup(void) {
+    if (mlx_handle == NULL) {
+        return;  // nothing loaded
+    }
+
+    CLOSE_LIB(mlx_handle);
+    mlx_handle = NULL;
+    mlx_initialized = 0;
+}
--- a/x/ml/backend/mlx/mlx_dynamic.h
+++ b/x/ml/backend/mlx/mlx_dynamic.h
@@ -0,0 +1,26 @@
+// mlx_dynamic.h - Dynamic loading interface for MLX-C library
+#ifndef MLX_DYNAMIC_H
+#define MLX_DYNAMIC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Load the MLX-C shared library at runtime (returns 0 immediately if already loaded)
+// Returns 0 on success, -1 on failure; see mlx_dynamic_error() for the reason
+int mlx_dynamic_init(void);
+
+// Get the message recorded by the last mlx_dynamic_init(); static storage, do not free
+const char* mlx_dynamic_error(void);
+
+// Returns non-zero while the library is loaded, 0 otherwise
+int mlx_dynamic_is_initialized(void);
+
+// Unload the library and reset loader state (optional, for clean shutdown)
+void mlx_dynamic_cleanup(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MLX_DYNAMIC_H