Compare commits

...

7 Commits

Author SHA1 Message Date
jmorganca
f1c7d8718f refactor: consolidate imagegen default constants
Add defaults.go with DefaultWidth and DefaultHeight constants.
Steps are model-dependent and now handled by each model:
- Z-Image uses 9 steps (turbo model)
- Other models define their own defaults

Updated all files that used hardcoded 1024x1024 values to use the constants.
2026-01-15 00:42:53 -08:00
jmorganca
9d07e26b62 fix: capture multiple stderr lines for better error reporting
Previously only the last stderr line was stored, which often missed
important context when the subprocess failed. Now stores up to 10
recent stderr lines and joins them in error messages, providing
more complete diagnostic information.
2026-01-14 22:27:37 -08:00
jmorganca
8485b6546e docs: clarify no-op interface methods in imagegen server
Add explanatory comments for methods that are required by the
llm.LlamaServer interface but don't apply to image generation:

- Load: models loaded by subprocess at startup
- WaitUntilRunning: NewServer already blocks until ready
- Embedding/Tokenize/Detokenize: not applicable to image gen
- GetDeviceInfos: GPU tracking handled by subprocess
2026-01-14 22:25:24 -08:00
jmorganca
d9ae425d54 refactor: remove unused HasTensorLayers wrapper function
The package-level HasTensorLayers(modelName string) was just a thin
wrapper around ResolveModelName(modelName) != "". Callers can use
ResolveModelName directly. The ModelManifest.HasTensorLayers() method
remains for checking if a manifest has tensor layers.
2026-01-14 22:21:49 -08:00
jmorganca
9e1d79ac67 refactor: consolidate imagegen default constants
Add defaults.go with DefaultWidth, DefaultHeight, and DefaultSteps
constants. Update all files that used hardcoded 1024/1024/9 defaults
to use these constants instead:

- server.go: completion request defaults
- cli.go: DefaultOptions and flag defaults
- runner/runner.go: request defaults
- api/handler.go: size parsing defaults
- cmd/engine/main.go: flag defaults
- models/zimage/zimage.go: generate config defaults
- models/qwen_image/qwen_image.go: width/height defaults
2026-01-14 22:21:00 -08:00
jmorganca
7273d9925e fix: log errors in imagegen generate handler instead of discarding
Previously errors from runner.Completion were silently discarded with
`_ = err`. Now they are properly logged with slog.Error for debugging.
2026-01-14 22:18:03 -08:00
jmorganca
4896240528 fix: imagegen models now properly expire after keepalive timeout
The refCount was incorrectly initialized to 1 in loadImageGen, causing
the reference count to be 2 after useLoadedRunner incremented it. When
requests completed, refCount would decrement to 1 (not 0), so the
expiration timer was never set up and models stayed loaded forever.

Removing the explicit refCount:1 lets it default to 0, matching the
behavior of regular LLM model loading (a sketch of this lifecycle
follows the commit list).
2026-01-14 18:02:10 -08:00
14 changed files with 112 additions and 123 deletions
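
The last commit in the list above describes a reference-counting bug; here is a minimal, self-contained sketch of the lifecycle it fixes. Only refCount and the increment in useLoadedRunner are taken from the commit message; the surrounding struct and helper names are illustrative.

package main

import "fmt"

// Illustrative sketch of the refCount bug fixed in 4896240528.
// Only refCount and useLoadedRunner are named in the commit; the
// other names here are hypothetical.
type runnerRef struct {
	refCount int // expiration timer is only armed when this reaches 0
}

func useLoadedRunner(r *runnerRef) { r.refCount++ } // request begins
func completeRequest(r *runnerRef) { r.refCount-- } // request finishes

func main() {
	// Before the fix: loadImageGen set refCount: 1 explicitly.
	buggy := &runnerRef{refCount: 1}
	useLoadedRunner(buggy) // 2
	completeRequest(buggy) // 1, never reaches 0, so the model stays loaded forever

	// After the fix: refCount defaults to 0, like regular LLM loads.
	fixed := &runnerRef{}
	useLoadedRunner(fixed) // 1
	completeRequest(fixed) // 0, keepalive expiration timer can now be set
	fmt.Println(buggy.refCount, fixed.refCount) // 1 0
}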

View File

@@ -127,6 +127,10 @@ type GenerateRequest struct {
// each with an associated log probability. Only applies when Logprobs is true.
// Valid values are 0-20. Default is 0 (only return the selected token's logprob).
TopLogprobs int `json:"top_logprobs,omitempty"`
// Size specifies the image dimensions for image generation models.
// Format: "WxH" (e.g., "1024x1024"). OpenAI-compatible.
Size string `json:"size,omitempty"`
}
// ChatRequest describes a request sent by [Client.Chat].
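
For illustration, a client-side sketch of setting the new Size field through the Go API client. ClientFromEnvironment and Generate are the standard api package entry points; the model name is a placeholder, and Size only takes effect for image generation models.

package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	req := &api.GenerateRequest{
		Model:  "z-image", // placeholder image generation model name
		Prompt: "a watercolor fox over a mountain lake",
		Size:   "1024x1024", // new field: OpenAI-style "WxH"
	}
	// Responses are streamed back through the callback.
	err = client.Generate(context.Background(), req, func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	})
	if err != nil {
		panic(err)
	}
}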

View File

@@ -1464,6 +1464,10 @@ type CompletionRequest struct {
// TopLogprobs specifies the number of most likely alternative tokens to return (0-20)
TopLogprobs int
// Size specifies image dimensions for image generation models.
// Format: "WxH" (e.g., "1024x1024"). OpenAI-compatible.
Size string
}
// DoneReason represents the reason why a completion response is done

View File

@@ -216,7 +216,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
// Check if this is a known image generation model
if imagegen.ResolveModelName(req.Model) != "" {
imagegenapi.HandleGenerateRequest(c, s, req.Model, req.Prompt, req.KeepAlive, streamResponse)
imagegenapi.HandleGenerateRequest(c, s, &req, streamResponse)
return
}

View File

@@ -574,7 +574,6 @@ func (s *Scheduler) loadImageGen(req *LlmRequest) bool {
Options: &req.opts,
loading: false,
sessionDuration: sessionDuration,
refCount: 1,
}
s.loadedMu.Lock()

View File

@@ -2,8 +2,8 @@ package api
import (
"fmt"
"log/slog"
"net/http"
"strconv"
"strings"
"time"
@@ -50,7 +50,7 @@ func ImageGenerationHandler(c *gin.Context, scheduler RunnerScheduler) {
req.N = 1
}
if req.Size == "" {
req.Size = "1024x1024"
req.Size = fmt.Sprintf("%dx%d", imagegen.DefaultWidth, imagegen.DefaultHeight)
}
if req.ResponseFormat == "" {
req.ResponseFormat = "b64_json"
@@ -62,16 +62,8 @@ func ImageGenerationHandler(c *gin.Context, scheduler RunnerScheduler) {
return
}
// Parse size
width, height := parseSize(req.Size)
// Build options - we repurpose NumCtx/NumGPU for width/height
opts := api.Options{}
opts.NumCtx = int(width)
opts.NumGPU = int(height)
// Schedule runner
runner, err := scheduler.ScheduleImageGenRunner(c, req.Model, opts, nil)
runner, err := scheduler.ScheduleImageGenRunner(c, req.Model, api.Options{}, nil)
if err != nil {
status := http.StatusInternalServerError
if strings.Contains(err.Error(), "not found") {
@@ -81,10 +73,10 @@ func ImageGenerationHandler(c *gin.Context, scheduler RunnerScheduler) {
return
}
// Build completion request
// Build completion request with size (OpenAI format)
completionReq := llm.CompletionRequest{
Prompt: req.Prompt,
Options: &opts,
Prompt: req.Prompt,
Size: req.Size,
}
if req.Stream {
@@ -134,22 +126,6 @@ func handleNonStreamingResponse(c *gin.Context, runner llm.LlamaServer, req llm.
c.JSON(http.StatusOK, buildResponse(imageBase64, format))
}
func parseSize(size string) (int32, int32) {
parts := strings.Split(size, "x")
if len(parts) != 2 {
return 1024, 1024
}
w, _ := strconv.Atoi(parts[0])
h, _ := strconv.Atoi(parts[1])
if w == 0 {
w = 1024
}
if h == 0 {
h = 1024
}
return int32(w), int32(h)
}
func extractBase64(content string) string {
if strings.HasPrefix(content, "IMAGE_BASE64:") {
return content[13:]
@@ -185,20 +161,18 @@ func buildResponse(imageBase64, format string) ImageGenerationResponse {
// HandleGenerateRequest handles Ollama /api/generate requests for image gen models.
// This allows routes.go to delegate image generation with minimal code.
func HandleGenerateRequest(c *gin.Context, scheduler RunnerScheduler, modelName, prompt string, keepAlive *api.Duration, streamFn func(c *gin.Context, ch chan any)) {
opts := api.Options{}
func HandleGenerateRequest(c *gin.Context, scheduler RunnerScheduler, req *api.GenerateRequest, streamFn func(c *gin.Context, ch chan any)) {
// Schedule runner
runner, err := scheduler.ScheduleImageGenRunner(c, modelName, opts, keepAlive)
runner, err := scheduler.ScheduleImageGenRunner(c, req.Model, api.Options{}, req.KeepAlive)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
// Build completion request
// Build completion request with size (OpenAI format)
completionReq := llm.CompletionRequest{
Prompt: prompt,
Options: &opts,
Prompt: req.Prompt,
Size: req.Size,
}
// Stream responses via channel
@@ -207,15 +181,14 @@ func HandleGenerateRequest(c *gin.Context, scheduler RunnerScheduler, modelName,
defer close(ch)
err := runner.Completion(c.Request.Context(), completionReq, func(resp llm.CompletionResponse) {
ch <- GenerateResponse{
Model: modelName,
Model: req.Model,
CreatedAt: time.Now().UTC(),
Response: resp.Content,
Done: resp.Done,
}
})
if err != nil {
// Log error but don't block - channel is already being consumed
_ = err
slog.Error("image generation failed", "model", req.Model, "error", err)
}
}()
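
Since the handler now passes req.Size straight through to the runner, an end-to-end request is just the OpenAI images shape. A rough sketch, assuming the handler is mounted at an OpenAI-compatible path such as /v1/images/generations on a default local server; the path, model name, and JSON field names follow the OpenAI images API and are assumptions, not taken from this diff.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumed endpoint and field names (OpenAI images API shape).
	body, _ := json.Marshal(map[string]any{
		"model":           "z-image", // placeholder model name
		"prompt":          "a watercolor fox",
		"size":            "768x768", // omit to fall back to DefaultWidth x DefaultHeight
		"response_format": "b64_json", // also the handler's default
	})
	resp, err := http.Post("http://localhost:11434/v1/images/generations",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, len(out), "bytes")
}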

View File

@@ -37,9 +37,9 @@ type ImageGenOptions struct {
// DefaultOptions returns the default image generation options.
func DefaultOptions() ImageGenOptions {
return ImageGenOptions{
Width: 1024,
Height: 1024,
Steps: 9,
Width: DefaultWidth,
Height: DefaultHeight,
Steps: 0, // 0 means model default
Seed: 0, // 0 means random
}
}
@@ -107,9 +107,9 @@ func GetModelInfo(modelName string) (*ModelInfo, error) {
// RegisterFlags adds image generation flags to the given command.
// Flags are hidden since they only apply to image generation models.
func RegisterFlags(cmd *cobra.Command) {
cmd.Flags().Int("width", 1024, "Image width")
cmd.Flags().Int("height", 1024, "Image height")
cmd.Flags().Int("steps", 9, "Denoising steps")
cmd.Flags().Int("width", DefaultWidth, "Image width")
cmd.Flags().Int("height", DefaultHeight, "Image height")
cmd.Flags().Int("steps", 0, "Denoising steps (0 = model default)")
cmd.Flags().Int("seed", 0, "Random seed (0 for random)")
cmd.Flags().String("negative", "", "Negative prompt")
cmd.Flags().MarkHidden("width")
@@ -158,17 +158,10 @@ func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keep
return err
}
// Build request with image gen options encoded in Options fields
// NumCtx=width, NumGPU=height, NumPredict=steps, Seed=seed
req := &api.GenerateRequest{
Model: modelName,
Prompt: prompt,
Options: map[string]any{
"num_ctx": opts.Width,
"num_gpu": opts.Height,
"num_predict": opts.Steps,
"seed": opts.Seed,
},
Size: fmt.Sprintf("%dx%d", opts.Width, opts.Height),
}
if keepAlive != nil {
req.KeepAlive = keepAlive

View File

@@ -12,6 +12,7 @@ import (
"path/filepath"
"runtime/pprof"
"github.com/ollama/ollama/x/imagegen"
"github.com/ollama/ollama/x/imagegen/mlx"
"github.com/ollama/ollama/x/imagegen/models/gemma3"
"github.com/ollama/ollama/x/imagegen/models/gpt_oss"
@@ -46,9 +47,9 @@ func main() {
imagePath := flag.String("image", "", "Image path for multimodal models")
// Image generation params
width := flag.Int("width", 1024, "Image width")
height := flag.Int("height", 1024, "Image height")
steps := flag.Int("steps", 9, "Denoising steps")
width := flag.Int("width", imagegen.DefaultWidth, "Image width")
height := flag.Int("height", imagegen.DefaultHeight, "Image height")
steps := flag.Int("steps", 0, "Denoising steps (0 = model default)")
seed := flag.Int64("seed", 42, "Random seed")
out := flag.String("output", "output.png", "Output path")
@@ -149,10 +150,10 @@ func main() {
// unless explicitly overridden from defaults
editWidth := int32(0)
editHeight := int32(0)
if *width != 1024 {
if *width != imagegen.DefaultWidth {
editWidth = int32(*width)
}
if *height != 1024 {
if *height != imagegen.DefaultHeight {
editHeight = int32(*height)
}

x/imagegen/defaults.go Normal file
View File

@@ -0,0 +1,7 @@
package imagegen
// Default image generation parameters.
const (
DefaultWidth = 1024
DefaultHeight = 1024
)
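
Note that only width and height became package constants; per the newest commit, a DefaultSteps constant was intentionally not added because each model applies its own step count (Z-Image uses 9). A minimal sketch of the pattern the handlers use with these constants:

package main

import (
	"fmt"

	"github.com/ollama/ollama/x/imagegen"
)

func main() {
	// Same pattern as the OpenAI-compat handler default above: format the
	// constants as an OpenAI-style "WxH" size string.
	size := fmt.Sprintf("%dx%d", imagegen.DefaultWidth, imagegen.DefaultHeight)
	fmt.Println(size) // 1024x1024
}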

View File

@@ -95,8 +95,3 @@ func EstimateVRAM(modelName string) uint64 {
}
return 21 * GB
}
// HasTensorLayers checks if the given model has tensor layers.
func HasTensorLayers(modelName string) bool {
return ResolveModelName(modelName) != ""
}

View File

@@ -94,13 +94,6 @@ func TestEstimateVRAMDefault(t *testing.T) {
}
}
func TestHasTensorLayers(t *testing.T) {
// Non-existent model should return false
if HasTensorLayers("nonexistent-model") {
t.Error("HasTensorLayers() should return false for non-existent model")
}
}
func TestResolveModelName(t *testing.T) {
// Non-existent model should return empty string
result := ResolveModelName("nonexistent-model")

View File

@@ -9,6 +9,7 @@ import (
"path/filepath"
"time"
"github.com/ollama/ollama/x/imagegen"
"github.com/ollama/ollama/x/imagegen/cache"
"github.com/ollama/ollama/x/imagegen/mlx"
"github.com/ollama/ollama/x/imagegen/tokenizer"
@@ -166,10 +167,10 @@ func (m *Model) GenerateImage(ctx context.Context, prompt string, width, height
func (m *Model) generate(cfg *GenerateConfig) (*mlx.Array, error) {
// Apply defaults
if cfg.Width <= 0 {
cfg.Width = 1024
cfg.Width = imagegen.DefaultWidth
}
if cfg.Height <= 0 {
cfg.Height = 1024
cfg.Height = imagegen.DefaultHeight
}
if cfg.Steps <= 0 {
cfg.Steps = 30

View File

@@ -188,13 +188,13 @@ func (m *Model) GenerateImage(ctx context.Context, prompt string, width, height
func (m *Model) generate(ctx context.Context, cfg *GenerateConfig) (*mlx.Array, error) {
// Apply defaults
if cfg.Width <= 0 {
cfg.Width = 1024
cfg.Width = imagegen.DefaultWidth
}
if cfg.Height <= 0 {
cfg.Height = 1024
cfg.Height = imagegen.DefaultHeight
}
if cfg.Steps <= 0 {
cfg.Steps = 9 // Turbo default
cfg.Steps = 9 // Z-Image turbo default
}
if cfg.CFGScale <= 0 {
cfg.CFGScale = 4.0

View File

@@ -136,15 +136,12 @@ func (s *Server) completionHandler(w http.ResponseWriter, r *http.Request) {
s.mu.Lock()
defer s.mu.Unlock()
// Apply defaults
// Apply defaults (steps left to model)
if req.Width <= 0 {
req.Width = 1024
req.Width = imagegen.DefaultWidth
}
if req.Height <= 0 {
req.Height = 1024
}
if req.Steps <= 0 {
req.Steps = 9
req.Height = imagegen.DefaultHeight
}
if req.Seed <= 0 {
req.Seed = time.Now().UnixNano()

View File

@@ -33,10 +33,12 @@ type Server struct {
vramSize uint64
done chan error
client *http.Client
lastErr string // Last stderr line for error reporting
lastErrLock sync.Mutex
stderrLines []string // Recent stderr lines for error reporting (max 10)
stderrLock sync.Mutex
}
const maxStderrLines = 10
// completionRequest is sent to the subprocess
type completionRequest struct {
Prompt string `json:"prompt"`
@@ -139,10 +141,13 @@ func NewServer(modelName string) (*Server, error) {
for scanner.Scan() {
line := scanner.Text()
slog.Warn("image-runner", "msg", line)
// Capture last error line for better error reporting
s.lastErrLock.Lock()
s.lastErr = line
s.lastErrLock.Unlock()
// Capture recent stderr lines for error reporting
s.stderrLock.Lock()
s.stderrLines = append(s.stderrLines, line)
if len(s.stderrLines) > maxStderrLines {
s.stderrLines = s.stderrLines[1:]
}
s.stderrLock.Unlock()
}
}()
@@ -171,7 +176,9 @@ func (s *Server) ModelPath() string {
return s.modelName
}
// Load is called by the scheduler after the server is created.
// Load is a no-op for image generation models.
// Unlike LLM models, imagegen models are loaded by the subprocess at startup
// rather than through this interface method.
func (s *Server) Load(ctx context.Context, systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, requireFull bool) ([]ml.DeviceID, error) {
return nil, nil
}
@@ -204,20 +211,16 @@ func (s *Server) waitUntilRunning() error {
for {
select {
case err := <-s.done:
// Include last stderr line for better error context
s.lastErrLock.Lock()
lastErr := s.lastErr
s.lastErrLock.Unlock()
if lastErr != "" {
return fmt.Errorf("image runner failed: %s (exit: %v)", lastErr, err)
// Include recent stderr lines for better error context
stderrContext := s.getStderrContext()
if stderrContext != "" {
return fmt.Errorf("image runner failed: %s (exit: %v)", stderrContext, err)
}
return fmt.Errorf("image runner exited unexpectedly: %w", err)
case <-timeout:
s.lastErrLock.Lock()
lastErr := s.lastErr
s.lastErrLock.Unlock()
if lastErr != "" {
return fmt.Errorf("timeout waiting for image runner: %s", lastErr)
stderrContext := s.getStderrContext()
if stderrContext != "" {
return fmt.Errorf("timeout waiting for image runner: %s", stderrContext)
}
return errors.New("timeout waiting for image runner to start")
case <-ticker.C:
@@ -229,34 +232,38 @@ func (s *Server) waitUntilRunning() error {
}
}
// WaitUntilRunning implements the LlamaServer interface (no-op since NewServer waits).
// getStderrContext returns recent stderr lines joined as a single string.
func (s *Server) getStderrContext() string {
s.stderrLock.Lock()
defer s.stderrLock.Unlock()
if len(s.stderrLines) == 0 {
return ""
}
return strings.Join(s.stderrLines, "; ")
}
// WaitUntilRunning is a no-op for image generation models.
// NewServer already blocks until the subprocess is ready, so this method
// returns immediately. Required by the llm.LlamaServer interface.
func (s *Server) WaitUntilRunning(ctx context.Context) error {
return nil
}
// Completion generates an image from the prompt via the subprocess.
func (s *Server) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
// Build request
// Build request with defaults (steps left to model)
creq := completionRequest{
Prompt: req.Prompt,
Width: 1024,
Height: 1024,
Steps: 9,
Width: DefaultWidth,
Height: DefaultHeight,
Seed: time.Now().UnixNano(),
}
if req.Options != nil {
if req.Options.NumCtx > 0 && req.Options.NumCtx <= 4096 {
creq.Width = int32(req.Options.NumCtx)
}
if req.Options.NumGPU > 0 && req.Options.NumGPU <= 4096 {
creq.Height = int32(req.Options.NumGPU)
}
if req.Options.NumPredict > 0 && req.Options.NumPredict <= 100 {
creq.Steps = req.Options.NumPredict
}
if req.Options.Seed > 0 {
creq.Seed = int64(req.Options.Seed)
// Parse size string (OpenAI format: "WxH")
if req.Size != "" {
if w, h := parseSize(req.Size); w > 0 && h > 0 {
creq.Width = w
creq.Height = h
}
}
@@ -346,17 +353,20 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
return s.vramSize
}
// Embedding is not supported for image generation models.
// Embedding returns an error as image generation models don't produce embeddings.
// Required by the llm.LlamaServer interface.
func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
return nil, 0, errors.New("embedding not supported for image generation models")
}
// Tokenize is not supported for image generation models.
// Tokenize returns an error as image generation uses internal tokenization.
// Required by the llm.LlamaServer interface.
func (s *Server) Tokenize(ctx context.Context, content string) ([]int, error) {
return nil, errors.New("tokenize not supported for image generation models")
}
// Detokenize is not supported for image generation models.
// Detokenize returns an error as image generation uses internal tokenization.
// Required by the llm.LlamaServer interface.
func (s *Server) Detokenize(ctx context.Context, tokens []int) (string, error) {
return "", errors.New("detokenize not supported for image generation models")
}
@@ -376,7 +386,8 @@ func (s *Server) GetPort() int {
return s.port
}
// GetDeviceInfos returns nil since we don't track GPU info.
// GetDeviceInfos returns nil as GPU tracking is handled by the subprocess.
// Required by the llm.LlamaServer interface.
func (s *Server) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
return nil
}
@@ -393,3 +404,14 @@ func (s *Server) HasExited() bool {
// Ensure Server implements llm.LlamaServer
var _ llm.LlamaServer = (*Server)(nil)
// parseSize parses an OpenAI-style size string "WxH" into width and height.
func parseSize(size string) (int32, int32) {
parts := strings.Split(size, "x")
if len(parts) != 2 {
return 0, 0
}
w, _ := strconv.Atoi(parts[0])
h, _ := strconv.Atoi(parts[1])
return int32(w), int32(h)
}
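
A quick, test-style sketch of how this parser behaves on a few inputs (in the same package; illustrative only):

// Illustrative only: exercises parseSize as defined above.
func ExampleParseSizeInputs() {
	for _, s := range []string{"1024x1024", "768x512", "square", "x512"} {
		w, h := parseSize(s)
		fmt.Printf("%q -> %dx%d\n", s, w, h)
	}
	// "1024x1024" -> 1024x1024
	// "768x512"   -> 768x512
	// "square"    -> 0x0   (no "x" separator)
	// "x512"      -> 0x512 (Atoi error ignored, width stays 0)
	// Completion only applies the parsed values when w > 0 && h > 0,
	// so malformed sizes fall back to DefaultWidth x DefaultHeight.
}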