Compare commits

...

7 Commits

Author SHA1 Message Date
jmorganca
f1c7d8718f refactor: consolidate imagegen default constants
Add defaults.go with DefaultWidth and DefaultHeight constants.
Steps are model-dependent and now handled by each model:
- Z-Image uses 9 steps (turbo model)
- Other models define their own defaults

Updated all files that used hardcoded 1024x1024 values to use the constants.
2026-01-15 00:42:53 -08:00
jmorganca
9d07e26b62 fix: capture multiple stderr lines for better error reporting
Previously only the last stderr line was stored, which often missed
important context when the subprocess failed. Now stores up to 10
recent stderr lines and joins them in error messages, providing
more complete diagnostic information.
2026-01-14 22:27:37 -08:00
jmorganca
8485b6546e docs: clarify no-op interface methods in imagegen server
Add explanatory comments for methods that are required by the
llm.LlamaServer interface but don't apply to image generation:

- Load: models loaded by subprocess at startup
- WaitUntilRunning: NewServer already blocks until ready
- Embedding/Tokenize/Detokenize: not applicable to image gen
- GetDeviceInfos: GPU tracking handled by subprocess
2026-01-14 22:25:24 -08:00
jmorganca
d9ae425d54 refactor: remove unused HasTensorLayers wrapper function
The package-level HasTensorLayers(modelName string) was just a thin
wrapper around ResolveModelName(modelName) != "". Callers can use
ResolveModelName directly. The ModelManifest.HasTensorLayers() method
remains for checking if a manifest has tensor layers.
2026-01-14 22:21:49 -08:00
jmorganca
9e1d79ac67 refactor: consolidate imagegen default constants
Add defaults.go with DefaultWidth, DefaultHeight, and DefaultSteps
constants. Update all files that used hardcoded 1024/1024/9 defaults
to use these constants instead:

- server.go: completion request defaults
- cli.go: DefaultOptions and flag defaults
- runner/runner.go: request defaults
- api/handler.go: size parsing defaults
- cmd/engine/main.go: flag defaults
- models/zimage/zimage.go: generate config defaults
- models/qwen_image/qwen_image.go: width/height defaults
2026-01-14 22:21:00 -08:00
jmorganca
7273d9925e fix: log errors in imagegen generate handler instead of discarding
Previously errors from runner.Completion were silently discarded with
`_ = err`. Now they are properly logged with slog.Error for debugging.
2026-01-14 22:18:03 -08:00
jmorganca
4896240528 fix: imagegen models now properly expire after keepalive timeout
The refCount was incorrectly initialized to 1 in loadImageGen, causing
the reference count to be 2 after useLoadedRunner incremented it. When
requests completed, refCount would decrement to 1 (not 0), so the
expiration timer was never set up and models stayed loaded forever.

Removing the explicit refCount:1 lets it default to 0, matching the
behavior of regular LLM model loading (a sketch of this lifecycle
follows the commit list).
2026-01-14 18:02:10 -08:00
14 changed files with 112 additions and 123 deletions
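
The last commit in the list above describes a reference-counting bug; here is a minimal, self-contained sketch of the lifecycle it fixes. Only refCount and the increment in useLoadedRunner are taken from the commit message; the surrounding struct and helper names are illustrative.

package main

import "fmt"

// Illustrative sketch of the refCount bug fixed in 4896240528.
// Only refCount and useLoadedRunner are named in the commit; the
// other names here are hypothetical.
type runnerRef struct {
	refCount int // expiration timer is only armed when this reaches 0
}

func useLoadedRunner(r *runnerRef) { r.refCount++ } // request begins
func completeRequest(r *runnerRef) { r.refCount-- } // request finishes

func main() {
	// Before the fix: loadImageGen set refCount: 1 explicitly.
	buggy := &runnerRef{refCount: 1}
	useLoadedRunner(buggy) // 2
	completeRequest(buggy) // 1, never reaches 0, so the model stays loaded forever

	// After the fix: refCount defaults to 0, like regular LLM loads.
	fixed := &runnerRef{}
	useLoadedRunner(fixed) // 1
	completeRequest(fixed) // 0, keepalive expiration timer can now be set
	fmt.Println(buggy.refCount, fixed.refCount) // 1 0
}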

View File

@@ -127,6 +127,10 @@ type GenerateRequest struct {
// each with an associated log probability. Only applies when Logprobs is true.
// Valid values are 0-20. Default is 0 (only return the selected token's logprob).
TopLogprobs int `json:"top_logprobs,omitempty"`
// Size specifies the image dimensions for image generation models.
// Format: "WxH" (e.g., "1024x1024"). OpenAI-compatible.
Size string `json:"size,omitempty"`
}
// ChatRequest describes a request sent by [Client.Chat].
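
For illustration, a client-side sketch of setting the new Size field through the Go API client. ClientFromEnvironment and Generate are the standard api package entry points; the model name is a placeholder, and Size only takes effect for image generation models.

package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	req := &api.GenerateRequest{
		Model:  "z-image", // placeholder image generation model name
		Prompt: "a watercolor fox over a mountain lake",
		Size:   "1024x1024", // new field: OpenAI-style "WxH"
	}
	// Responses are streamed back through the callback.
	err = client.Generate(context.Background(), req, func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	})
	if err != nil {
		panic(err)
	}
}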

View File

@@ -1464,6 +1464,10 @@ type CompletionRequest struct {
// TopLogprobs specifies the number of most likely alternative tokens to return (0-20)
TopLogprobs int
// Size specifies image dimensions for image generation models.
// Format: "WxH" (e.g., "1024x1024"). OpenAI-compatible.
Size string
}
// DoneReason represents the reason why a completion response is done

View File

@@ -216,7 +216,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
// Check if this is a known image generation model
if imagegen.ResolveModelName(req.Model) != "" {
imagegenapi.HandleGenerateRequest(c, s, req.Model, req.Prompt, req.KeepAlive, streamResponse)
imagegenapi.HandleGenerateRequest(c, s, &req, streamResponse)
return
}

View File

@@ -574,7 +574,6 @@ func (s *Scheduler) loadImageGen(req *LlmRequest) bool {
Options: &req.opts,
loading: false,
sessionDuration: sessionDuration,
refCount: 1,
}
s.loadedMu.Lock()

View File

@@ -2,8 +2,8 @@ package api
import (
"fmt"
"log/slog"
"net/http"
"strconv"
"strings"
"time"
@@ -50,7 +50,7 @@ func ImageGenerationHandler(c *gin.Context, scheduler RunnerScheduler) {
req.N = 1
}
if req.Size == "" {
req.Size = "1024x1024"
req.Size = fmt.Sprintf("%dx%d", imagegen.DefaultWidth, imagegen.DefaultHeight)
}
if req.ResponseFormat == "" {
req.ResponseFormat = "b64_json"
@@ -62,16 +62,8 @@ func ImageGenerationHandler(c *gin.Context, scheduler RunnerScheduler) {
return
}
// Parse size
width, height := parseSize(req.Size)
// Build options - we repurpose NumCtx/NumGPU for width/height
opts := api.Options{}
opts.NumCtx = int(width)
opts.NumGPU = int(height)
// Schedule runner
runner, err := scheduler.ScheduleImageGenRunner(c, req.Model, opts, nil)
runner, err := scheduler.ScheduleImageGenRunner(c, req.Model, api.Options{}, nil)
if err != nil {
status := http.StatusInternalServerError
if strings.Contains(err.Error(), "not found") {
@@ -81,10 +73,10 @@ func ImageGenerationHandler(c *gin.Context, scheduler RunnerScheduler) {
return
}
// Build completion request
// Build completion request with size (OpenAI format)
completionReq := llm.CompletionRequest{
Prompt: req.Prompt,
Options: &opts,
Prompt: req.Prompt,
Size: req.Size,
}
if req.Stream {
@@ -134,22 +126,6 @@ func handleNonStreamingResponse(c *gin.Context, runner llm.LlamaServer, req llm.
c.JSON(http.StatusOK, buildResponse(imageBase64, format))
}
func parseSize(size string) (int32, int32) {
parts := strings.Split(size, "x")
if len(parts) != 2 {
return 1024, 1024
}
w, _ := strconv.Atoi(parts[0])
h, _ := strconv.Atoi(parts[1])
if w == 0 {
w = 1024
}
if h == 0 {
h = 1024
}
return int32(w), int32(h)
}
func extractBase64(content string) string {
if strings.HasPrefix(content, "IMAGE_BASE64:") {
return content[13:]
@@ -185,20 +161,18 @@ func buildResponse(imageBase64, format string) ImageGenerationResponse {
// HandleGenerateRequest handles Ollama /api/generate requests for image gen models.
// This allows routes.go to delegate image generation with minimal code.
func HandleGenerateRequest(c *gin.Context, scheduler RunnerScheduler, modelName, prompt string, keepAlive *api.Duration, streamFn func(c *gin.Context, ch chan any)) {
opts := api.Options{}
func HandleGenerateRequest(c *gin.Context, scheduler RunnerScheduler, req *api.GenerateRequest, streamFn func(c *gin.Context, ch chan any)) {
// Schedule runner
runner, err := scheduler.ScheduleImageGenRunner(c, modelName, opts, keepAlive)
runner, err := scheduler.ScheduleImageGenRunner(c, req.Model, api.Options{}, req.KeepAlive)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
// Build completion request
// Build completion request with size (OpenAI format)
completionReq := llm.CompletionRequest{
Prompt: prompt,
Options: &opts,
Prompt: req.Prompt,
Size: req.Size,
}
// Stream responses via channel
@@ -207,15 +181,14 @@ func HandleGenerateRequest(c *gin.Context, scheduler RunnerScheduler, modelName,
defer close(ch)
err := runner.Completion(c.Request.Context(), completionReq, func(resp llm.CompletionResponse) {
ch <- GenerateResponse{
Model: modelName,
Model: req.Model,
CreatedAt: time.Now().UTC(),
Response: resp.Content,
Done: resp.Done,
}
})
if err != nil {
// Log error but don't block - channel is already being consumed
_ = err
slog.Error("image generation failed", "model", req.Model, "error", err)
}
}()
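
Since the handler now passes req.Size straight through to the runner, an end-to-end request is just the OpenAI images shape. A rough sketch, assuming the handler is mounted at an OpenAI-compatible path such as /v1/images/generations on a default local server; the path, model name, and JSON field names follow the OpenAI images API and are assumptions, not taken from this diff.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumed endpoint and field names (OpenAI images API shape).
	body, _ := json.Marshal(map[string]any{
		"model":           "z-image", // placeholder model name
		"prompt":          "a watercolor fox",
		"size":            "768x768", // omit to fall back to DefaultWidth x DefaultHeight
		"response_format": "b64_json", // also the handler's default
	})
	resp, err := http.Post("http://localhost:11434/v1/images/generations",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, len(out), "bytes")
}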

View File

@@ -37,9 +37,9 @@ type ImageGenOptions struct {
// DefaultOptions returns the default image generation options.
func DefaultOptions() ImageGenOptions {
return ImageGenOptions{
Width: 1024,
Height: 1024,
Steps: 9,
Width: DefaultWidth,
Height: DefaultHeight,
Steps: 0, // 0 means model default
Seed: 0, // 0 means random
}
}
@@ -107,9 +107,9 @@ func GetModelInfo(modelName string) (*ModelInfo, error) {
// RegisterFlags adds image generation flags to the given command.
// Flags are hidden since they only apply to image generation models.
func RegisterFlags(cmd *cobra.Command) {
cmd.Flags().Int("width", 1024, "Image width")
cmd.Flags().Int("height", 1024, "Image height")
cmd.Flags().Int("steps", 9, "Denoising steps")
cmd.Flags().Int("width", DefaultWidth, "Image width")
cmd.Flags().Int("height", DefaultHeight, "Image height")
cmd.Flags().Int("steps", 0, "Denoising steps (0 = model default)")
cmd.Flags().Int("seed", 0, "Random seed (0 for random)")
cmd.Flags().String("negative", "", "Negative prompt")
cmd.Flags().MarkHidden("width")
@@ -158,17 +158,10 @@ func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keep
return err
}
// Build request with image gen options encoded in Options fields
// NumCtx=width, NumGPU=height, NumPredict=steps, Seed=seed
req := &api.GenerateRequest{
Model: modelName,
Prompt: prompt,
Options: map[string]any{
"num_ctx": opts.Width,
"num_gpu": opts.Height,
"num_predict": opts.Steps,
"seed": opts.Seed,
},
Size: fmt.Sprintf("%dx%d", opts.Width, opts.Height),
}
if keepAlive != nil {
req.KeepAlive = keepAlive

View File

@@ -12,6 +12,7 @@ import (
"path/filepath"
"runtime/pprof"
"github.com/ollama/ollama/x/imagegen"
"github.com/ollama/ollama/x/imagegen/mlx"
"github.com/ollama/ollama/x/imagegen/models/gemma3"
"github.com/ollama/ollama/x/imagegen/models/gpt_oss"
@@ -46,9 +47,9 @@ func main() {
imagePath := flag.String("image", "", "Image path for multimodal models")
// Image generation params
width := flag.Int("width", 1024, "Image width")
height := flag.Int("height", 1024, "Image height")
steps := flag.Int("steps", 9, "Denoising steps")
width := flag.Int("width", imagegen.DefaultWidth, "Image width")
height := flag.Int("height", imagegen.DefaultHeight, "Image height")
steps := flag.Int("steps", 0, "Denoising steps (0 = model default)")
seed := flag.Int64("seed", 42, "Random seed")
out := flag.String("output", "output.png", "Output path")
@@ -149,10 +150,10 @@ func main() {
// unless explicitly overridden from defaults
editWidth := int32(0)
editHeight := int32(0)
if *width != 1024 {
if *width != imagegen.DefaultWidth {
editWidth = int32(*width)
}
if *height != 1024 {
if *height != imagegen.DefaultHeight {
editHeight = int32(*height)
}

x/imagegen/defaults.go Normal file
View File

@@ -0,0 +1,7 @@
package imagegen
// Default image generation parameters.
const (
DefaultWidth = 1024
DefaultHeight = 1024
)
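
Note that only width and height became package constants; per the newest commit, a DefaultSteps constant was intentionally not added because each model applies its own step count (Z-Image uses 9). A minimal sketch of the pattern the handlers use with these constants:

package main

import (
	"fmt"

	"github.com/ollama/ollama/x/imagegen"
)

func main() {
	// Same pattern as the OpenAI-compat handler default above: format the
	// constants as an OpenAI-style "WxH" size string.
	size := fmt.Sprintf("%dx%d", imagegen.DefaultWidth, imagegen.DefaultHeight)
	fmt.Println(size) // 1024x1024
}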

View File

@@ -95,8 +95,3 @@ func EstimateVRAM(modelName string) uint64 {
}
return 21 * GB
}
// HasTensorLayers checks if the given model has tensor layers.
func HasTensorLayers(modelName string) bool {
return ResolveModelName(modelName) != ""
}

View File

@@ -94,13 +94,6 @@ func TestEstimateVRAMDefault(t *testing.T) {
}
}
func TestHasTensorLayers(t *testing.T) {
// Non-existent model should return false
if HasTensorLayers("nonexistent-model") {
t.Error("HasTensorLayers() should return false for non-existent model")
}
}
func TestResolveModelName(t *testing.T) {
// Non-existent model should return empty string
result := ResolveModelName("nonexistent-model")

View File

@@ -9,6 +9,7 @@ import (
"path/filepath"
"time"
"github.com/ollama/ollama/x/imagegen"
"github.com/ollama/ollama/x/imagegen/cache"
"github.com/ollama/ollama/x/imagegen/mlx"
"github.com/ollama/ollama/x/imagegen/tokenizer"
@@ -166,10 +167,10 @@ func (m *Model) GenerateImage(ctx context.Context, prompt string, width, height
func (m *Model) generate(cfg *GenerateConfig) (*mlx.Array, error) {
// Apply defaults
if cfg.Width <= 0 {
cfg.Width = 1024
cfg.Width = imagegen.DefaultWidth
}
if cfg.Height <= 0 {
cfg.Height = 1024
cfg.Height = imagegen.DefaultHeight
}
if cfg.Steps <= 0 {
cfg.Steps = 30

View File

@@ -188,13 +188,13 @@ func (m *Model) GenerateImage(ctx context.Context, prompt string, width, height
func (m *Model) generate(ctx context.Context, cfg *GenerateConfig) (*mlx.Array, error) {
// Apply defaults
if cfg.Width <= 0 {
cfg.Width = 1024
cfg.Width = imagegen.DefaultWidth
}
if cfg.Height <= 0 {
cfg.Height = 1024
cfg.Height = imagegen.DefaultHeight
}
if cfg.Steps <= 0 {
cfg.Steps = 9 // Turbo default
cfg.Steps = 9 // Z-Image turbo default
}
if cfg.CFGScale <= 0 {
cfg.CFGScale = 4.0

View File

@@ -136,15 +136,12 @@ func (s *Server) completionHandler(w http.ResponseWriter, r *http.Request) {
s.mu.Lock()
defer s.mu.Unlock()
// Apply defaults
// Apply defaults (steps left to model)
if req.Width <= 0 {
req.Width = 1024
req.Width = imagegen.DefaultWidth
}
if req.Height <= 0 {
req.Height = 1024
}
if req.Steps <= 0 {
req.Steps = 9
req.Height = imagegen.DefaultHeight
}
if req.Seed <= 0 {
req.Seed = time.Now().UnixNano()

View File

@@ -33,10 +33,12 @@ type Server struct {
vramSize uint64
done chan error
client *http.Client
lastErr string // Last stderr line for error reporting
lastErrLock sync.Mutex
stderrLines []string // Recent stderr lines for error reporting (max 10)
stderrLock sync.Mutex
}
const maxStderrLines = 10
// completionRequest is sent to the subprocess
type completionRequest struct {
Prompt string `json:"prompt"`
@@ -139,10 +141,13 @@ func NewServer(modelName string) (*Server, error) {
for scanner.Scan() {
line := scanner.Text()
slog.Warn("image-runner", "msg", line)
// Capture last error line for better error reporting
s.lastErrLock.Lock()
s.lastErr = line
s.lastErrLock.Unlock()
// Capture recent stderr lines for error reporting
s.stderrLock.Lock()
s.stderrLines = append(s.stderrLines, line)
if len(s.stderrLines) > maxStderrLines {
s.stderrLines = s.stderrLines[1:]
}
s.stderrLock.Unlock()
}
}()
@@ -171,7 +176,9 @@ func (s *Server) ModelPath() string {
return s.modelName
}
// Load is called by the scheduler after the server is created.
// Load is a no-op for image generation models.
// Unlike LLM models, imagegen models are loaded by the subprocess at startup
// rather than through this interface method.
func (s *Server) Load(ctx context.Context, systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, requireFull bool) ([]ml.DeviceID, error) {
return nil, nil
}
@@ -204,20 +211,16 @@ func (s *Server) waitUntilRunning() error {
for {
select {
case err := <-s.done:
// Include last stderr line for better error context
s.lastErrLock.Lock()
lastErr := s.lastErr
s.lastErrLock.Unlock()
if lastErr != "" {
return fmt.Errorf("image runner failed: %s (exit: %v)", lastErr, err)
// Include recent stderr lines for better error context
stderrContext := s.getStderrContext()
if stderrContext != "" {
return fmt.Errorf("image runner failed: %s (exit: %v)", stderrContext, err)
}
return fmt.Errorf("image runner exited unexpectedly: %w", err)
case <-timeout:
s.lastErrLock.Lock()
lastErr := s.lastErr
s.lastErrLock.Unlock()
if lastErr != "" {
return fmt.Errorf("timeout waiting for image runner: %s", lastErr)
stderrContext := s.getStderrContext()
if stderrContext != "" {
return fmt.Errorf("timeout waiting for image runner: %s", stderrContext)
}
return errors.New("timeout waiting for image runner to start")
case <-ticker.C:
@@ -229,34 +232,38 @@ func (s *Server) waitUntilRunning() error {
}
}
// WaitUntilRunning implements the LlamaServer interface (no-op since NewServer waits).
// getStderrContext returns recent stderr lines joined as a single string.
func (s *Server) getStderrContext() string {
s.stderrLock.Lock()
defer s.stderrLock.Unlock()
if len(s.stderrLines) == 0 {
return ""
}
return strings.Join(s.stderrLines, "; ")
}
// WaitUntilRunning is a no-op for image generation models.
// NewServer already blocks until the subprocess is ready, so this method
// returns immediately. Required by the llm.LlamaServer interface.
func (s *Server) WaitUntilRunning(ctx context.Context) error {
return nil
}
// Completion generates an image from the prompt via the subprocess.
func (s *Server) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
// Build request
// Build request with defaults (steps left to model)
creq := completionRequest{
Prompt: req.Prompt,
Width: 1024,
Height: 1024,
Steps: 9,
Width: DefaultWidth,
Height: DefaultHeight,
Seed: time.Now().UnixNano(),
}
if req.Options != nil {
if req.Options.NumCtx > 0 && req.Options.NumCtx <= 4096 {
creq.Width = int32(req.Options.NumCtx)
}
if req.Options.NumGPU > 0 && req.Options.NumGPU <= 4096 {
creq.Height = int32(req.Options.NumGPU)
}
if req.Options.NumPredict > 0 && req.Options.NumPredict <= 100 {
creq.Steps = req.Options.NumPredict
}
if req.Options.Seed > 0 {
creq.Seed = int64(req.Options.Seed)
// Parse size string (OpenAI format: "WxH")
if req.Size != "" {
if w, h := parseSize(req.Size); w > 0 && h > 0 {
creq.Width = w
creq.Height = h
}
}
@@ -346,17 +353,20 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
return s.vramSize
}
// Embedding is not supported for image generation models.
// Embedding returns an error as image generation models don't produce embeddings.
// Required by the llm.LlamaServer interface.
func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
return nil, 0, errors.New("embedding not supported for image generation models")
}
// Tokenize is not supported for image generation models.
// Tokenize returns an error as image generation uses internal tokenization.
// Required by the llm.LlamaServer interface.
func (s *Server) Tokenize(ctx context.Context, content string) ([]int, error) {
return nil, errors.New("tokenize not supported for image generation models")
}
// Detokenize is not supported for image generation models.
// Detokenize returns an error as image generation uses internal tokenization.
// Required by the llm.LlamaServer interface.
func (s *Server) Detokenize(ctx context.Context, tokens []int) (string, error) {
return "", errors.New("detokenize not supported for image generation models")
}
@@ -376,7 +386,8 @@ func (s *Server) GetPort() int {
return s.port
}
// GetDeviceInfos returns nil since we don't track GPU info.
// GetDeviceInfos returns nil as GPU tracking is handled by the subprocess.
// Required by the llm.LlamaServer interface.
func (s *Server) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
return nil
}
@@ -393,3 +404,14 @@ func (s *Server) HasExited() bool {
// Ensure Server implements llm.LlamaServer
var _ llm.LlamaServer = (*Server)(nil)
// parseSize parses an OpenAI-style size string "WxH" into width and height.
func parseSize(size string) (int32, int32) {
parts := strings.Split(size, "x")
if len(parts) != 2 {
return 0, 0
}
w, _ := strconv.Atoi(parts[0])
h, _ := strconv.Atoi(parts[1])
return int32(w), int32(h)
}
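
A quick, test-style sketch of how this parser behaves on a few inputs (in the same package; illustrative only):

// Illustrative only: exercises parseSize as defined above.
func ExampleParseSizeInputs() {
	for _, s := range []string{"1024x1024", "768x512", "square", "x512"} {
		w, h := parseSize(s)
		fmt.Printf("%q -> %dx%d\n", s, w, h)
	}
	// "1024x1024" -> 1024x1024
	// "768x512"   -> 768x512
	// "square"    -> 0x0   (no "x" separator)
	// "x512"      -> 0x512 (Atoi error ignored, width stays 0)
	// Completion only applies the parsed values when w > 0 && h > 0,
	// so malformed sizes fall back to DefaultWidth x DefaultHeight.
}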